check_olditems = 1;

        // Deal-type URL slug => numeric id.
        $this->deals = [
            "prodam" => 1, # For sale
            "sdam"   => 2, # For rent
        ];

        // Category URL slug => numeric id.
        $this->categories = [
            "kvartiry"                    => 1, # Apartments
            "komnaty"                     => 2, # Rooms
            "doma_dachi_kottedzhi"        => 3, # Houses, dachas, cottages
            "zemelnye_uchastki"           => 4, # Land plots
            "kommercheskaya_nedvizhimost" => 5, # Commercial real estate
            "garazhi_i_mashinomesta"      => 6, # Garages and parking spaces
            "nedvizhimost_za_rubezhom"    => 7, # Real estate abroad
        ];

        // Object-type URL slug => numeric id.
        $this->objects = [
            # rooms???
            "komnaty"            => 0,
            # apartments
            "vtorichka"          => 1,  # Resale
            "novostroyka"        => 2,  # New build
            # Houses, dachas, cottages
            "dom"                => 3,  # Houses
            "dacha"              => 4,  # Dachas
            "kottedzh"           => 5,  # Cottages
            "taunhaus"           => 6,  # Townhouses
            # Land plots
            "izhs"               => 7,  # Settlement land (IZhS)
            "selhoznaznacheniya" => 8,  # Agricultural (SNT, DNP)
            "promnaznacheniya"   => 9,  # Industrial
            # Commercial real estate
            "gostinicy"          => 10, # Hotel
            "ofis"               => 11, # Office premises
            "drugoe"             => 12, # Free-purpose premises
            "proizvodstvo"       => 13, # Production premises
            "sklad"              => 14, # Warehouse premises
            "magazin"            => 15, # Retail premises
        ];

        // URL suffix per deal type and category (consumed by create_url()).
        $etc = "?user=1&view=list";
        $this->sections = [
            "prodam" => [
                "kvartiry"                    => $etc,
                "komnaty"                     => $etc,
                "doma_dachi_kottedzhi"        => $etc,
                "zemelnye_uchastki"           => $etc,
                "kommercheskaya_nedvizhimost" => $etc,
            ],
            "sdam" => [
                "kvartiry"                    => "/na_dlitelnyy_srok{$etc}",
                "komnaty"                     => "/na_dlitelnyy_srok{$etc}",
                "doma_dachi_kottedzhi"        => "/na_dlitelnyy_srok{$etc}",
                "kommercheskaya_nedvizhimost" => $etc,
            ]
        ];

        $this->cookies = "";
        // Midnight seven days ago, formatted as "Y-m-d 00:00:00".
        $this->datelimit = date("Y-m-d", time() - 7 * 86400) . " 00:00:00";
    }

    /**
     * Downloads a listing page into $this->data and returns the number of result pages.
     *
     * The page is treated as valid only when the "breadcrumbs-link-count" element is
     * present; otherwise the call retries itself with $n + 1. Once $n exceeds 3 the
     * error is logged and the result of __log() is returned.
     *
     * @param string $url Listing URL.
     * @param int    $n   Retry counter (0 on the first call).
     * @return mixed Highest page number found in the pagination links, 1 when there is
     *               no pagination block or it holds no page links, or the __log() result.
     */
    public function getPages($url, $n = 0)
    {
        if ($n) {
            $err = "getPages({$url}) - error receiving data";
            $this->update_proxy(1, -1, $err);
            if ($n > 3) {
                return $this->__log($err);
            }
        }

        // $file = "{$_ENV[ROOT]}/{$this->site->code}.html";
        // $this->data = file_get_contents($file);
        $this->data = $this->get_url($url, false);
        // file_put_contents($file, $this->data);

        // NOTE(review): the opening "<span" / "<div" of the next two patterns was missing
        // in the stored source; restored from the closing tags each pattern ends with.
        $r = preg_match("/<span\b[^>]*\bclass=\"[^\"]*\bbreadcrumbs-link-count\b[^\"]*\"[^>]*>(.*)<\/span>/", $this->data);
        if (!$r) {
            return $this->getPages($url, $n + 1);
        }

        $r = preg_match("/<div\b[^>]*\bclass=\"[^\"]*\bpagination-pages\b[^\"]*\"[^>]*>(.*)<\/div>/ismU", $this->data, $tmp);
        if (!$r) {
            return 1;
        }

        preg_match_all("/href=\"[^\"]+\?p=(\d+)\&[^\"]+\"/ismU", $tmp[1], $pages);
        // max() on an empty array raises a warning (ValueError on PHP 8): treat as one page.
        if (empty($pages[1])) {
            return 1;
        }
        return max($pages[1]);
    }

    /**
     * Parses one listing page into item stubs keyed by item id.
     *
     * Uses the HTML cached in $this->data by getPages() when present (and clears it),
     * otherwise downloads $url. Retries like getPages() while the
     * "breadcrumbs-link-count" element is missing.
     *
     * @param string $url Listing URL.
     * @param int    $n   Retry counter (0 on the first call).
     * @return array Map id => object with id, date, title, url, price; empty on failure
     *               or when the page has no catalog list.
     */
    public function getItems($url, $n = 0)
    {
        if ($n) {
            $err = "getItems({$url}) - error receiving data";
            $this->update_proxy(1, -1, $err);
            if ($n > 3) {
                $this->__log($err);
                return [];
            }
        }

        $data = $this->data ? $this->data : $this->get_url($url, false);
        $this->data = null;

        $r = preg_match("/<span\b[^>]*\bclass=\"[^\"]*\bbreadcrumbs-link-count\b[^\"]*\"[^>]*>(.*)<\/span>/ismU", $data);
        if (!$r) {
            return $this->getItems($url, $n + 1);
        }

        // NOTE(review): the tag name of the catalog-list element is not recoverable from
        // the stored source, so any tag name is accepted here.
        $r = preg_match("/<\w+\b[^>]*\bclass=\"[^\"]*\bcatalog-list\b[^\"]*\"[^>]*>/ismU", $data);
        if (!$r) {
            return [];
        }

        // Text content of one sample listing card (its markup was not preserved):
        //   3 600 000 RUB
        //   9
        //   104 m2
        //   plot 10 sotka
        //   Oktyabrsky district
        //   House
        //   Today 13:43

        // Group 1 is the price block; id/url/title come from the title link attributes;
        // date/time come from the date span.
        // NOTE(review): the stored pattern had lost its opening tags and the names of the
        // id/url/title groups (it did not compile). Group names are restored from their
        // use below; "<div" is inferred from the closing tags, "<a" from the closing
        // anchor, and the title container accepts any tag name. Confirm against live markup.
        $r = preg_match_all(
            '/<div\b[^>]*\bclass="[^"]*\bitem\b[^"]*"[^>]*>.*'.
            '<div\b[^>]+\bclass="price"[^>]*>(.*)<\/div>.*'.
            '<\w+\b[^>]*\bclass="[^"]*\btitle\b[^"]*"[^>]*>.*<a\b(?=[^>]*\bid="(?<id>\d+)")(?=[^>]*\bhref="(?<url>[^"]+)")(?=[^>]*\btitle="(?<title>[^"]+)")[^>]*>.*<\/a>.*'.
            '<span\b[^>]*\bclass="[^"]*\bdate\b[^"]*"[^>]*>\s*(?<date>[^<]+)(?<time>\d{2}:\d{2})\s*<\/span>.*<\/div>.*<\/div>/ismxU',
            $data,
            $tmp
        );
        if (!$r) {
            return [];
        }

        $items = [];
        for ($i = 0; $i < $r; $i++) {
            $date = preg_replace('@\s+|\s+@isu', '', html_entity_decode($tmp["date"][$i]));
            $path = preg_replace('@\?slocation=\d+$@isu', '', $tmp["url"][$i]);
            $id   = $tmp["id"][$i];

            $items[$id] = (object) [
                "id"    => $id,
                "date"  => $this->get_date($date, $tmp["time"][$i]),
                "title" => $tmp["title"][$i],
                "url"   => "https://" . $this->site->host . $path,
                // Keep digits only.
                "price" => preg_replace('/\D+/', '', trim(html_entity_decode(strip_tags($tmp[1][$i])))),
            ];
        }
        return $items;
    }

    /**
     * Downloads one item page, fills $item with the parsed fields and stores it.
     *
     * Fields set on $item: phone, object_type, category (dachas only), title,
     * description, raw_params, params, rooms, s, material, level, levels, land,
     * distance, seller, address, raion, images, display. Finally updateItem() is
     * called when the id is present in $this->olditems, saveItem() otherwise.
     *
     * Requires an open legacy mysql_* connection (phone cache table `avito_phone_cache`).
     *
     * @param object $item Item stub as produced by getItems() (needs id, url, title).
     * @param int    $n    Retry counter (0 on the first call).
     * @return mixed The filled $item; the __log() result after more than three failed
     *               retries; the error() result when no phone could be obtained.
     */
    public function getItem($item, $n = 0)
    {
        if ($n) {
            $err = "getItem({$item->id}) - error receiving data";
            $this->update_proxy(1, -1, $err);
            if ($n > 3) {
                return $this->__log($err);
            }
        }

        # check item
        /*if(isset($this->olditems[$item->id])) {
            $this->updateItem($item); # price, images (not checked only!)
            return $item;
        }*/

        $data = $this->get_url($item->url, true);
        /*$file = "{$_ENV[ROOT]}/{$this->site->code}-{$item->id}.html";
        if(file_exists($file)) $data = file_get_contents($file);
        else {
            $data = $this->get_url($item->url, false);
            file_put_contents($file, $data);
        }*/

        // The page counts as valid only when it carries the phone token.
        $r = preg_match("/\bavito\.item\.phone\s*=\s*['\"]([^'\"]+)['\"]/ismU", $data, $tmp);
        if (!$r) {
            return $this->getItem($item, $n + 1);
        }

        # <get_phone>
        $pkey = $this->get_pkey($item->id, $tmp[1]); // Changes every hour

        // Cache lookup for the current date and hour. The row is associative, so it must
        // be read by column name (list() reads index 0 and never found the phone).
        $res = mysql_query("SELECT `phone` FROM `avito_phone_cache` WHERE `pkey` = '" . mysql_real_escape_string($pkey) . "' AND `date` = CURDATE() AND `hour` = HOUR(NOW())");
        $row = $res ? mysql_fetch_assoc($res) : false;
        $item->phone = $row ? $row["phone"] : null;

        if (!$item->phone) {
            // Path of the item relative to the site host; the host is quoted so that its
            // dots are matched literally.
            $url = preg_replace("@^https?://" . preg_quote($this->site->host, "@") . "@isU", "", $item->url);
            #$url = "https://m.avito.ru{$url}/phone/{$pkey}?async";
            #$tmp = $this->get_url(
            #    $url,
            #    true,
            #    [
            #        "Referer: https://m.avito.ru{$item->url}",
            #        "X-Requested-With: XMLHttpRequest",
            #    ]
            #);
            #if(preg_match("/\"phone\"[:]\"([^\"]+)\"/", $tmp, $tmp)) {
            #    $item->phone = preg_replace("/\D+/", "", $tmp[1]);
            #}
            $url = "https://m.avito.ru{$url}";
            $mobile = $this->get_url($url, true);
            if (preg_match("/\"tel[:]([^\"]+)\"/", $mobile, $m)) {
                $item->phone = preg_replace("/\D+/", "", $m[1]);
            }
            if (!$item->phone) {
                return $this->error("error receiving phone");
            }
            // Numbers of seven digits or fewer get the configured city code prepended.
            if (mb_strlen($item->phone) <= 7) {
                $item->phone = $this->cities[$this->city]["code"] . $item->phone;
            }
            mysql_query("INSERT INTO `avito_phone_cache` (`pkey`,`phone`,`date`,`hour`) VALUES('" . mysql_real_escape_string($pkey) . "','" . mysql_real_escape_string($item->phone) . "',CURDATE(), HOUR(NOW()))");
        }
        # </get_phone>

        # <get_object_type>
        if (preg_match("/<div\b[^>]*\bclass=\"[^\"]*\bbreadcrumbs-links\b[^\"]*\"[^>]*>(.*)<\/div>/ismU", $data, $tmp)) {
            preg_match_all("/href=\"([^\"]+)\"/", $tmp[1], $hrefs);
            // The last breadcrumb link determines the object type.
            $lastHref = array_pop($hrefs[1]);
            if (preg_match("/\/(vtorichka|novostroyka)\//", $lastHref, $m)) {
                $item->object_type = $m[1];
            } else {
                // array_pop() takes its argument by reference, so it needs a real variable.
                $segments = explode("/", $lastHref);
                $item->object_type = array_pop($segments);
                // Unknown slugs (and the id-0 entry) fall back to "komnaty".
                if (empty($this->objects[$item->object_type])) {
                    $item->object_type = "komnaty";
                }
                if ($item->object_type == "dacha") {
                    $item->category = "zemelnye_uchastki";
                }
            }
        }
        # </get_object_type>

        # <get_title>
        if (preg_match("/<h1\b[^>]*\bclass=\"[^\"]*\bh1\b[^\"]*\"[^>]*>(.*)<\/h1>/ismU", $data, $tmp)) {
            $item->title = $tmp[1];
        }
        if (preg_match('/^Дача\b/isu', $item->title)) {
            $item->category = "zemelnye_uchastki";
        }
        # </get_title>

        # <get_description>
        preg_match("/<div\b[^>]*\bitemprop=\"description\"[^>]*>(.*)<\/div>/ismU", $data, $tmp);
        $item->description = trim(strip_tags(isset($tmp[1]) ? $tmp[1] : ""));
        # </get_description>

        # <get_params>
        preg_match_all("/<div\b[^>]*\bclass=\"[^\"]*\bitem-params\b[^\"]*\"[^>]*>.*<\/div>/ismU", $data, $tmp);
        $item->raw_params = implode("\n", $tmp[0]);

        // Strips leading whitespace and a trailing colon from a label
        // (the original pattern had "^s+" instead of "^\s+").
        $labelTrim = '@^\s+|:\s*$@is';

        $r = preg_match_all('@<li\b\s+\bclass="[^"]*\bitem-params-list-item\b[^"]*"[^>]*>\s*<span\b\s+\bclass="[^"]*\bitem-params-label\b[^"]*"[^>]*>(?<key>.*)</span>\s*(?<value>.*)</li>@isU', $item->raw_params, $tmp);
        $params = [];
        $item->params = "";
        for ($i = 0; $i < $r; $i++) {
            $k = preg_replace($labelTrim, '', $tmp['key'][$i]);
            $v = trim($tmp['value'][$i]);
            $params[$k] = $v;
            $item->params .= $k . ': ' . $v . '; ';
        }

        // Advanced parameters are only appended to the text summary, not to $params.
        // "[^>]+" replaces the mistyped character class "[^>+]".
        $r = preg_match_all('@<div\b[^>]+\bclass="advanced-params-param-title"[^>]*>(?<key>.*)</div>\s*<ul\b[^>]+\bclass="advanced-params-param-list"[^>]*>(?<value>.*)</ul>@isU', $data, $tmp);
        for ($i = 0; $i < $r; $i++) {
            $k = preg_replace($labelTrim, '', $tmp['key'][$i]);
            $v = trim(strip_tags(preg_replace('@(</li>)\s*(<li)@is', '$1, $2', $tmp['value'][$i])));
            // $params[$k] = $v;
            $item->params .= $k . ': ' . $v . '; ';
        }

        // Null-safe accessor: absent labels yield null without an undefined-index notice.
        $param = function ($key) use ($params) {
            return isset($params[$key]) ? $params[$key] : null;
        };

        $item->rooms = intval($param('Количество комнат'));
        if (!$item->rooms) {
            if ($this->category == "komnaty") {
                $item->rooms = 1;
            } elseif (preg_match("/\b(\d+)\-к\s+квартира\b/isu", $item->title, $tmp)) {
                $item->rooms = intval($tmp[1]);
            } elseif (preg_match("/\bстудия\b/isu", $item->title)) {
                $item->rooms = 1;
            } else {
                $item->rooms = 0;
            }
        }

        // Area: first non-empty label wins, then fall back to the title.
        foreach (['Площадь', 'Площадь дома'] as $key) {
            if ($param($key)) {
                $item->s = __floatval($param($key));
                break;
            }
        }
        if (empty($item->s)) {
            if (preg_match("/\b(\d+(?:[.,]\d+)?)\s*м/isu", $item->title, $tmp)) {
                $item->s = __floatval($tmp[1]);
            }
        }

        foreach (['Тип дома', 'Материал стен'] as $key) {
            if ($param($key)) {
                $item->material = $param($key);
                break;
            }
        }

        $item->level = intval($param('Этаж'));
        $item->levels = intval($param('Этажей в доме'));
        $item->land = __floatval($param('Площадь участка'));
        if (!$item->land && preg_match("/\bучаст\S+\s+([\d.,]+)\s+сот\./isu", $item->title, $tmp)) {
            $item->land = __floatval($tmp[1]);
        }
        $item->distance = $param('Расстояние до города');
        # </get_params>

        # <get_seller>
        preg_match("/<div\b[^>]*\bitemprop=\"seller\"[^>]*>(.*)<\/div>/ismU", $data, $tmp);
        $item->seller = trim(strip_tags(isset($tmp[1]) ? $tmp[1] : ""));
        $item->seller = preg_replace('@\s*на Avito c\b.*$@isu', '', $item->seller);
        # </get_seller>

        # <get_address>
        // The /u modifier is required on the \W patterns below: without it \W matches a
        // single byte and cuts a multi-byte (Cyrillic) character in half.
        if (preg_match("/<span\b[^>]*\bitemprop=\"address\"[^>]*>(.*)<span\b[^>]*\bitemprop=\"streetAddress\"[^>]*>(.*)<\/span>.*<\/div>/ismU", $data, $tmp)) {
            $item->address = trim($tmp[2]);
            $item->raion = preg_match("/metro/i", $tmp[1]) ? "ст.м. " : "";
            $item->raion .= trim(strip_tags($tmp[1]));
            $item->raion = preg_replace("/[\W]$/u", "", $item->raion);
        } else {
            preg_match("/<div\b[^>]*\bid=\"map\"[^>]*>(.*)<\/div>/ismU", $data, $tmp);
            $map = isset($tmp[1]) ? $tmp[1] : "";
            $item->raion = preg_match("/metro/i", $map) ? "ст.м. " : "";
            $map = preg_replace("/<span\b[^>]*\bitemprop=\"name\"[^>]*>.*<\/span>/ismU", "", $map);
            $item->raion .= preg_replace("/^[\W]\s*/u", "", trim(strip_tags($map)));
        }
        # </get_address>

        # <get_images>
        $images = [];
        $r = preg_match_all('@<div\b(?=[^>]*\bclass="[^"]*\bgallery-img-frame\b[^"]*")(?=[^>]*\bdata-url="(.*)")[^>]*>@isU', $data, $tmp);
        for ($i = 0; $i < $r; $i++) {
            $images[] = 'http:' . $tmp[1][$i];
        }
        // Stored as a newline-separated string.
        $item->images = implode("\n", $images);
        # </get_images>

        $item->display = $this->checkPhone($item->phone);

        // $this->saveItem($item);
        if (isset($this->olditems[$item->id])) {
            $this->updateItem($item);
        } else {
            $this->saveItem($item);
        }
        return $item;
    }

    /**
     * Derives the phone cache key from the item id and the page's phone token.
     *
     * The token is split into runs of [0-9a-f]; the runs are joined in original order
     * for odd ids and in reverse order for even ids; every third character of the
     * result (positions 0, 3, 6, ...) forms the key.
     *
     * @param int|string $id Item id.
     * @param string     $t  Token taken from "avito.item.phone" on the item page.
     * @return string Key; empty string when the token has no hex characters.
     */
    public function get_pkey($id, $t)
    {
        $chunks = preg_split("/[^0-9a-f]+/", $t);
        $joined = implode("", $id % 2 ? $chunks : array_reverse($chunks));

        // Initialised explicitly: the original appended to an undefined variable.
        $r = "";
        for ($i = 0, $len = strlen($joined); $i < $len; $i += 3) {
            $r .= $joined[$i];
        }
        return $r;
    }

    /**
     * Builds a listing URL from the current host, city, category and deal type.
     *
     * @param string $urlsfx Suffix from $this->sections (already contains the query string).
     * @param int    $p      Page number; appended as "&p=N" only when greater than 1.
     * @return string
     */
    public function create_url($urlsfx, $p = 1)
    {
        $url = "https://{$this->site->host}/{$this->city}/{$this->category}/{$this->deal_type}{$urlsfx}";
        if ($p > 1) {
            $url .= "&p={$p}";
        }
        return $url;
    }

    /*
    public function checkItem($item) {
        if($item->date <= $this->datelimit) return 0;
        $olditem = mysql_fetch_object(mysql_query("SELECT * FROM objects WHERE site_id = '{$this->site->id}' AND id = '{$item->id}'"));
        if( !$olditem ) return 1;
        if(
            !$olditem->display ||
            $olditem->is_agency == 1 ||
            $item->date <= $olditem->date ||
            $olditem->category != "kvartiry" && $olditem->category != "komnaty"
        ) return 0;
        $this->olditems[$item->id] = $olditem;
        return 1;
    }
    */

    /*
    public function updateItem($item) {
        $olditem = $this->olditems[$item->id];
        if(
            !$olditem ||
            !$olditem->display ||
            $olditem->is_agency == 1 ||
            $olditem->date >= $item->date
        ) return;
        if($olditem->checked && $olditem->price == $item->price){
            $sql = "UPDATE objects SET date = '{$item->date}' WHERE object_id = '{$olditem->object_id}'";
            mysql_query($sql) or $this->error(mysql_error());
            return;
        }
        $sql = "UPDATE objects SET date = '{$item->date}', date_add = NOW()";
        if($olditem->price != $item->price) $sql .= ", price = '{$item->price}'";
        if($olditem->checked) $sql .= ", checked = 2";
        $sql .= " WHERE object_id = '{$olditem->object_id}'";
        mysql_query($sql) or $this->error(mysql_error());
        if($olditem->category != "kvartiry" && $olditem->category != "komnaty") return;
        $link = mysql_fetch_object(mysql_query("SELECT * FROM objects_links WHERE (link_id, link_s) = ('{$olditem->link_id}', '{$olditem->link_s}')"));
        if(
            !$link ||
            $link->is_agency == 1 ||
            $link->date >= $item->date ||
            # BUG: $link->checked != $olditem->checked (!!!)
            $link->checked && $olditem->price == $item->price
        ) return;
        $link->url = $item->url;
        if(!preg_match("/^https?\:\/\//is",$link->url)){
            $host = $this->sites[$tmp["url"]]->host;
            $host = "http" .
(preg_match("/[:]443$/",$host) ? "s" : "") . "://" . $host;
            $link->url = $host . $link->url;
        }
        $sql = "UPDATE objects_links SET date = '{$item->date}', date_add = NOW(), url = '" . mysql_real_escape_string($link->url) . "'";
        if($olditem->price != $item->price){
            $link->price = unserialize($link->price);
            unset($link->price[$olditem->s]);
            $link->price = [$olditem->s => [$item->price]] + $link->price;
            $sql .= ", price = '" . mysql_real_escape_string(serialize($link->price)) . "'";
        }
        if($link->checked || $link->number !== "0") $sql .= ", checked = 2";
        $sql .= " WHERE (link_id, link_s) = ('{$link->link_id}', '{$link->link_s}')";
        mysql_query($sql) or $this->error(mysql_error());
        $s = "UPDATED: [{$link->link_id}-{$link->link_s}] : {$olditem->date} --> {$item->date} : {$olditem->price} --> {$item->price}";
        mysql_query("INSERT INTO objects_notes (link_id, link_s, note) VALUES('{$link->link_id}','{$link->link_s}','" . mysql_real_escape_string($s) . "') ON DUPLICATE KEY UPDATE note = TRIM(CONCAT(note, '" . mysql_real_escape_string("\n" . $s) . "'))");
        $this->__log($s);
    }
    */

    /**
     * Tells whether downloaded item HTML carries the "avito.item.phone" token.
     *
     * @param string $html Item page HTML.
     * @param object $item Unused here.
     * @return int|false Result of preg_match(): 1 when the token is present, 0 when not,
     *                   false on a PCRE error.
     */
    public function isValidItemHtml($html, $item)
    {
        return preg_match("/\bavito\.item\.phone\s*=\s*['\"]([^'\"]+)['\"]/ismU", $html);
    }

    /**
     * Deletes phone cache rows older than the current date and hour, then runs the
     * parent cron().
     *
     * @return mixed Whatever parent::cron() returns.
     */
    public function cron()
    {
        // AND binds tighter than OR: rows from earlier days, or from earlier hours today.
        mysql_query("DELETE FROM `avito_phone_cache` WHERE `date` < CURDATE() OR `date` = CURDATE() AND `hour` < HOUR(NOW())");
        return parent::cron();
    }

    /**
     * Picks a random proxy from the built-in pool.
     *
     * Stores the parsed parts in $this->__proxy (ip, port, login, pass) and "ip:port" in
     * $this->proxy. Both parameters are accepted for signature compatibility and ignored.
     *
     * @param string $proxy Ignored.
     * @param int    $reset Ignored.
     * @return string Selected proxy as "ip:port".
     */
    public function get_proxy($proxy = "", $reset = 0)
    {
        // NOTE(review): proxy credentials are hard-coded in source; they should live in
        // configuration outside the repository. Entries are "ip:port:login:pass".
        $pool = [
            "185.209.114.73:24531:yfgzno:IbaKaB0kOU",
            "188.130.186.111:24531:yfgzno:IbaKaB0kOU",
            "91.107.124.138:24531:yfgzno:IbaKaB0kOU",
            "194.55.166.177:24531:yfgzno:IbaKaB0kOU",
            "5.44.46.3:24531:yfgzno:IbaKaB0kOU",
            "93.188.206.46:24531:yfgzno:IbaKaB0kOU",
            "91.197.0.209:24531:yfgzno:IbaKaB0kOU",
            "46.16.14.173:24531:yfgzno:IbaKaB0kOU",
            "46.8.153.101:24531:yfgzno:IbaKaB0kOU",
            "5.188.79.184:24531:yfgzno:IbaKaB0kOU",
            "31.184.233.153:24531:yfgzno:IbaKaB0kOU",
            "109.248.234.166:24531:yfgzno:IbaKaB0kOU",
            "185.250.151.254:24531:yfgzno:IbaKaB0kOU",
            "78.40.118.71:24531:yfgzno:IbaKaB0kOU",
            "185.139.213.56:24531:yfgzno:IbaKaB0kOU",
            "185.47.206.34:24531:yfgzno:IbaKaB0kOU",
            "193.57.138.74:24531:yfgzno:IbaKaB0kOU",
            "185.228.91.231:24531:yfgzno:IbaKaB0kOU",
            "185.231.204.235:24531:yfgzno:IbaKaB0kOU",
            "78.153.138.32:24531:yfgzno:IbaKaB0kOU",
        ];

        // Keys are quoted: bare words are undefined constants (an Error on PHP 8).
        $this->__proxy = (object) ["ip" => "", "port" => "", "login" => "", "pass" => ""];

        // $this->proxy = "10.0.0.1:8800";
        $this->proxy = $pool[mt_rand(0, count($pool) - 1)];
        list($this->__proxy->ip, $this->__proxy->port, $this->__proxy->login, $this->__proxy->pass) = explode(":", $this->proxy);

        $this->proxy = $this->__proxy->ip . ":" . $this->__proxy->port;
        return $this->proxy;
    }

    /**
     * No-op in this class: ignores its arguments and always returns false.
     *
     * @param mixed  $n
     * @param mixed  $status
     * @param string $error
     * @return bool Always false.
     */
    public function update_proxy($n, $status = "", $error = "")
    {
        return false;
    }
}