value;
        if($isStartBlockEnabled) die;

        $this->fails = 7;
        $this->timeout = 30;
        $this->check_olditems = 1;
        $this->limit = 200;
        //$this->cookies = "";
        $this->datelimit = date("Y-m-d", time() - 2 * 86400) . " 00:00:00";
        $this->isCorrectedObjects = true;
        $this->numUncorrectObjOnPage = 0;
        $this->agency_list = $this->getAgenciesFromDB();

        $this->deals = [
            prodam => 1, # For sale
            sdam => 2, # For rent
        ];

        $this->categories = [
            kvartiry => 1, # Apartments
            komnaty => 2, # Rooms
            doma_dachi_kottedzhi => 3, # Houses, dachas, cottages
            zemelnye_uchastki => 4, # Land plots
            kommercheskaya_nedvizhimost => 5, # Commercial real estate
            garazhi_i_mashinomesta => 6, # Garages and parking spaces
            nedvizhimost_za_rubezhom => 7, # Real estate abroad
        ];

        $this->objects = [
            # rooms???
            komnaty => 0,
            # apartments
            vtorichka => 1, # Resale
            novostroyka => 2, # New building
            # Houses, dachas, cottages
            dom => 3, # Houses
            dacha => 4, # Dachas
            kottedzh => 5, # Cottages
            taunhaus => 6, # Townhouses
            # Land plots
            izhs => 7, # Settlement land (IZhS)
            selhoznaznacheniya => 8, # Agricultural land (SNT, DNP)
            promnaznacheniya => 9, # Industrial land
            # Commercial real estate
            gostinicy => 10, # Hotel
            ofis => 11, # Office premises
            drugoe => 12, # Free-purpose premises
            proizvodstvo => 13, # Production premises
            sklad => 14, # Warehouse premises
            magazin => 15, # Retail premises
        ];

        $etc = "?s=104&user=1";
        // search by real-estate category
        $this->sections = [
            prodam => [
                kvartiry => "-ASgBAgICAUSSA8YQ".$etc,
                komnaty => "-ASgBAgICAUSQA7wQ".$etc,
                // doma_dachi_kottedzhi => "-ASgBAgICAUSUA9AQ".$etc,
                // zemelnye_uchastki => "-ASgBAgICAUSWA9oQ".$etc,
                // kommercheskaya_nedvizhimost => "-ASgBAgICAUSwCNJW".$etc,
            ],
            sdam => [
                // kvartiry => "/na_dlitelnyy_srok{$etc}",
                // komnaty => "/na_dlitelnyy_srok{$etc}",
                // doma_dachi_kottedzhi => "/na_dlitelnyy_srok{$etc}",
                kvartiry => "/na_dlitelnyy_srok-ASgBAgICAkSSA8gQ8AeQUg".$etc,
                komnaty => "/na_dlitelnyy_srok-ASgBAgICAkSQA74QqAn2YA".$etc,
                // doma_dachi_kottedzhi => "/na_dlitelnyy_srok-ASgBAgICAkSUA9IQoAjIVQ".$etc,
                kommercheskaya_nedvizhimost => "-ASgBAgICAUSwCNRW".$etc,
            ]
        ];
    }

    /**
     * Fetches a listing page and returns the highest page number found in its pagination block.
     *
     * Before every attempt the method sleeps 8-10 seconds and runs the cookie shell script.
     * The fetched HTML is kept in $this->data so that getItems() can reuse it.
     * The page is re-requested (recursively) while it shows the "please wait" stub or has no
     * pagination block; on every retry update_proxy() is called.
     *
     * @param string $url listing URL to fetch
     * @param int    $n   retry counter, 0 on the first call
     * @return mixed highest "p" value found in the pagination links, false when the pagination
     *               block has no such links, or 4 after more than 21 retries
     */
    public function getPages($url, $n = 0) {
        sleep(mt_rand(8, 10));

        $cmd = "/var/www/grabber-krsk/cron/cookie-avito.sh";
        $res = shell_exec($cmd);
        // var_export(..., true) returns the text; var_dump() would print it and log nothing.
        $this->__log("shell_exec: " . $cmd . " -> " . var_export($res, true));

        if ($n) {
            $err = "getPages({$url}) - error receiving data";
            $this->update_proxy(1, -1, $err);
            if ($n > 21) {
                $this->__log($err);
                return 4;
            }
        }

        $this->data = $this->get_url($url, true);

        if (preg_match("/Подождите, идет загрузка/", $this->data)) {
            return $this->getPages($url, $n + 1);
        }

        // NOTE(review): the leading "<div\b[^>" of this pattern was missing in the reviewed copy
        // and has been reconstructed after the breadcrumbs pattern in getItem() - confirm.
        if (!preg_match("/<div\b[^>]*\bclass=\"[^\"]*\bpagination-pages\b[^\"]*\"[^>]*>(.*)<\/div>/ismU", $this->data, $tmp)) {
            return $this->getPages($url, $n + 1);
        }

        preg_match_all("/href=\"[^\"]+\?p=(\d+)\&[^\"]+\"/ismU", $tmp[1], $pages);
        $this->countTries = 0;

        // max() on an empty list raises a warning; keep the resulting false without the warning.
        return $pages[1] ? max($pages[1]) : false;
    }

    /**
     * Parses one listing page into a list of item stubs keyed by item id.
     *
     * Uses the HTML cached in $this->data by getPages() when present, otherwise fetches $url.
     * Items whose URL does not contain $this->city, or whose seller name is found in
     * $this->agency_list, are skipped. For the remaining items a publication date is resolved
     * unless the id is in the set of recently hidden objects loaded from grabber.objects.
     *
     * Side effects: updates $this->countTries, $this->numUncorrectObjOnPage,
     * $this->isCorrectedObjects, clears $this->data and $this->name.
     *
     * @param string $url listing page URL
     * @param int    $n   retry counter, 0 on the first call
     * @return array map of item id => stdClass with id, title, url, price, seller and, when
     *               resolved, date; empty array when retries are exhausted or the previous
     *               page contained only foreign-city items
     */
    public function getItems($url, $n = 0) {
        if ($this->countTries == 3) {
            return [];
        }
        sleep(mt_rand(2, 7));
        if ($this->isCorrectedObjects != true) {
            return [];
        }

        if ($n) {
            // Build the message before it is handed to update_proxy().
            $err = "getItems({$url}) - error receiving data";
            $this->update_proxy(1, -1, $err);
            if ($n > $this->fails) {
                $this->__log($err);
                $this->countTries++;
                return [];
            }
        }

        $data = $this->data ? $this->data : $this->get_url($url, true);
        // The cached page is single-use: clear it so that any retry fetches a fresh copy.
        $this->data = null;

        // Check the page that was actually obtained, not the (possibly empty) cache.
        if (preg_match("/Подождите, идет загрузка/", $data)) {
            return $this->getItems($url, $n + 1);
        }
        if (!preg_match("/class=\"[^\"]*catalog[^\"]*\"/ismU", $data)) {
            return $this->getItems($url, $n + 1);
        }
        $this->countTries = 0;

        // One match per listing card: id, relative url, title and the card body ("info").
        // NOTE(review): "<div[^>", "<a[^>" and the group names id/url/title were missing in the
        // reviewed copy (the pattern did not compile); restored from the older commented-out
        // variant of the same expression - confirm against the live markup.
        $found = preg_match_all('/<div[^>]*data-item-id=\"(?<id>[^\"]*)\"[^>]*>.*<a[^>]*itemprop=\"url\"[^>]*href=\"(?<url>[^\"]*)"[^>]*title=\"(?<title>[^\"]*)"[^>]*>(?<info>.*iva-item-phoneButton)/ismU', $data, $listing);
        if ($found == 0) {
            return $this->getItems($url, $n + 1);
        }

        // Absolute difference, in hours, between the default time zone and Europe/Moscow.
        $moscowNow = strtotime($this->GetMoscowNowDateString("Y-m-d H:i") . ':00');
        $localNow = strtotime($this->GetNowDateString("Y-m-d H:i") . ':00');
        $diffHour = abs($localNow - $moscowNow) / (60 * 60);

        $items = [];
        $hiddenIds = null; // loaded lazily, once per page
        $this->numUncorrectObjOnPage = 0;
        $k = count($listing["id"]);

        for ($i = 0; $i < $k; $i++) {
            $id = $listing["id"][$i];
            $itemUrl = "https://" . $this->site->host . $listing["url"][$i];

            // The pattern is an alternation: the first match (index 0) carries price and date
            // groups, the following match (index 1) carries the seller name.
            // NOTE(review): the "(\ | )" groups may originally have contained "&nbsp;" - confirm.
            preg_match_all('/<meta[^\>]*itemprop\=\"price\"[^\>]*content=\"(?<price>[^\"]*)\".*<div[^\>]*data-marker\=\"item-date\"[^>]*((data\-tooltip=\"(?<dataTooltipdate>[^\"]*)(\ | )(?<dataTooltiptime>\d\d:\d\d)\")|\>((?<date>[^\"]*)(\ | )(?<time>\d\d:\d\d)|(?<datestill>[^<]+назад))<).*|<div\s+class=\"style\-title[^>]*>(?<seller>[^<]*)</ismxU', $listing["info"][$i], $info);

            if (strpos($itemUrl, $this->city) === false) {
                $this->__log("Incorrect city: url = " . $itemUrl);
                $this->numUncorrectObjOnPage++;
                continue;
            }

            $this->name = null;

            // NOTE(review): "&quot;" is reconstructed; the reviewed copy had an unterminated
            // string here - confirm.
            $seller = isset($info['seller'][1]) ? str_replace("&quot;", "", $info['seller'][1]) : "";

            // Sellers already known as agencies are skipped entirely.
            // NOTE(review): strtolower() is byte-wise, so Cyrillic names are effectively
            // compared case-sensitively; getAgenciesFromDB() uses the same function.
            if (in_array(strtolower(trim($seller)), (array) $this->agency_list)) {
                continue;
            }

            $items[$id] = (object) [
                'id' => $id,
                'title' => html_entity_decode($listing["title"][$i]),
                'url' => $itemUrl,
                'price' => preg_replace('/\D+/isu', '', isset($info["price"][0]) ? $info["price"][0] : ''),
                'seller' => $seller,
            ];

            if ($hiddenIds === null) {
                $hiddenIds = [];
                $query = mysql_query("SELECT id as id1 from grabber.objects where is_agency='-1' and display=0 and url like '%avito%' and date_add>DATE_ADD(NOW(), INTERVAL -25 DAY)");
                while ($query && ($row = mysql_fetch_row($query))) {
                    $hiddenIds[$row[0]] = true;
                }
            }

            // Ids present in that set keep their stub without a date.
            if (isset($hiddenIds[$id])) {
                $this->__log("NE Sovpalo DB:" . $id . ' =TMP: ' . $id . "\n");
                continue;
            }

            $tooltipDate = isset($info["dataTooltipdate"][0]) ? $info["dataTooltipdate"][0] : "";
            $plainDate = isset($info["date"][0]) ? $info["date"][0] : "";
            $still = isset($info["datestill"][0]) ? $info["datestill"][0] : "";

            if ($tooltipDate) {
                $tooltipDate = preg_replace('@\s+|\s+@isu', '', html_entity_decode($tooltipDate));
                $items[$id]->date = $this->get_date(trim($tooltipDate), trim($info["dataTooltiptime"][0]));
            } elseif ($plainDate) {
                $plainDate = preg_replace('@\s+|\s+@isu', '', html_entity_decode($plainDate));
                $items[$id]->date = $this->get_date(trim($plainDate), trim($info["time"][0]));
            } elseif ($still) {
                // Relative dates such as "1 day ago" / "4 minutes ago".
                $date = $this->resolveRelativeDate($id, $still, $diffHour);
                if (!$date) {
                    $this->__log("Temporary set now to date to object with id {$id}. Maybe new or updated, date should parse inside of getting item.");
                    $date = $this->GetNowDateString("Y-m-d H:i") . ':00';
                }
                $items[$id]->date = $date;
            }
        }

        if ($k == $this->numUncorrectObjOnPage) {
            $this->isCorrectedObjects = false;
            $this->__log("Another cities objects");
        } else {
            $this->isCorrectedObjects = true;
        }

        return $items;
    }

    /**
     * Turns a relative "... ago" label into a date string, or returns null when it cannot.
     *
     * Seconds and minutes are computed from the current Moscow time. For hours, days and weeks
     * the date stored in grabber.objects is returned when it falls inside the window described
     * by the label; when it only fits after shifting the window by $diffHour hours, the stored
     * date is corrected by that shift (also in the database) and returned.
     *
     * @param string $id       item id taken from the listing page
     * @param string $still    relative label text
     * @param float  $diffHour hour difference between the default time zone and Moscow
     * @return string|null "Y-m-d H:i:00" date or null when nothing could be resolved
     */
    private function resolveRelativeDate($id, $still, $diffHour) {
        if (preg_match("/\ секунд/ismU", $still)) {
            return $this->GetMoscowNowDateString("Y-m-d H:i") . ':00';
        }
        if (preg_match("/(?<time>[0-9]+)\ минут/ismU", $still, $m)) {
            return $this->GetMoscowNowDateString("Y-m-d H:i", -$m['time'] * 60) . ':00';
        }

        // The id comes from scraped HTML, so escape it before it reaches SQL.
        $safeId = mysql_real_escape_string($id);
        $que = mysql_query("SELECT date FROM grabber.objects WHERE id = '{$safeId}';");
        $res = $que ? mysql_fetch_object($que) : false;
        $olddate = $res ? $res->date : null;
        if (!$olddate) {
            return null;
        }

        // Pattern => length of one unit in seconds; tried in this order, first match wins.
        $units = [
            ["/(?<time>[0-9]+)\ час/ismU", 60 * 60],
            ["/(?<time>[0-9]+)\ (день|дня|дней)/ismU", 24 * 60 * 60],
            ["/(?<time>[0-9]+)\ (неделю|недели)/ismU", 7 * 24 * 60 * 60],
        ];

        foreach ($units as $unit) {
            list($pattern, $seconds) = $unit;
            if (!preg_match($pattern, $still, $m)) {
                continue;
            }

            // "N units ago" means the date lies between N+1 and N units before now.
            $startPeriod = -($m['time'] + 1) * $seconds;
            $endPeriod = -$m['time'] * $seconds;

            $startDate = $this->GetMoscowNowDateString("Y-m-d H:i", $startPeriod) . ':00';
            $endDate = $this->GetMoscowNowDateString("Y-m-d H:i", $endPeriod) . ':00';
            if ($this->check_in_range($startDate, $endDate, $olddate)) {
                return $olddate;
            }

            $shift = $diffHour * 60 * 60;
            $startDate = $this->GetMoscowNowDateString("Y-m-d H:i", $startPeriod + $shift) . ':00';
            $endDate = $this->GetMoscowNowDateString("Y-m-d H:i", $endPeriod + $shift) . ':00';
            if ($this->check_in_range($startDate, $endDate, $olddate)) {
                $fixed = date("Y-m-d H:i", strtotime($olddate) - $shift) . ':00';
                $this->__log("Update date in db to {$fixed} to object with id {$id}. Because incorrect time zone.");
                mysql_query("UPDATE grabber.objects SET date = '{$fixed}' WHERE id = '{$safeId}';");
                return $fixed;
            }

            return null;
        }

        return null;
    }

    /**
     * Formats the current Europe/Moscow time, optionally shifted by a number of seconds.
     *
     * @param string         $dateFormatString          format accepted by DateTime::format()
     * @param int|float|null $intervalToChangeInSeconds shift in seconds (may be negative)
     * @return string
     */
    public function GetMoscowNowDateString($dateFormatString, $intervalToChangeInSeconds = null) {
        return $this->GetNowDateString($dateFormatString, 'Europe/Moscow', $intervalToChangeInSeconds);
    }

    /**
     * Formats the current time in the given (or default) time zone, optionally shifted.
     *
     * @param string         $dateFormatString          format accepted by DateTime::format()
     * @param string|null    $timeZoneString            time zone name; default zone when empty
     * @param int|float|null $intervalToChangeInSeconds shift in seconds; ignored when 0/null
     * @return string
     */
    public function GetNowDateString($dateFormatString, $timeZoneString = null, $intervalToChangeInSeconds = null) {
        $dateNow = $timeZoneString
            ? new DateTime('now', new DateTimeZone($timeZoneString))
            : new DateTime('now');

        if ($intervalToChangeInSeconds) {
            date_add($dateNow, date_interval_create_from_date_string("{$intervalToChangeInSeconds} seconds"));
        }

        return $dateNow->format($dateFormatString);
    }

    /**
     * Loads agency names from grabber.agencies, lower-cased with strtolower().
     *
     * @return array list of names; empty when the table is empty or the query fails
     */
    private function getAgenciesFromDB() {
        $rows = [];
        $result = mysql_query("SELECT `name_agency` FROM grabber.agencies;");
        while ($result && ($row = mysql_fetch_array($result))) {
            $rows[] = strtolower($row[0]);
        }
        return $rows;
    }

    /**
     * Tells whether a date lies inside an inclusive range; all arguments go through strtotime().
     *
     * @param string $start_date
     * @param string $end_date
     * @param string $date_from_user
     * @return bool
     */
    public function check_in_range($start_date, $end_date, $date_from_user) {
        $start_ts = strtotime($start_date);
        $end_ts = strtotime($end_date);
        $user_ts = strtotime($date_from_user);

        return $user_ts >= $start_ts && $user_ts <= $end_ts;
    }

    /**
     * Requests the phone endpoint and extracts the digits from the returned phone image.
     *
     * The response is expected to be JSON with an "image64" data URI; the image is written to a
     * randomly named file in the working directory, run through the tesseract binary and the
     * non-digit characters are stripped from its text output.
     *
     * Side effects: sleeps 8-10 seconds; fills $this->name from "sellerName" in the response
     * when it is empty; sets $this->hidden = 1 when the response contains one of the
     * number-protection phrases.
     *
     * NOTE(review): the tesseract path is a Windows path while getPages() runs a script under
     * /var/www - confirm the deployment target.
     *
     * @param string $url phone endpoint URL
     * @return string digits of the phone, "Нет телефона" when the endpoint reports a hidden
     *                phone, or "" when nothing could be recognised
     */
    public function GetPhone($url) {
        sleep(mt_rand(8, 10));
        $response = $this->get_url($url, true);

        if ($this->name == null) {
            preg_match('/sellerName\s*\"\s*:\s*\"([^\"]*)\"/ism', $response, $seller);
            $this->name = isset($seller[1]) ? $seller[1] : null;
        }

        if (preg_match('/скоро телефон заменится/', $response)
            || preg_match('/не\s*будут\s*доставлены/ism', $response)) {
            $this->hidden = 1;
        }

        if (preg_match('/{"error":"phone_hidden","image64":""}/ism', $response)) {
            return "Нет телефона";
        }

        // Expected shape: "data:<mime>;base64,<payload>".
        $js = json_decode($response);
        $image64 = (is_object($js) && isset($js->image64)) ? $js->image64 : '';
        $pieces = explode(';', $image64);
        $payload = isset($pieces[1]) ? explode(',', $pieces[1]) : [];
        $image = base64_decode(isset($payload[1]) ? $payload[1] : '');
        if ($image === false || $image === '') {
            // Nothing to recognise; the caller treats an empty result as "retry".
            return "";
        }

        $rand = mt_rand(10000000, 99999999);
        $imageFile = (string) $rand;
        $textFile = $rand . '.txt';

        file_put_contents($imageFile, $image);
        $cmd = '"C:\Program Files\Tesseract-OCR\tesseract.exe" '.$rand.' '.$rand.' -l eng';
        shell_exec($cmd);

        // tesseract writes "<base>.txt"; it may be absent when recognition failed.
        $phone = is_file($textFile) ? file_get_contents($textFile) : '';
        $phone = preg_replace('/\D+/isu', '', $phone);

        if (is_file($imageFile)) {
            unlink($imageFile);
        }
        if (is_file($textFile)) {
            unlink($textFile);
        }

        $this->__log("Phone parsed: " . $phone);
        return $phone;
    }

    /**
     * Fetches an item page, fills the item stub with the parsed details and stores it.
     *
     * Parsed fields: date, phone, seller, object_type, category (dachas only), title,
     * description, params, rooms, s, material, level, levels, land, distance, address, raion,
     * images, display. The page is re-requested (recursively) when its og:url is missing, its
     * trailing number differs from the requested URL, or it is the "no such page" stub.
     * At the end the item goes to updateItem() when its id is in $this->olditems, otherwise to
     * saveItem().
     *
     * NOTE(review): $this->hidden is set to 1 here and in GetPhone() but is never reset in the
     * visible code, so it stays set for every following item - confirm this is intended.
     *
     * @param object $item stub produced by getItems()
     * @param int    $n    retry counter, 0 on the first call
     * @return mixed the filled $item, or the result of __log() when retries are exhausted
     */
    public function getItem($item, $n = 0) {
        sleep(mt_rand(8, 10));
        if ($n) {
            $err = "getItem({$item->id}) - error receiving data";
            $this->update_proxy(1, -1, $err);
            if ($n > $this->fails) {
                return $this->__log($err);
            }
        }

        $data = $this->get_url($item->url, true);

        $r = preg_match("/\<meta[^>]*property\=\"og\:url\"[^>]*content=\"(?'url'[^\"]*)\"/ismU", $data, $tmp);
        if (!$r || !$tmp['url']) {
            return $this->getItem($item, $n + 1);
        }
        // The trailing number of the requested and of the served URL must agree.
        if (preg_match('/(\d+)$/', $item->url, $ourUrlNumbers)
            && preg_match('/(\d+)$/', $tmp['url'], $theirUrlNumbers)
            && $ourUrlNumbers[0] != $theirUrlNumbers[0]) {
            return $this->getItem($item, $n + 1);
        }
        if (preg_match('/Такой страницы на нашем сайте нет/', $data)) {
            $this->__log('Avito: Такой страницы на нашем сайте нет');
            return $this->getItem($item, $n + 1);
        }

        if (preg_match("/>(?<date>[^\"]*)\ в\ (?<time>\d\d:\d\d)/ismU", $data, $tmp2)) {
            $item->date = $this->get_date(trim($tmp2["date"]), trim($tmp2["time"]));
        }

        # <get_phone>
        if (empty($item->phone)) {
            sleep(mt_rand(3, 5));
            $url = "https://www.avito.ru/web/1/items/phone/{$item->id}?&vsrc=r";

            if (!isset($item->seller) || $item->seller == '') {
                $this->name = null;
            } else {
                $this->name = $item->seller;
                $this->hidden = 1;
            }

            $phone = $this->GetPhone($url);
            $item->seller = $this->name;
            $this->name = null;

            $tries = 0;
            while (!$phone && $tries < 21) {
                $phone = $this->GetPhone($url);
                $tries += 1;
            }

            $item->phone = $phone;
            if (!$item->phone) {
                $this->__log("error receiving phone");
                $item->phone = "Нет телефона";
            }
            // Short numbers get the code configured for the current city as a prefix.
            if ($item->phone && mb_strlen($item->phone) <= 7) {
                $item->phone = $this->cities[$this->city]["code"] . $item->phone;
            }
        }
        # </get_phone>

        # <get_object_type>
        if (preg_match("/<div\b[^>]*\bclass=\"[^\"]*\bbreadcrumbs-links\b[^\"]*\"[^>]*>(.*)<\/div>/ismU", $data, $tmp)) {
            preg_match_all("/href=\"([^\"]+)\"/", $tmp[1], $links);
            $lastLink = array_pop($links[1]);
            if (preg_match("/\/(vtorichka|novostroyka)\//", $lastLink, $_)) {
                $item->object_type = $_[1];
            } else {
                // array_pop() needs a real variable, not the temporary returned by explode().
                $segments = explode("/", $lastLink);
                $item->object_type = array_pop($segments);
                if (empty($this->objects[$item->object_type])) {
                    $item->object_type = "komnaty";
                }
                if ($item->object_type == "dacha") {
                    $item->category = "zemelnye_uchastki";
                }
            }
        }
        # </get_object_type>

        # <get_title>
        if (preg_match("/<h1\b[^>]*\bclass=\"[^\"]*\bh1\b[^\"]*\"[^>]*>(.*)<\/h1>/ismU", $data, $tmp)) {
            $item->title = $tmp[1];
        }
        if (preg_match('/^Дача\b/isu', $item->title)) {
            $item->category = "zemelnye_uchastki";
        }
        # </get_title>

        # <get_description>
        preg_match("/<div\b[^>]*\bitemprop=\"description\"[^>]*>(.*)<\/div>/ismU", $data, $tmp);
        $text = isset($tmp[1]) ? trim(strip_tags($tmp[1])) : "";
        $rememberPhone = false;

        if (isset($this->hidden) && $this->hidden == 1) {
            $item->description = "Номер продавца защищён Avito\r\n" . $text;
            $item->avitoProtected = true;
            $rememberPhone = true;
        } elseif (isset($item->description) && $item->description == 'Профиль защищён') {
            $item->description .= $text;
            $rememberPhone = true;
        } elseif (preg_match("/(не будут доставлены|временный номер|телефон заменится|Номер продавца защищён|Номер агентства защищён|Номер защищён)/ismU", $data)) {
            $item->description = "Номер продавца защищён Avito\r\n" . $text;
            $item->avitoProtected = true;
            $rememberPhone = true;
        } else {
            $item->description = $text;
            $item->avitoProtected = false;
        }

        if ($rememberPhone) {
            mysql_query("REPLACE INTO `grabber`.`avito_protected_phones` (`phone`) VALUES('" . mysql_real_escape_string($item->phone) . "');");
        }
        # </get_description>

        # <get_params>
        preg_match_all('/[^\-]*\-params\-title[^<]*<[^<]*<ul\s+class=\"[^\-]*\-params[^<]*(?<param>.*)/ism', $data, $_tmp);
        $paramsHtml = isset($_tmp['param'][0]) ? $_tmp['param'][0] : '';
        $r = preg_match_all('/<li\s*class="[^\-]*\-params.*<span\s+class=\"[^>]*>(?<key>[^:]*:)[^<]*<\/span>(?<value>[^<]*)<\/li>/isU', $paramsHtml, $tmp);

        $params = [];
        $item->params = "";
        // Iterate over the matches ($r); count($tmp) is the number of capture-group keys and
        // has nothing to do with how many parameters were found.
        for ($i = 0; $i < (int) $r; $i++) {
            $key = str_replace('<!-- -->', '', $tmp['key'][$i]);
            $k = preg_replace('@^\s+|:\s*$@is', '', $key);
            $v = trim($tmp['value'][$i]);
            $params[$k] = $v;
            $item->params .= $k . ': ' . $v . '; ';
        }

        // Absent parameters read as null, without an undefined-index notice.
        $param = function ($name) use ($params) {
            return isset($params[$name]) ? $params[$name] : null;
        };

        $item->rooms = intval($param('Количество комнат'));
        if (!$item->rooms) {
            if ($this->category == "komnaty") {
                $item->rooms = 1;
            } elseif (preg_match("/\b(\d+)\-к\s+квартира\b/isu", $item->title, $tmp)) {
                $item->rooms = intval($tmp[1]);
            } elseif (preg_match("/\bстудия\b/isu", $item->title)) {
                $item->rooms = 1;
            } else {
                $item->rooms = 0;
            }
        }

        foreach (['Площадь', 'Площадь дома'] as $_) {
            if ($param($_)) {
                $item->s = __floatval($param($_));
                break;
            }
        }
        if (empty($item->s) && preg_match("/\b(\d+(?:[.,]\d+)?)\s*м/isu", $item->title, $tmp)) {
            $item->s = __floatval($tmp[1]);
        }

        foreach (['Тип дома', 'Материал стен'] as $_) {
            if ($param($_)) {
                $item->material = $param($_);
                break;
            }
        }

        // The "Этаж" value is split on "из": the part before it is the level, the part after
        // it is the number of levels.
        $floor = explode("из", (string) $param("Этаж"));
        $item->level = intval($floor[0]);
        $item->levels = intval(isset($floor[1]) ? $floor[1] : 0);

        $item->land = __floatval($param('Площадь участка'));
        if (!$item->land && preg_match("/\bучаст\S+\s+([\d.,]+)\s+сот\./isu", $item->title, $tmp)) {
            $item->land = __floatval($tmp[1]);
        }
        $item->distance = $param('Расстояние до города');
        # </get_params>

        # <get_address>
        if (preg_match("/<span[^>]+(itemprop|class)=\"[^\"]*address[^\"]*\"[^>]*>(.*)<\/span>/ismU", $data, $tmp)) {
            $item->address = trim($tmp[2]);
        } elseif (preg_match("/<span\b[^>]*\bitemprop=\"address\"[^>]*>(.*)<span\b[^>]*\bitemprop=\"streetAddress\"[^>]*>(.*)<\/span>.*<\/div>/ismU", $data, $tmp)) {
            $item->address = trim($tmp[2]);
            $item->raion = preg_match("/metro/i", $tmp[1]) ? "ст.м. " : "";
            $item->raion .= trim(strip_tags($tmp[1]));
            $item->raion = preg_replace("/[\W]$/", "", $item->raion);
        } else {
            preg_match("/<div\b[^>]*\bid=\"map\"[^>]*>(.*)<\/div>/ismU", $data, $tmp);
            $map = isset($tmp[1]) ? $tmp[1] : "";
            $item->raion = preg_match("/metro/i", $map) ? "ст.м. " : "";
            $map = preg_replace("/<span\b[^>]*\bitemprop=\"name\"[^>]*>.*<\/span>/ismU", "", $map);
            $item->raion .= preg_replace("/^[\W]\s*/", "", trim(strip_tags($map)));
        }
        # </get_address>

        # <get_images>
        $images = [];
        $r = preg_match_all('@<div\b(?=[^>]*\bclass="[^"]*\bgallery-img-frame\b[^"]*")(?=[^>]*\bdata-url="(.*)")[^>]*>@ismU', $data, $tmp);
        if ($r) {
            for ($i = 0; $i < $r; $i++) {
                $images[] = $tmp[1][$i];
            }
        } else {
            // Fallback: 640x480 URLs embedded in the page; only the first half of the matches
            // is used and backslashes are removed from each URL.
            $r = preg_match_all('/640x480":"(?<image>https:[^&]*)/ism', $data, $tmp);
            $half = (int) ceil($r / 2);
            for ($i = 0; $i < $half; $i++) {
                $images[] = str_replace("\\", "", $tmp["image"][$i]);
            }
        }
        $item->images = implode("\n", $images);
        # </get_images>

        $item->display = 1;
        unset($item->avitoProtected);

        if (isset($this->olditems[$item->id])) {
            $this->updateItem($item);
        } else {
            $this->saveItem($item);
        }

        return $item;
    }

    /**
     * Derives a key from an item id and a token string.
     *
     * The token is split on every run of characters other than 0-9 and a-f, the pieces are
     * joined (in reverse order when $id is even) and every third character, starting with the
     * first, is kept.
     *
     * @param int|string $id
     * @param string     $t
     * @return string
     */
    public function get_pkey($id, $t) {
        $chunks = preg_split("/[^0-9a-f]+/", $t);
        $joined = implode("", $id % 2 ? $chunks : array_reverse($chunks));

        $r = "";
        $length = strlen($joined);
        for ($i = 0; $i < $length; $i += 3) {
            $r .= $joined[$i];
        }
        return $r;
    }

    /**
     * Builds a listing URL from the host, city, category and deal type of the current run.
     *
     * @param string $urlsfx suffix appended after the deal type (expected to carry the query)
     * @param int    $p      page number; "&p=" is appended only for pages above 1
     * @return string
     */
    public function create_url($urlsfx, $p = 1) {
        $url = "https://{$this->site->host}/{$this->city}/{$this->category}/{$this->deal_type}{$urlsfx}";
        if ($p > 1) {
            $url .= "&p={$p}";
        }
        return $url;
    }

    /**
     * Checks whether item HTML contains an "avito.item.phone = '...'" assignment.
     *
     * @param string $html
     * @param mixed  $item not used by this implementation
     * @return int|false result of preg_match()
     */
    public function isValidItemHtml($html, $item) {
        return preg_match("/\bavito\.item\.phone\s*=\s*['\"]([^'\"]+)['\"]/ismU", $html);
    }

    /**
     * Deletes phone-cache rows older than the current hour, then runs the parent cron().
     *
     * @return mixed whatever parent::cron() returns
     */
    public function cron() {
        mysql_query("DELETE FROM `avito_phone_cache` WHERE `date` < CURDATE() OR `date` = CURDATE() AND `hour` < HOUR(NOW())");
        return parent::cron();
    }

    /**
     * Picks a random proxy from get_proxylist() and makes it current.
     *
     * Both arguments are ignored by this implementation.
     *
     * @param string $proxy
     * @param int    $reset
     * @return string "ip:port" of the selected proxy
     */
    public function get_proxy($proxy = "", $reset = 0) {
        return $this->selectRandomProxy();
    }

    /**
     * Switches to another random proxy; performs the same selection as get_proxy().
     *
     * All arguments are ignored by this implementation.
     *
     * @param int    $n
     * @param int    $status
     * @param string $error
     * @return string "ip:port" of the selected proxy
     */
    public function update_proxy($n, $status, $error = "") {
        return $this->selectRandomProxy();
    }

    /**
     * Chooses a random "ip:port:login:pass" entry, stores its parts in $this->__proxy and
     * "ip:port" in $this->proxy.
     *
     * @return string "ip:port"; ":" when the proxy list is empty
     */
    private function selectRandomProxy() {
        $list = $this->get_proxylist();
        $this->__proxy = (object) ['ip' => "", 'port' => "", 'login' => "", 'pass' => ""];

        // An empty list would otherwise end up in mt_rand(0, -1).
        $entry = $list ? $list[mt_rand(0, count($list) - 1)] : "";

        // Pad so that entries without login/pass do not raise notices in list().
        $parts = array_pad(explode(":", $entry), 4, null);
        list($this->__proxy->ip, $this->__proxy->port, $this->__proxy->login, $this->__proxy->pass) = $parts;

        $this->proxy = $this->__proxy->ip . ":" . $this->__proxy->port;
        return $this->proxy;
    }
}