session_end();
        die($this->__log($err));
    }

    /**
     * Binds the grabber to one site and initialises its lookup tables.
     *
     * Reads the optional `GrabberFailsCount` override from `oris.oris_conf`,
     * loads every row of `sites` keyed by id, and aborts through error() when
     * the requested site is unknown or not active.
     *
     * @param int|string $site_id Site id; cast to int before the lookup.
     */
    public function __construct($site_id)
    {
        $res = mysql_fetch_object(mysql_query("SELECT value FROM oris.oris_conf WHERE name = 'GrabberFailsCount';"));
        // The row may be absent; only override the default when a truthy value exists.
        if ($res && $res->value) {
            $this->fails = $res->value;
        }

        $this->sites = [];
        $dbc = mysql_query("SELECT * FROM sites ORDER BY id");
        while ($rt = mysql_fetch_object($dbc)) {
            $this->sites[$rt->id] = $rt;
        }

        $site_id = (int) $site_id;
        // Check before taking the reference so an unknown id does not create a null entry.
        if (!isset($this->sites[$site_id])) {
            $this->error("site (id = {$site_id}) not found");
        }
        $this->site = &$this->sites[$site_id];
        if (!$this->site->is_active) {
            $this->error("site «" . $this->site->name . "» (id = " . $this->site->id . ") disabled");
        }
        $this->site_id = $this->site->id;

        $this->cities = conf::$cities;
        $this->timeout = conf::$timeout;
        $this->mirrors = conf::$mirrors;

        // Relative day labels as they appear in scraped text ("yesterday", "today").
        $this->days = [
            "вчера" => date("Y-m-d", time() - 86400),
            "сегодня" => date("Y-m-d"),
        ];
        // Items dated at or before this moment (7 days back) are rejected by checkItem().
        $this->datelimit = date("Y-m-d", time() - 7 * 86400) . " 00:00:00";

        // Month number => Russian genitive month name, used by get_date().
        $this->month = [
            "01" => "января",
            "02" => "февраля",
            "03" => "марта",
            "04" => "апреля",
            "05" => "мая",
            "06" => "июня",
            "07" => "июля",
            "08" => "августа",
            "09" => "сентября",
            "10" => "октября",
            "11" => "ноября",
            "12" => "декабря",
        ];
        $this->year = date("Y");
        $this->dS = 0.1;

        // Keys are quoted: bare words only worked through the undefined-constant fallback.
        $this->deals = [
            'prodam' => 1,
            'sdam' => 2,
        ];
        $this->categories = [
            'kvartiry' => 1,                    # Apartments
            'komnaty' => 2,                     # Rooms
            'doma_dachi_kottedzhi' => 3,        # Houses, dachas, cottages
            'zemelnye_uchastki' => 4,           # Land plots
            'kommercheskaya_nedvizhimost' => 5, # Commercial real estate
            'garazhi_i_mashinomesta' => 6,      # Garages and parking spaces
            'nedvizhimost_za_rubezhom' => 7,    # Real estate abroad
        ];

        $this->cookies = "{$_ENV['ROOT']}/../logs/{$this->site->code}.cookie";
    }

    /**
     * Records the outcome of the last request for the currently selected proxy.
     *
     * @param mixed  $n      Truthy: increment fails_counter. Falsy: reset it and
     *                       bump success_counter / last_success.
     * @param string $status Stored in last_status.
     * @param string $error  Stored in last_error.
     * @return false|null False when no proxy is selected, otherwise nothing.
     */
    public function update_proxy($n, $status = "", $error = "")
    {
        if (!$this->__proxy) {
            return false;
        }
        $status = mysql_real_escape_string($status);
        $error = mysql_real_escape_string($error);
        $counters = $n
            ? "fails_counter + 1"
            : "0, success_counter = success_counter + 1, last_success = NOW()";
        $sql = "UPDATE site_proxy SET date_check = NOW(), last_status = '{$status}', last_error = '{$error}', fails_counter = " . $counters . " WHERE site_id = '{$this->site->id}' AND proxy_id = '{$this->__proxy->id}'";
        mysql_query($sql);
    }

    /**
     * Selects a proxy and stores it in $this->__proxy / $this->proxy.
     *
     * With $proxy given, that exact proxy row is loaded (error() if absent).
     * Otherwise a random active proxy of this site is picked, skipping ones
     * whose last status was 403 within the past 15 minutes. When none is
     * available the site's counters are reset once and the pick is retried;
     * a second miss ends in error().
     *
     * @param string $proxy Proxy string to look up, or "" for a random one.
     * @param int    $reset Non-zero: reset fail counters before picking.
     * @return string The selected proxy string.
     */
    public function get_proxy($proxy = "", $reset = 0)
    {
        if ($proxy) {
            $this->__proxy = mysql_fetch_object(mysql_query("SELECT * FROM proxy WHERE proxy = '" . mysql_real_escape_string($proxy) . "'")) or $this->error("proxy {$proxy} not found!");
        } else {
            if ($reset) {
                mysql_query("UPDATE site_proxy SET fails_counter = 0, last_status = '' WHERE site_id = '{$this->site->id}' AND is_active");
            }
            $this->__proxy = mysql_fetch_object(mysql_query("SELECT p.* FROM proxy p INNER JOIN site_proxy s ON s.site_id = '{$this->site->id}' AND p.id = s.proxy_id WHERE s.is_active AND (s.last_status != '403' OR s.last_status = '403' AND s.date_check + INTERVAL 15 MINUTE < NOW()) ORDER BY RAND() LIMIT 1"));
            if (!$this->__proxy) {
                return $reset ? $this->error("no active proxy in proxy-list!") : $this->get_proxy("", 1);
            }
        }
        $this->proxy = $this->__proxy->proxy;
        list($this->__proxy->ip, $this->__proxy->port) = explode(":", $this->proxy);
        return $this->proxy;
    }

    public $proxylist;

    /**
     * Returns the proxy list as an array of lines, loading it on first use.
     *
     * The list comes from `oris.oris_conf` (`ProxyListAvito`); when that value
     * is empty it is downloaded from the remote provider instead.
     *
     * @return array Lines of the proxy list (cached in $this->proxylist).
     */
    public function get_proxylist()
    {
        if (is_null($this->proxylist)) {
            $que = "SELECT value FROM oris.oris_conf WHERE name = 'ProxyListAvito';";
            $result = mysql_result(mysql_query($que), 0);
            if (!$result) {
                // NOTE(review): the access hash is hard-coded in the URL; consider moving it to configuration.
                $result = file_get_contents('https://my.virty.io/proxy_list/proxies.php?hash=19e43d5e1f5af6a6db7d6d9de7b377f3&type=http&format=format1');
            }
            // explode() replaces split(), which is deprecated since PHP 5.3 and removed in PHP 7.
            $this->proxylist = explode("\n", (string) $result);
        }
        return $this->proxylist;
    }

    /**
     * Converts a scraped date label plus optional "HH:MM" time into "Y-m-d HH:MM:00".
     *
     * Labels found in $this->days are mapped directly; anything else is parsed
     * as "<day><month text><year>" with the month resolved against $this->month.
     *
     * @param string $date Date text as scraped.
     * @param string $time "HH:MM"; defaults to "00:00".
     * @return string
     */
    public function get_date($date, $time = "")
    {
        if(!$time) $time = "00:00";
        $time .= ":00";
        $date = mb_strtolower(trim($date),"utf-8");
        if($this->days[$date]) $date = $this->days[$date];
        else{
            // $date is reused as the match array: [1] = day, [2] = month text, [3] = year.
            preg_match("/^(\d*)(\D*)(\d*)$/i",$date,$date);
            if(!$date[3]) $date[3] = $this->year;
            elseif(mb_strlen($date[3],"utf-8") < 4) $date[3] = "20" . $date[3];
            // Resolve the month text to its number once and cache it per raw text.
            if(!isset($this->monthcache[$date[2]])){
                $this->monthcache[$date[2]] = preg_replace("/\p{P}+$/","",trim($date[2]));
                foreach($this->month as $k => $v){
                    if(preg_match("/^" . preg_quote($this->monthcache[$date[2]],"/") . "/ui",$v)){
                        $this->monthcache[$date[2]] = $k;
                        break;
                    }
                }
            }
            if(mb_strlen($date[1],"utf-8") < 2) $date[1] = "0" .
$date[1];
            $date[2] = $this->monthcache[$date[2]];
            // A month later than the current one cannot belong to this year, so shift it back.
            if($date[2] > date("m") && $date[3] >= $this->year) $date[3] = $this->year - 1;
            $date = "{$date[3]}-{$date[2]}-{$date[1]}";
        }
        return $date . " " . $time;
    }

    /**
     * Fetches a URL through the remote oris HTTP tunnel.
     *
     * Posts the method, URL and serialized options to the tunnel endpoint
     * after a fixed 4 second delay.
     *
     * @param string $url    Target URL.
     * @param mixed  $opts   Options forwarded in serialized form.
     * @param string $method HTTP method name forwarded to the tunnel.
     * @return string|false Result of file_get_contents().
     */
    private function __get_url($url, $opts, $method)
    {
        sleep(4);
        $log = "get_url (oris-proxy): {$method} : {$url}";
        $this->__log($log);

        // Keys are quoted: bare words only worked through the undefined-constant fallback.
        $params = [
            'METHOD' => $method,
            'URL' => $url,
            'OPTS' => serialize($opts),
        ];
        $content = http_build_query($params);
        $context = stream_context_create([
            'http' => [
                "protocol_version" => 1.1,
                "method" => "POST",
                "timeout" => 60,
                "header" => [
                    "Content-Type: application/x-www-form-urlencoded",
                    "Content-Length: " . strlen($content),
                    "Connection: close",
                ],
                "content" => $content,
            ],
        ]);
        $data = file_get_contents("https://office.oris-info.ru/proxy/index.php", false, $context);
        return $data;
    }

    /**
     * Downloads a URL with cURL, optionally through a proxy.
     *
     * Once $this->http_tunnel is set (or a direct attempt is made with $n > 2)
     * all requests go through __get_url(). A failed proxied request is retried
     * once without a proxy; a failed direct request returns "".
     *
     * @param string       $url   Absolute http(s) URL.
     * @param bool|string  $proxy true: random proxy, string: that proxy, false: direct.
     * @param array|string $opts  Extra request header line(s).
     * @param int          $n     Attempt counter supplied by the caller.
     * @return string Response body, or "" on failure.
     */
    public function get_url($url, $proxy = true, $opts = [], $n = 1)
    {
        if (!$url || !preg_match("/^https?[:]\/\//", $url)) {
            return $this->error("get_url: incorrect url = {$url}");
        }
        if ($this->http_tunnel || !$proxy && $n > 2) {
            $this->http_tunnel = true;
            return $this->__get_url($url, $opts, "GET");
        }
        // After a third of the allowed fails, flip between proxied and direct mode.
        if ($n > $this->fails / 3) {
            if ($proxy) {
                $proxy = false;
            } else {
                $proxy = true;
                $n = 1;
            }
        }

        $log = "get_url: url = {$url}";
        $sep = '------------------------------------------------------------------------------------------------------------';

        $ch = curl_init();
        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_HEADER, false);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
        curl_setopt($ch, CURLOPT_ENCODING, "");
        curl_setopt($ch, CURLOPT_TIMEOUT, $this->timeout);
        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
        curl_setopt($ch, CURLOPT_MAXREDIRS, 2);
        // NOTE(review): certificate verification is disabled; confirm this is still required.
        curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
        if ($this->cookies) {
            curl_setopt($ch, CURLOPT_COOKIEFILE, $this->cookies);
            curl_setopt($ch, CURLOPT_COOKIEJAR, $this->cookies);
        }

        $headers = is_array($opts) ? $opts : [$opts];
        $headers[] = "Accept-Language: ru-RU,ru;q=0.8,en-US;q=0.5,en;q=0.3";
        $headers[] = "Cache-Control: max-age=0";
        $headers[] = "Connection: keep-alive";
        // Add a default User-Agent only when the caller did not supply one.
        $hasUserAgent = false;
        foreach ($headers as $line) {
            if (strpos($line, 'User-Agent') !== false) {
                $hasUserAgent = true;
                break;
            }
        }
        if (!$hasUserAgent) {
            $headers[] = "User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.72 Safari/537.36";
        }

        if ($proxy) {
            $this->get_proxy($proxy === true ? "" : $proxy);
            curl_setopt($ch, CURLOPT_PROXY, $this->proxy);
            if ($this->__proxy->pass) {
                curl_setopt($ch, CURLOPT_PROXYUSERPWD, $this->__proxy->login . ":" . preg_replace("/\s/", "", $this->__proxy->pass));
            }
            // Only the proxy address is logged; login and password must never reach the log file.
            $log .= ", proxy = {$this->proxy}";
        } else {
            $this->__proxy = null;
            $this->proxy = null;
            sleep(3);
        }
        $this->__log($log);

        curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);
        curl_setopt($ch, CURLINFO_HEADER_OUT, true);

        $this->__log($sep);
        $this->__log(json_encode(curl_getinfo($ch)));
        $this->__log($sep);
        $this->__log(json_encode(curl_getinfo($ch, CURLINFO_COOKIELIST)));
        $this->__log($sep);

        $ss = curl_exec($ch);
        $er = curl_error($ch);
        $ci = curl_getinfo($ch);

        // The recorded request header may carry Proxy-Authorization; redact it before logging.
        $info = $ci;
        if (is_array($info) && isset($info["request_header"])) {
            $info["request_header"] = preg_replace("/^(Proxy-Authorization:).*$/mi", "$1 [redacted]", $info["request_header"]);
        }
        $this->__log($sep);
        $this->__log(json_encode($info));
        $this->__log($sep);
        curl_close($ch);

        $f = $er || $ci["http_code"] != 200;
        if ($f) {
            $this->__log("{$log} fails (err = {$er}, http_code = {$ci["http_code"]})");
            return $proxy ? $this->get_url($url, false, $opts) : "";
        }
        return $ss;
    }

    /**
     * Sends a form-encoded POST with cURL and returns the response body.
     *
     * NOTE(review): $proxy is accepted but not consulted; the request always
     * uses whatever $this->proxy currently holds (possibly null). Confirm
     * whether callers passing false expect a direct connection.
     *
     * @param string       $url   Absolute http(s) URL.
     * @param bool|string  $proxy Currently unused.
     * @param array|string $opts  Extra request header line(s).
     * @param array        $form  Form fields.
     * @return string Response body, or "" on failure.
     */
    public function post_url($url, $proxy = true, $opts = [], $form = [])
    {
        if (!$url || !preg_match("/^https?[:]\/\//", $url)) {
            return $this->error("post_url: incorrect url = {$url}");
        }
        $query = http_build_query($form);
        $log = "post_url: url = {$url}&{$query}";

        $ch = curl_init();
        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_POST, 1);
        curl_setopt($ch, CURLOPT_POSTFIELDS, $query);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
        curl_setopt($ch, CURLOPT_TIMEOUT, $this->timeout);
        curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
        if ($this->cookies) {
            curl_setopt($ch, CURLOPT_COOKIEFILE, $this->cookies);
            curl_setopt($ch, CURLOPT_COOKIEJAR, $this->cookies);
        }

        $headers = is_array($opts) ? $opts : [$opts];
        $headers[] = "Accept-Language: ru-RU,ru;q=0.8,en-US;q=0.5,en;q=0.3";
        $headers[] = "Cache-Control: max-age=0";
        $headers[] = "Connection: keep-alive";
        $headers[] = "User-Agent: Mozilla/5.0 (Windows NT 5.1; rv:31.0) Gecko/20100101 Firefox/31.0";
        $headers[] = "Content-length: " . strlen($query);

        curl_setopt($ch, CURLOPT_PROXY, $this->proxy);
        $log .= ", proxy = {$this->proxy}";
        $this->__log($log);
        curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);

        sleep(4);
        $ss = curl_exec($ch);
        $er = curl_error($ch);
        $ci = curl_getinfo($ch);
        curl_close($ch);

        $f = $er || $ci["http_code"] != 200;
        if ($f) {
            $this->__log("{$log} fails (err = {$er}, http_code = {$ci["http_code"]})");
            return "";
        }
        return $ss;
    }

    /** Hook called at the start of grab(); no-op in this class. */
    public function session_start()
    {
    }

    /** Removes this run's pid file (named from the site code and CLI arguments 2-4), ignoring failures. */
    public function session_end()
    {
        @unlink("{$_ENV['ROOT']}/../pids/{$this->site->code}-{$_SERVER['argv'][2]}-{$_SERVER['argv'][3]}-{$_SERVER['argv'][4]}.pid");
    }

    abstract public function getPages($url, $n = 0);

    abstract public function getItems($url, $n = 0);

    abstract public function getItem($item, $n = 0);

    abstract public function create_url($urlsfx, $p = 1);

    /**
     * Decides whether a listed item should be fetched: returns 1 to process it, 0 to skip it.
     *
     * Items dated at or before $this->datelimit are skipped. Without
     * $this->check_olditems only items not yet stored are accepted; with it,
     * a stored item is accepted again when it is displayed, is not an agency,
     * matches the current category/deal type and is at least one day newer,
     * in which case the stored row is remembered in $this->olditems.
     *
     * @param object $item Item with at least ->id and ->date.
     * @return int 1 or 0.
     */
    final public function checkItem($item)
    {
        if($item->date && $item->date <= $this->datelimit) return 0;
        $olditem = mysql_fetch_object(mysql_query("SELECT * FROM objects WHERE site_id = '{$this->site->id}' AND id = 
'{$item->id}'"));
        if(!$this->check_olditems) return $olditem ? 0 : 1;
        if( !$olditem ) return 1;
        if( !$olditem->display
            || $olditem->is_agency == 1
            || $item->date <= $olditem->date
            || $olditem->category != $this->category
            || $olditem->deal_type != $this->deal_type
        ) return 0;
        $d1 = date_create($item->date);
        $d2 = date_create($olditem->date);
        $r = date_diff($d1, $d2);
        // Less than one full day of difference does not count as a re-publication.
        if(!$r->days) return 0;
        $olditem->date_diff = $r->days;
        $olditem->price = __floatval(preg_replace('/[^\d.,]+/', '', $olditem->price));
        $this->olditems[$item->id] = $olditem;
        return 1;
    }

    /**
     * Tells whether the item's phone belongs to an agency changed in oris within the last 8 months.
     *
     * Items whose description says the number is protected are never matched.
     * The phone is escaped before being placed into the query.
     *
     * @param object $item Item with ->description and ->phone.
     * @return bool
     */
    private function __is_recent_agency_phone($item)
    {
        if (preg_match("/(Номер продавца защищён|Номер агентства защищён|Номер защищён)/ismU", $item->description)) {
            return false;
        }
        $phone = mysql_real_escape_string($item->phone);
        $row = mysql_fetch_object(mysql_query("SELECT COUNT(*) as cnt FROM oris.oris_agencies WHERE phone = '{$phone}' AND date_change > DATE_ADD(NOW(), INTERVAL -8 MONTH);"));
        return $row && $row->cnt > 0;
    }

    /**
     * Inserts the item's newline-separated phones into objects_phones.
     *
     * A leading "7" on each line is rewritten to "8". SQL errors are logged
     * with the caller's name rather than aborting.
     *
     * @param object $item   Item with ->object_id and ->phone.
     * @param string $caller Method name used as the log prefix.
     */
    private function __insert_phones($item, $caller)
    {
        $values = [];
        foreach (explode("\n", preg_replace("/^7/sm", "8", $item->phone)) as $phone) {
            $phone = mysql_real_escape_string(trim($phone));
            $values[] = "('{$item->object_id}','{$phone}')";
        }
        $sql = "INSERT INTO objects_phones (object_id,phone) VALUES " . implode(", ", $values) . " ON DUPLICATE KEY UPDATE object_id = object_id";
        mysql_query($sql) or $this->__log("{$caller}(): " . mysql_error());
    }

    /**
     * Stores a new item in `objects`; items remembered by checkItem() are routed to updateItem().
     *
     * Items from blacklisted sellers, or whose phone is a recently changed
     * oris agency phone (only checked when is_agency === -1), are stored with
     * display = 0. Displayed items additionally get their phones stored and
     * are resolved to a street, house number and link group.
     *
     * @param object $item Parsed item.
     * @return mixed Result of updateItem() for known items, otherwise nothing.
     */
    public function saveItem($item)
    {
        if (isset($this->olditems[$item->id])) {
            return $this->updateItem($item);
        }

        $item->site_id = $this->site->id;
        $item->date_add = date("Y-m-d H:i:s");
        if (!$item->city) $item->city = $this->city;
        if (!$item->deal_type) $item->deal_type = $this->deal_type;
        if (!$item->category) $item->category = $this->category;

        // Seller names whose items are stored but hidden. Commented entries are disabled.
        $name = [
            'АРЕВЕРА-Недвижимость',
            //'Компания «Этажи»',
            'ГРАНТА-недвижимость',
            'Lacrum-Недвижимость',
            //'Агентство недвижимости \"Астория\"',
            'Паритет-инвест',
            'Исполин Недвижимость',
            'Красноярское Инвестиционное Агентство Недвижимости',
            //'Novostroy Invest эксперт на рынке Недвижимости Красноярска!',
            'Century21Smart',
            //'Агентство недвижимости «Фрегат»',
            //'ООО \"Алекс\"',
            'Центр обмена жилья',
            //'Агентство недвижимости \"СВОЁ\"',
            'AN BROOKLIN',
            'ООО ГАРАНТ',
            'АНГОР',
            // Backslashes removed: inside single quotes \" is two literal characters and never matched.
            'Агентство недвижимости и оценки "Профессионал"',
            'АН ДОМ',
            'Красноярский Центр Недвижимости',
            //'Центр Загородной недвижимости \"Кедр\"',
            'Ремесленникъ',
            'Перспектива24-Красноярск-Федеральный оператор недвижимости',
            //'АН \"СИТИ\"',
            //'АН \"Stolica-24\"',
            //'ООО \"ЦЕНТР ОФОРМЛЕНИЯ НЕДВИЖИМОСТИ\"',
            //'\"Делегат Групп\" недвижимость',
            //'АН \"Экспонента\"',
            'АН Территория 24',
            'АКТИВ НЕДВИЖИМОСТЬ',
            'АН Городские районы',
            'АН ИнвестКристал',
            'ЖилДом - Недвижимость !',
            'Новый Красноярск',
            'АН СССР',
            'Агентство Недвижимости Ангерона',
            'Ankom-недвижимость',
            'Сибирский дом',
            //'Бюро Недвижимости \"МеЧтА\"',
            //'Авангард+',
            //'АН \"Вариант Центр\"',
            'Риэлт Сибирь',
            //'агентство недвижимости \"Ермак\"',
            'Группа компаний РиМиР',
            'Дельта Гарант',
        ];
        // The former loop used "$i > count($name)" and therefore never ran.
        if (in_array($item->seller, $name, true)) {
            $item->display = 0;
        }

        if ($item->is_agency === -1 && $this->__is_recent_agency_phone($item)) {
            $item->display = 0;
        }

        $data = (array) $item;
        unset(
            $data["city"],
            $data["date_begin"],
            $data["date_end"],
            $data["proxy"],
            $data["session_id"],
            $data["raw_params"]
        );
        foreach ($data as &$value) {
            $value = mysql_real_escape_string($value);
        }
        unset($value);

        # TODO: ON DUPLICATE KEY UPDATE ...
        $sql = "INSERT INTO objects (`" . implode("`, `", array_keys($data)) . "`) VALUES('" . implode("', '", $data) . "')";
        mysql_query($sql) or $this->error(mysql_error());

        if ($item->display) {
            $item->object_id = mysql_insert_id();
            if ($item->phone > "") {
                $this->__insert_phones($item, "saveItem");
            }
            $this->get_street($item);
            $this->get_house_number($item);
            $this->link_object($item);
            mysql_query("UPDATE objects SET link_id = '{$item->link_id}', link_s = '{$item->link_s}', street_id = '{$item->street_id}', streets = '{$item->streets}', house_number = '{$item->house_number}' WHERE object_id = '{$item->object_id}'");
            mysql_query("UPDATE objects_phones SET link_id = '{$item->link_id}' WHERE object_id = '{$item->object_id}'");
        }
    }

    /**
     * Refreshes a stored item that checkItem() flagged as re-published.
     *
     * Items whose URL contains cian.ru or irr.ru are left untouched in the
     * database. Otherwise every field of $item is written to the stored row,
     * changes of the compared fields are logged, and for displayed items the
     * phones, street, house number, link group, checked flag and note are
     * refreshed.
     *
     * @param object $item Parsed item; its id must be present in $this->olditems.
     */
    public function updateItem($item)
    {
        $olditem = $this->olditems[$item->id];
        $item->site_id = $this->site->id;
        $item->s = number_format($item->s, 2, '.', '');
        $date_add = date("Y-m-d H:i:s");
        if (!$item->deal_type) $item->deal_type = $this->deal_type;
        if (!$item->category) $item->category = $this->category;
        // These fields are not columns of `objects` and must not reach the UPDATE.
        unset(
            $item->city,
            $item->date_begin,
            $item->date_end,
            $item->proxy,
            $item->session_id,
            $item->raw_params
        );

        $urlContainsCian = strpos($item->url, 'cian.ru') !== false;
        $urlContainsIrr = strpos($item->url, 'irr.ru') !== false;
        if ($urlContainsCian || $urlContainsIrr) {
            return;
        }

        if ($item->is_agency === -1) {
            if ($this->__is_recent_agency_phone($item)) {
                $item->display = 0;
            } else {
                // A note mentioning an agency on the linked group also hides the item.
                $row = mysql_fetch_object(mysql_query("SELECT note as note FROM grabber.objects_notes WHERE (link_id, link_s) IN (SELECT link_id,link_s FROM grabber.objects WHERE object_id = '{$olditem->object_id}');"));
                $note = $row ? $row->note : null;
                if ($note && preg_match("/([Аа]гентство)/ismU", $note)) {
                    $item->display = 0;
                }
            }
        }

        // Fields whose change is reported in detail and passed on to link_object().
        $cmpfields = ['date' => 0, 'price' => 0, 'phone' => 0, 'level' => 0, 'levels' => 0, 'rooms' => 0, 's' => 0, 'address' => 0];
        $log = "UPDATED: [{$olditem->object_id}] : ";
        $sql = "UPDATE objects SET date_add = '{$date_add}'";
        foreach ((array) $item as $k => $v) {
            if (isset($cmpfields[$k])) {
                if ($k == 'phone') {
                    // Count phones present now that were not stored before.
                    $t1 = explode("\n", preg_replace("/^7/sm", "8", $item->phone));
                    $t2 = explode("\n", preg_replace("/^7/sm", "8", $olditem->phone));
                    $cmpfields[$k] = count(array_diff($t1, $t2));
                } else if ($item->$k != $olditem->$k) {
                    $log .= "{$k}: {$olditem->$k} --> {$item->$k}; ";
                    $cmpfields[$k] = 1;
                }
            } else if ($item->$k != $olditem->$k) {
                $log .= "{$k}; ";
            }
            $sql .= ", `{$k}` = '" . mysql_real_escape_string($v) . "'";
        }
        $sql .= " WHERE object_id = '{$olditem->object_id}'";
        mysql_query($sql) or $this->error(mysql_error());
        $this->__log($log);

        if ($item->display) {
            $item->date_add = $date_add;
            $item->object_id = $olditem->object_id;
            if ($item->phone > "") {
                mysql_query("DELETE FROM objects_phones WHERE object_id = '{$item->object_id}'");
                $this->__insert_phones($item, "updateItem");
            }
            $this->get_street($item);
            $this->get_house_number($item);
            $this->link_object($item, $cmpfields);
            mysql_query("UPDATE objects SET link_id = '{$item->link_id}', link_s = '{$item->link_s}', street_id = '{$item->street_id}', streets = '{$item->streets}', house_number = '{$item->house_number}' WHERE object_id = '{$item->object_id}'");
            mysql_query("UPDATE objects_phones SET link_id = '{$item->link_id}' WHERE object_id = '{$item->object_id}'");
            // Fail through error() like the other queries, instead of a bare die().
            mysql_query("UPDATE objects_links SET checked = '{$item->checked}' WHERE (link_id, link_s) = ('{$item->link_id}', '{$item->link_s}')") or $this->error(mysql_error());
            mysql_query("UPDATE objects SET checked = '{$item->checked}' WHERE (link_id, link_s) = ('{$item->link_id}', '{$item->link_s}')") or $this->error(mysql_error());
            mysql_query("INSERT INTO objects_notes (link_id, link_s, note) VALUES('{$item->link_id}','{$item->link_s}','" . mysql_real_escape_string($log) . "') ON DUPLICATE KEY UPDATE note = TRIM(CONCAT(note, '" . mysql_real_escape_string("\n" . $log) . "'))");
        }
    }

    /**
     * Checks phones against the non-deleted agencies in oris.
     *
     * @param string|array $phones One phone or a list of phones.
     * @return int 0 when the list is empty or any phone matches an agency, otherwise 1.
     */
    public function checkPhone($phones)
    {
        if (!is_array($phones)) {
            $phones = [$phones];
        }
        $arr = [];
        foreach ($phones as $_) {
            $_ = trim($_);
            if ($_) $arr[] = $_;
        }
        if (!count($arr)) {
            return 0;
        }

        // City code without its first character, quoted for use inside the pattern below.
        $n = mb_strlen(conf::$city->code, "utf-8") - 1;
        $code = preg_quote(mb_substr(conf::$city->code, 1, $n, "utf-8"), "/");

        foreach ($arr as $phone) {
            $phone = preg_replace("/^\+7/", "7", $phone);
            // Local form: the number without the leading 7/8 and city code.
            $tmp = mysql_real_escape_string(preg_replace("/^[78]{$code}/", "", $phone));
            // Full form with the first digit turned into the LIKE single-character wildcard.
            $phone = mysql_real_escape_string(preg_replace("/^\d/", "_", $phone));
            $n = mysql_result(mysql_query("SELECT COUNT(*) FROM oris.oris_agencies WHERE phone > '' AND !deleted AND (phone = '{$tmp}' OR phone LIKE '{$phone}')"), 0);
            if ($n) {
                return 0;
            }
        }
        return 1;
    }

    /**
     * Runs one grabbing pass for a city / deal type / category combination.
     *
     * Walks the listing pages of every configured section, collects the items
     * accepted by checkItem() and stops after $this->limit consecutive
     * rejected items; the collected items are then processed oldest-first.
     *
     * @param string $city
     * @param string $deal_type
     * @param string $category
     * @return bool False when the combination is not configured, true otherwise.
     */
    public function grab($city, $deal_type, $category)
    {
        if( !isset($this->cities[$city])
            || !isset($this->deals[$deal_type])
            || !isset($this->categories[$category])
            || !isset($this->sections[$deal_type][$category])
        ) {
            return false;
        }
        $this->city = $city;
        $this->deal_type = $deal_type;
        $this->category = $category;
        $this->session_start();
        $this->items = [];
        $sections = is_array($this->sections[$deal_type][$category]) ? $this->sections[$deal_type][$category] : [$this->sections[$deal_type][$category]];
        foreach($sections as $urlsfx){
            $url = $this->create_url($urlsfx);
            $pages = $this->getPages($url);
            $limit = $this->limit;
            for ($p = $this->startpage; $p <= $pages; $p++){
                $url = $this->create_url($urlsfx,$p);
                $items = $this->getItems($url);
                foreach($items as $item){
                    if(!$this->checkItem($item)) $limit--;
                    else{
                        // An accepted item resets the run of consecutive rejects.
                        $limit = $this->limit;
                        $this->items[$item->id] = $item;
                    }
                    if(!$limit) break 2;
                }
            }
        }
        $n = count($this->items);
        $this->__log("\n\n=========\n ITEMS FOUND: {$n}\n=========\n\n");
        $this->items = array_reverse($this->items);
        foreach($this->items as $item){
            $this->getItem($item) ?
null : $this->saveUrl($item);
        }
        $this->session_end();
        return true;
    }

    /**
     * Launches one background grabber process per city / deal type / category.
     *
     * Each command is wrapped in `flock -n`, so a combination whose lock file
     * is already held is not started again.
     *
     * NOTE(review): the lock file name does not include the city while the
     * log file does; confirm that sharing one lock across cities is intended.
     */
    public function cron()
    {
        foreach ($this->cities as $city => $city_id) {
            foreach ($this->sections as $deal_type => $params) {
                foreach ($params as $category => $v) {
                    $logfile = "{$_ENV['ROOT']}/../logs/{$this->site->code}-{$city}-{$deal_type}-{$category}.log";
                    $cmd = "php {$_ENV['ROOT']}/grabber.php {$this->site->code} {$city} {$deal_type} {$category} >> {$logfile} 2>&1 &";
                    $cmd = "flock -n /tmp/{$this->site->code}-{$deal_type}-{$category}.flock -c \"{$cmd}\"";
                    shell_exec($cmd);
                }
            }
        }
    }

    /** Launches a single background grabber process for the whole site under a per-site flock. */
    protected function __cron()
    {
        $logfile = "{$_ENV['ROOT']}/../logs/{$this->site->code}.log";
        $cmd = "php {$_ENV['ROOT']}/grabber.php {$this->site->code} >> {$logfile} &";
        $cmd = "flock -n /tmp/{$this->site->code}.flock -c \"{$cmd}\"";
        shell_exec($cmd);
    }

    /**
     * Resolves the item's address text to street ids from `oris_streets`.
     *
     * Sets $item->street_id (best match, 0 if none) and $item->streets
     * (comma-separated ids of up to 8 matches). Stop words from
     * conf::$stopwords short-circuit the lookup. Otherwise the normalised
     * address is tried with a full-text match, then a hyphenated prefix
     * variant, then the description, and finally a boolean-mode search on
     * truncated word stems.
     *
     * @param object $item Item with ->address, ->title, ->description, ->category.
     * @return array ['street_id' => &..., 'streets' => &...] referencing the item's fields.
     */
    public function get_street(&$item)
    {
        $item->street_id = 0;
        $item->streets = "";
        $res = ['street_id' => &$item->street_id, 'streets' => &$item->streets];
        if (!$item->category) {
            return $res;
        }

        // Strip the city name; fall back to the title when almost nothing is left.
        $re = "/\b" . preg_quote(conf::$city->name, "/") . "\b/ui";
        $address = trim(preg_replace($re, "", $item->address));
        if (mb_strlen($address, "utf-8") < 4) {
            $address = preg_replace($re, "", $item->title);
        }

        // Normalise: Latin "c" to Cyrillic "с", "ё" to "е", common street-type abbreviations,
        // drop everything up to a district marker, and pad numbers with spaces.
        $address = preg_replace("/c/i", "с", $address);
        $address = preg_replace("/ё/i", "е", $address);
        $address = preg_replace("/\bлет.*\b/Uui", "лет", $address);
        $address = preg_replace("/\bпереулок\b/Uui", "пер", $address);
        $address = preg_replace("/\bпроспект\b/Uui", "пр", $address);
        $address = preg_replace("/\bтракт\b/Uui", "тр", $address);
        $address = preg_replace("/\b.*\bр(?:айо|\-о?)н\b/Uui", "", $address);
        $address = preg_replace("/(\d+(?:\-я)?)/ui", " $1 ", $address);
        // The address is escaped at each query below; escaping it here as well would double-escape it.

        $__address = $item->address . PHP_EOL . $item->title;
        foreach (conf::$stopwords as $a => $id) {
            if (preg_match("/\b{$a}/isu", $__address)) {
                $item->street_id = $id;
                $item->streets = $id;
                return $res;
            }
        }

        // Detect compound prefixes ("ново-", "старо-", ...): $r = 1 when followed by a letter,
        // 2 when a separator follows, in which case the separator is removed.
        $re = "/\b(ново|старо|дальне|нижне|верхне|северо|юго|южно|западно|восточно|средне)(.)/ui";
        $r = 0;
        if (preg_match($re, $address, $tmp)) {
            $r = 1;
            if (preg_match("/[^а-я]/ui", $tmp[2])) {
                $r = 2;
                $address = preg_replace("/\b{$tmp[1]}[^а-я]+/ui", $tmp[1], $address);
            }
        }

        $sql = "SELECT id, MATCH(name, prefix) AGAINST ('%s') AS rank FROM oris_streets WHERE !deleted HAVING rank > 0 ORDER BY rank DESC LIMIT 8";
        $db = mysql_query(sprintf($sql, mysql_real_escape_string($address)));
        if (!mysql_num_rows($db) && $r == 2) {
            // Retry with the prefix hyphenated.
            $address = preg_replace($re, "$1-$2", $address);
            $db = mysql_query(sprintf($sql, mysql_real_escape_string($address)));
        }

        // Generic listing words that carry no street information.
        $re = "/\b(?:прода.*|аренд.*|сда|сниму|квартир.*|комнат.*|адрес.*|цена|окн.*|лоджи.*|балкон|совм.*|разд.*|ремонт|договор|торг)\b/Uui";
        if (!mysql_num_rows($db) && $item->description) {
            $tmp = preg_replace($re, "", $item->description);
            $db = mysql_query(sprintf($sql, mysql_real_escape_string($tmp)));
        }
        if (!mysql_num_rows($db)) {
            // Last resort: boolean-mode prefix search on stems of words with 4+ letters.
            $address = preg_replace($re, "", $address);
            $tmp = preg_split("/[^а-я]+/ui", $address);
            $address = "";
            foreach ($tmp as $_) {
                $n = mb_strlen($_, "utf-8");
                if ($n >= 4) {
                    $address .= mb_substr($_, 0, $n - ($n < 6 ? 1 : 2), "utf-8") . "* ";
                }
            }
            $db = mysql_query("SELECT id, MATCH(name, prefix) AGAINST ('>" . mysql_real_escape_string($address) . "' IN BOOLEAN MODE) AS rank FROM oris_streets WHERE !deleted HAVING rank > 0 ORDER BY rank DESC LIMIT 8");
        }

        $ids = [];
        while ($_ = mysql_fetch_object($db)) {
            if (!$item->street_id) {
                $item->street_id = $_->id;
            }
            $ids[] = $_->id;
        }
        $item->streets = implode(",", $ids);
        return $res;
    }

    /**
     * Extracts the trailing house number from $item->address into $item->house_number.
     *
     * The address is trimmed in place. When it ends with a number (optionally
     * followed by up to 6 non-digits), the house number starts at the first of
     * the trailing numbers that lie within 6 bytes of each other. Backslashes
     * become "/" and the result is SQL-escaped.
     *
     * @param object $item Item with ->address.
     */
    public function get_house_number(&$item)
    {
        $item->house_number = "";
        $item->address = trim($item->address);
        if (preg_match("/\D([1-9]\d*\D{0,6})$/iu", $item->address)) {
            preg_match_all("/\D(\d+)/iu", $item->address, $tmp, PREG_OFFSET_CAPTURE);
            $tmp = $tmp[1];
            // Walk backwards while consecutive numbers stay close together.
            $e2 = array_pop($tmp);
            while ($e1 = array_pop($tmp)) {
                if ($e2[1] - $e1[1] > 6) {
                    break;
                }
                $e2 = $e1;
            }
            $item->house_number = substr($item->address, $e2[1]);
            $item->house_number = str_replace("\\", "/", $item->house_number);
        }
        $item->house_number = mysql_real_escape_string($item->house_number);
    }

    /**
     * Maps an area to a logarithmic bucket index based on the tolerance $this->dS.
     *
     * @param float|int $s Area.
     * @return float|int 0 for a falsy area, otherwise the ceil()-ed bucket number.
     */
    public function get_link_s($s)
    {
        if (!$s) {
            return 0;
        }
        $ratio = (1 + $this->dS) / (1 - $this->dS);
        return ceil(log($s / (1 - $this->dS)) / log($ratio));
    }

    /**
     * Stores, per phone of the item, a status derived from matching oris objects.
     *
     * Each phone is looked up (with and without the city code) in the oris
     * table for the item's deal type and category. A phone's status is the
     * `deleted` flag of its first match, AND-ed with `deleted` of every further
     * match; phones without a match get an empty status.
     *
     * @param object $item Item with ->deal_type, ->category, ->phone, ->object_id.
     */
    public static function colorize_phones($item)
    {
        $rx = "/^" . preg_quote(conf::$city->code, "/") . "/";
        $tables = [
            'prodam' => [
                'kvartiry' => "oris_flat_sale",
                'komnaty' => "oris_flat_sale",
                'doma_dachi_kottedzhi' => "oris_house_sale",
                'zemelnye_uchastki' => "oris_ground_sale",
                'kommercheskaya_nedvizhimost' => "oris_nonres_sale",
            ],
            'sdam' => [
                'kvartiry' => "oris_flat_rent",
                'komnaty' => "oris_flat_rent",
                'doma_dachi_kottedzhi' => "oris_house_rent",
                'zemelnye_uchastki' => "oris_ground_rent",
                'kommercheskaya_nedvizhimost' => "oris_nonres_rent",
            ],
        ];
        $fields = ["phone_home", "phone_work", "phone_cell"];
        if (!isset($tables[$item->deal_type][$item->category])) {
            return;
        }
        $table = $tables[$item->deal_type][$item->category];

        // phone => [escaped full form, escaped form without the city code (if it had one)]
        $phones = [];
        foreach (explode("\n", preg_replace("/^7/sm", "8", trim($item->phone))) as $_) {
            $_ = trim($_);
            if ($_ > "" && !isset($phones[$_])) {
                $phones[$_] = [mysql_real_escape_string($_)];
                if (preg_match($rx, $_)) {
                    $phones[$_][] = mysql_real_escape_string(preg_replace($rx, "", $_));
                }
            }
        }
        // Without phones the IN () list below would be empty, which is a SQL syntax error.
        if (!$phones) {
            return;
        }

        $s = "";
        foreach ($phones as &$_) {
            $s .= "'" . implode("','", $_) . "', ";
            // From here on the map holds statuses; "" means "no match seen yet".
            $_ = "";
        }
        unset($_);
        $s = preg_replace("/,\s+$/", "", $s);

        $sql = "SELECT t1.object_id, t1.phone_home, t1.phone_work, t1.phone_cell, t1.deleted FROM oris.oris_objects t1 INNER JOIN oris.{$table} t2 ON t1.object_id = t2.object_id WHERE t1.phone_home IN ({$s}) OR t1.phone_work IN ({$s}) OR t1.phone_cell IN ({$s})";
        $dbc = mysql_query($sql);
        while ($_ = mysql_fetch_object($dbc)) {
            foreach ($fields as $f) {
                $p = $_->$f;
                if (!$p) {
                    continue;
                }
                // Short numbers are stored without the city code; restore it for the lookup.
                if (mb_strlen($p, "utf-8") < 11) {
                    $p = conf::$city->code . $p;
                }
                if (isset($phones[$p])) {
                    $phones[$p] = $phones[$p] === "" ? $_->deleted : $phones[$p] && $_->deleted;
                }
            }
        }

        foreach ($phones as $phone => $status) {
            $phone = mysql_real_escape_string($phone);
            mysql_query("UPDATE objects_phones SET status = '{$status}' WHERE object_id = {$item->object_id} AND phone = '{$phone}'");
        }
    }

    # TODO: rename "link" to "group", more correct
    public function link_object(&$item, $update = false)
    {
        $item->link_s = $this->get_link_s($item->s);
        $item->link_id = $item->object_id;
        $UpdateActualDateQuery = "UPDATE oris.GrabberObjectLinks SET ActualDate = '".$item->date."' WHERE url = '".$item->url."';";
        mysql_query($UpdateActualDateQuery) or $this->__log(mysql_error());
        $res = [link_id => &$item->link_id, link_s => &$item->link_s];
        // if (!$item->category || $item->category != "kvartiry" && $item->category != "komnaty") {
        //     self::colorize_phones($item);
        //     return $res;
        // }
        if($item->street_id && $item->phone != "Нет телефона"){
            # TODO: review this
            # use link_s in search, make link_id as single key without link_s
            $filter = "";
            if($item->category == 'doma_dachi_kottedzhi'){
                $filter = " AND o.s IS NOT NULL AND o.s > 0 AND ABS(o.s - {$item->s}) < 1 ";
            }else if($item->category == 'zemelnye_uchastki'){
                if($item->s && $item->s > 0){
                    $filter = " AND o.s IS NOT NULL AND o.s > 0 AND ABS(o.s - {$item->s}) < 1 ";
                }else if($item->land && $item->land > 0){
                    $filter = " AND o.land IS NOT NULL AND o.land > 0 AND ABS(o.land - {$item->land}) < 1 ";
                } else{
                    $filter = " AND 1!=1 ";
                }
            }
            $query = " SELECT o.link_id FROM objects o INNER JOIN objects_phones p ON o.object_id = p.object_id INNER JOIN objects_phones t ON p.phone = t.phone AND p.object_id != t.object_id WHERE o.display AND o.link_id AND t.object_id = '{$item->object_id}' AND o.category = '{$item->category}' AND o.deal_type = '{$item->deal_type}' AND o.street_id = '{$item->street_id}' AND o.level = '{$item->level}' AND o.rooms = '{$item->rooms}' {$filter} LIMIT 1; ";
            $tmp = mysql_fetch_assoc(mysql_query($query));
            if($tmp["link_id"]) $item->link_id =
$tmp["link_id"]; } $link = new stdClass(); foreach(["date","date_add","link_id","link_s","street_id","level","rooms","deal_type","category","is_agency"] as $f) $link->$f = $item->$f; $link->url = ""; $link->checked = 0; $link->number = "0"; $link->phone = []; $link->price = []; $link->images = []; $link->sites = []; $link->is_delayed = 0; $tmp = [ images => 0, url => 0, ]; $code = conf::$city->code; $re = "/^" . $code . "/"; $streets = explode(",", $item->streets); $streets = array_combine($streets, $streets); # TODO: review algorythm # select $link from `objects_links` first, then compare with $item (see previous todo) # so next loop can be removed # (???) races with other process -> rebuild `object_links` with another unique proccess/cron, NOT HERE!! -> so grabbers could work faster!! # store combined streets of linked objects in `objects_links` $dbc = mysql_query("SELECT * FROM objects WHERE object_id = '{$item->object_id}' OR link_id = '{$item->link_id}' AND link_s = '{$item->link_s}' ORDER BY date DESC, date_add DESC"); $link->count = mysql_num_rows($dbc); $checked = 0; while ($_ = mysql_fetch_object($dbc)) { foreach (["title","raion","address","distance","params","seller","is_agency"] as $f) if (mb_strlen($link->$f,"utf-8") < mb_strlen($_->$f,"utf-8")) $link->$f = $_->$f; if($_->date > $link->date) $link->date = $_->date; $link->sites[$_->site_id] = $this->sites[$_->site_id]->name; $_->phone = explode("\n",preg_replace("/^7/sm", "8", trim($_->phone))); foreach($_->phone as $f) { $f = trim($f); if($f > "" && !isset($link->phone[$f])){ $link->phone[$f] = [mysql_real_escape_string($f)]; if(preg_match($re, $f)) $link->phone[$f][] = mysql_real_escape_string(preg_replace($re, "", $f)); } } if(!count($link->images) && trim($_->images) > ""){ $_->images = preg_split("/(?:\r?\n){1,}/", trim($_->images)); $tmp["images"] = $_->site_id; $link->images = $_->images; } if(!$link->url && trim($_->url) > ""){ $tmp["url"] = $_->site_id; $link->url = trim($_->url); } 
if(!$link->description && trim($_->description) > "") { $link->description = trim($_->description); } if(!count($link->price)) $link->price = [$_->s => [$_->price]]; if($_->checked && !$checked) $checked = $_->checked; } # images $host = $this->sites[$tmp["images"]]->host; $host = "http" . (preg_match("/[:]443$/", $host) ? "s" : "") . "://" . $host; foreach($link->images as &$_){ if(!preg_match("/^https?\:\/\//is", $_)) $_ = $host . $_; } unset($_); $link->images = implode("\n", $link->images); # url if(!preg_match("/^https?\:\/\//is",$link->url)){ $host = $this->sites[$tmp["url"]]->host; $host = "http" . (preg_match("/[:]443$/",$host) ? "s" : "") . "://" . $host; $link->url = $host . $link->url; } $link->sites = implode(", ",$link->sites); # prices /*foreach($link->price as &$_) { $_ = array_values($_); sort($_, SORT_STRING); } unset($_); ksort($link->price, SORT_NUMERIC);*/ # phones $tmp = ""; foreach($link->phone as &$_){ $tmp .= "'" . implode("','",$_) . "', "; $_ = ""; } unset($_); $tmp = preg_replace("/,\s+$/","",$tmp); $allItemPhonesSqlInConcated = $tmp; $dbc = mysql_query(" SELECT oo.date_check, oo.object_id, oo.square, oo.rooms_count, oo.level, oo.price, oo.phone_home, oo.phone_work, oo.phone_cell, oo.deleted, COALESCE((SELECT ObjectId FROM oris.GrabberObjectLinks WHERE Url = '{$link->url}' LIMIT 1),'') = oo.object_id as haveSavedUrl, oo.street_id, oo.number, IF( ofs.object_id IS NULL AND ohs.object_id IS NULL AND ogs.object_id IS NULL AND ons.object_id IS NULL, IF(ofr.object_id IS NULL AND ohr.object_id IS NULL AND ogr.object_id IS NULL AND onr.object_id IS NULL, NULL, 'sdam'), 'prodam') AS deal_type, CASE WHEN ofs.object_id IS NOT NULL OR ofr.object_id IS NOT NULL AND LOWER((SELECT name FROM oris.oris_series_type s WHERE s.id = (SELECT series_id FROM oris.oris_flat WHERE object_id = oo.object_id))) NOT LIKE '%комната%' THEN 'kvartiry' WHEN ofs.object_id IS NOT NULL OR ofr.object_id IS NOT NULL AND LOWER((SELECT name FROM oris.oris_series_type s WHERE 
s.id = (SELECT series_id FROM oris.oris_flat WHERE object_id = oo.object_id))) LIKE '%комната%' THEN 'komnaty' WHEN ogs.object_id IS NOT NULL OR ogr.object_id IS NOT NULL THEN 'zemelnye_uchastki' WHEN ohs.object_id IS NOT NULL OR ohr.object_id IS NOT NULL THEN 'doma_dachi_kottedzhi' WHEN ons.object_id IS NOT NULL OR onr.object_id IS NOT NULL THEN 'kommercheskaya_nedvizhimost' END as category FROM oris.oris_objects oo LEFT JOIN oris.oris_flat_sale ofs ON oo.object_id = ofs.object_id LEFT JOIN oris.oris_flat_rent ofr ON oo.object_id = ofr.object_id LEFT JOIN oris.oris_house_sale ohs ON oo.object_id = ohs.object_id LEFT JOIN oris.oris_house_rent ohr ON oo.object_id = ohr.object_id LEFT JOIN oris.oris_ground_sale ogs ON oo.object_id = ogs.object_id LEFT JOIN oris.oris_ground_rent ogr ON oo.object_id = ogr.object_id LEFT JOIN oris.oris_nonres_sale ons ON oo.object_id = ons.object_id LEFT JOIN oris.oris_nonres_rent onr ON oo.object_id = onr.object_id WHERE oo.phone_home IN ({$tmp}) OR oo.phone_work IN ({$tmp}) OR oo.phone_cell IN ({$tmp}) OR oo.object_id = (SELECT ObjectId FROM oris.GrabberObjectLinks WHERE Url = '{$link->url}' LIMIT 1) ORDER BY oo.date_check DESC "); $tmp = ["phone_home", "phone_work", "phone_cell"]; $__cache = []; $__deal_type = []; $__category = []; unset($link->same, $link->archive, $__price); while($_ = mysql_fetch_object($dbc)){ $__deal_type[$_->deal_type] = 1; $__category[$_->category] = 1; if ((isset($streets[$_->street_id]) && $link->deal_type == $_->deal_type && $link->category == $_->category)) { $_->link_s = $this->get_link_s($_->square); if (!isset($link->same) || $link->same == 255) { $link->same = $_->deleted ? 255 : 1; } if (((is_null($link->level) || $link->level == $_->level) && (is_null($link->rooms) || $link->rooms == $_->rooms_count )&& ($link->link_s == $_->link_s || abs($item->s - $_->square) <= 1)) ) { $link->street_id = $_->street_id; $link->archive = isset($link->archive) ? 
$link->archive && $_->deleted : $_->deleted; if ($link->number === "0") $link->number = $_->number; if (!$_->deleted && !isset($__price)) $__price = $_->price; foreach($tmp as $f) { $f = $_->$f; if (!$f) continue; if (mb_strlen($f, "utf-8") < 11) $f = $code . $f; if (isset($link->phone[$f])) { $link->phone[$f] = $link->phone[$f] === "" ? $_->deleted : $link->phone[$f] && $_->deleted; $__cache[$f] = 1; } } continue; } } foreach($tmp as $f) { $f = $_->$f; if(!$f) continue; if(mb_strlen($f, "utf-8") < 11) $f = $code . $f; if(!isset($__cache[$f]) && isset($link->phone[$f])) $link->phone[$f] = $link->phone[$f] === "" ? $_->deleted : $link->phone[$f] && $_->deleted; } } $dbc = mysql_query("SELECT 1 as result FROM oris.GrabberObjectLinks WHERE Url LIKE '%{$item->url}%' AND ObjectId IN (SELECT object_id FROM oris.oris_objects WHERE !deleted) LIMIT 1;"); $haveSavedUrl = mysql_fetch_object($dbc); $haveSavedUrl = $haveSavedUrl->result; if($haveSavedUrl && strpos($item->url, 'avito') !== false) { $isPhoneAvitoBlocked = strpos($item->description, 'Номер продавца защищён Avito') !== false; $que = mysql_query(" SELECT SUM(b) as result FROM ( SELECT (SELECT COALESCE(phone_home,'') as phone FROM oris.oris_objects WHERE object_id = (SELECT ObjectId FROM oris.GrabberObjectLinks WHERE Url = '{$item->url}')) IN ({$allItemPhonesSqlInConcated}) as b UNION SELECT (SELECT COALESCE(phone_work,'') as phone FROM oris.oris_objects WHERE object_id = (SELECT ObjectId FROM oris.GrabberObjectLinks WHERE Url = '{$item->url}')) IN ({$allItemPhonesSqlInConcated}) as b UNION SELECT (SELECT COALESCE(phone_cell,'') as phone FROM oris.oris_objects WHERE object_id = (SELECT ObjectId FROM oris.GrabberObjectLinks WHERE Url = '{$item->url}')) IN ({$allItemPhonesSqlInConcated}) as b ) as l ;"); $isAnyPhoneInObject = mysql_fetch_object($que); $isAnyPhoneInObject = $isAnyPhoneInObject->result; $haveSavedUrl = $haveSavedUrl && ($isPhoneAvitoBlocked || (!$isPhoneAvitoBlocked && $isAnyPhoneInObject)); } # 
проверяем отсутствие телефона в категории + признак архива $__f = !isset($__deal_type[$link->deal_type]) || !isset($__category[$link->category]) || $link->archive; # проверяем наличие новых и архивных телефонов if (!$__f) foreach ($link->phone as $_) { if ($_ === "" || $_ === "1" || $_ === true) { $__f = 1; break; } } $update_backup = $update; # объявление - неархивный "полный дубль" без новых телефонов c ссылкой // if (!$__f && !$link->archive && $link->number !== "0" && isset($__price)) { if (!$__f && !$link->archive && ((($link->category == "komnaty" || $link->category == "kvartiry") && $link->number !== "0") || ($link->category !== "kvartiry" && $link->category !== "komnaty"))) { # сравниваем цены // $price = __floatval(current(current($link->price))); // $t = []; // $t[$price] = 1; // $t[$price / 1000] = 1; // $t[$price * 1000] = 1; // if(isset($t[$__price])) // $link->checked = 1; // else if($haveSavedUrl) $link->checked = 2; } else if (!$__f || ($haveSavedUrl)) { # повторное получение объявления сайта if ($update && isset($this->olditems[$item->id])) { $olditem = $this->olditems[$item->id]; $diff = ['sdam' => 30, 'prodam' => 90]; // if ($checked && ($olditem->date_diff >= $diff[$item->deal_type] || $update['price'])) { if ($checked && $olditem->date_diff && ($haveSavedUrl)) { $checked = 2; } unset($update['date'], $update['price'], $update['levels'], $update['s']); foreach($update as $_) { if($_) { $checked = 0; break; } } } $link->checked = $checked; } $que = mysql_query("SELECT checked FROM grabber.objects_links WHERE (link_id, link_s) = ('{$link->link_id}', '{$link->link_s}');"); $currentCheckedStatus = mysql_fetch_object($que); $currentCheckedStatus = $currentCheckedStatus->checked; $urlContainsCian = strpos($item->url, 'cian.ru') !== false; $urlContainsIrr = strpos($item->url, 'irr.ru') !== false; $urlContainsDomofond = strpos($item->url, 'domofond.ru') !== false; $excludeAutoUpdate = $urlContainsCian || $urlContainsIrr || $urlContainsDomofond; 
if(!$excludeAutoUpdate && $link->checked == 2 && $currentCheckedStatus != 2) { $doesOnlyDateOrPriceChanged = $update_backup['date'] || $update_backup['price']; unset($update_backup['date'], $update_backup['price']); foreach($update_backup as $_) { if($_) { $doesOnlyDateOrPriceChanged = 0; break; } } if($doesOnlyDateOrPriceChanged) { $que = mysql_query("SELECT ObjectId FROM oris.GrabberObjectLinks WHERE Url = '{$item->url}';"); $res = mysql_fetch_object($que); $objectId = $res->ObjectId; $que = mysql_query("SELECT date_check,IF(object_id IN ( SELECT object_id FROM oris.oris_flat_sale UNION ALL SELECT object_id FROM oris.oris_house_sale UNION ALL SELECT object_id FROM oris.oris_ground_sale UNION ALL SELECT object_id FROM oris.oris_nonres_sale ),price*1000,price) as price, is_on_handsafe,private_note FROM oris.oris_objects WHERE object_id = '{$objectId}';"); $res = mysql_fetch_object($que); $oldDateCheck = $res->date_check; $oldPrice = $res->price; $oldIsOnHandsafe = $res->is_on_handsafe; $oldPrivateNote = $res->private_note; $que = mysql_query("SELECT id FROM oris.oris_users_v3 WHERE login = 'Robot';"); $res = mysql_fetch_object($que); $robotUserId = $res->id; $que = mysql_query("SELECT NOW() as date2;"); $res = mysql_fetch_object($que); $fixedNow = $res->date2; if($objectId != null && $oldDateCheck != null && $oldPrice != null && $robotUserId != null && $fixedNow != null) { $currentPrice = __floatval(current(current($link->price))); $handsafeRegexReplace = "/((,)*( )*((н|Н)а задатке до)+( | )?([0-9]?[0-9]?(\ |\ )??((я|Я)нвар|(ф|Ф)еврал|(м|М)арт|(а|А)прел|(м|М)а|(и|И)юн|(и|И)юл|(а|А)август|(с|С)ентябр|(о|О)ктябр|(н|Н)оябр|(д|Д)екабр)(ь|я|а|й)?(\ |\ )?[0-9]?[0-9]?[0-9]?[0-9]?)?[\ \,]*\s*)|((,)*( )*((с|С)няли с продажи до)+( | )?([0-9]?[0-9]?(\ |\ )??((я|Я)нвар|(ф|Ф)еврал|(м|М)арт|(а|А)прел|(м|М)а|(и|И)юн|(и|И)юл|(а|А)август|(с|С)ентябр|(о|О)ктябр|(н|Н)оябр|(д|Д)екабр)(ь|я|а|й)?(\ |\ )?[0-9]?[0-9]?[0-9]?[0-9]?)?[\ \,]*\s*)|((,)*( )*((с|С)няли с продажи на 
неопределенный срок)+[\ \,]*\s*)|((,)*( )*((е|Е)сть потенциальный покупатель)+[\ \,]*\s*)/smi"; $changesToOrisObject = " date_check = '{$fixedNow}' "; $abortChanges = false; if($currentPrice != $oldPrice) { if(($link->deal_type == 'prodam' && abs($currentPrice - $oldPrice) <= 100000) || ($link->deal_type == 'sdam' && abs($currentPrice - $oldPrice) <= 1000)) { $priceToApply = $link->deal_type == 'prodam' ? $currentPrice / 1000 : $currentPrice; $oldPrice = $link->deal_type == 'prodam' ? $oldPrice / 1000 : $oldPrice; $changesToOrisObject = $changesToOrisObject . " , price = {$priceToApply} "; } else $abortChanges = true; } if($oldIsOnHandsafe == 1) { $changesToOrisObject = $changesToOrisObject . " , is_on_handsafe = 0 "; $newMemo = preg_replace($handsafeRegexReplace,'',$oldPrivateNote); if($newMemo !== $oldPrivateNote) { $changesToOrisObject = $changesToOrisObject . " , private_note = \"{$newMemo}\" "; } } if(!$abortChanges) { $this->__log($objectId); $this->__log($changesToOrisObject); $res = mysql_query("INSERT INTO `oris`.`oris_changes` (`object_id`, `date`, `table_name`, `user_id`, `flag`) VALUES ('{$objectId}', '{$fixedNow}', 'oris_objects', {$robotUserId}, 1);"); // $id = mysql_insert_id(); $res = mysql_query("UPDATE oris.oris_objects SET {$changesToOrisObject} WHERE object_id = '{$objectId}';"); $link->checked = 1; $res = mysql_query("UPDATE `oris`.`GrabberObjectLinks` SET `ActualDate` = NOW(), `DeleteDate` = NULL, `ExpiredDate` = NULL, `InspectorLastCheckDate` = NOW() WHERE `Url` = '{$item->url}';"); $que = mysql_query("SELECT Id FROM oris.GrabberObjectLinks WHERE url = '{$item->url}'"); $result = mysql_fetch_object($que); $urlId = $result->Id; $res = mysql_query("DELETE FROM oris.oris_incorrect_objects_request WHERE object_id = '{$objectId}';"); if($res && mysql_affected_rows() > 0) { $que = mysql_query("SELECT memo FROM oris.oris_objects WHERE object_id = '{$objectId}';"); $result = mysql_fetch_object($que); $memo = $result->memo; if(empty(memo) || 
is_null(memo)) $newMemo = 'Удалено из некорректного'; else $newMemo = $memo." Удалено из некорректного"; $changesToOrisObject = " memo = \"{$newMemo}\" "; $this->__log($objectId); $this->__log($changesToOrisObject); $res = mysql_query("INSERT INTO `oris`.`oris_changes` (`object_id`, `date`, `table_name`, `user_id`, `flag`) VALUES ('{$objectId}', '{$fixedNow}', 'oris_objects', {$robotUserId}, 1);"); $res = mysql_query("UPDATE oris.oris_objects SET {$changesToOrisObject} WHERE object_id = '{$objectId}';"); } $res = mysql_query("DELETE FROM oris.GrabberObjectLinkBadRequests WHERE GrabberObjectLinkId = {$urlId};"); } } } } if(!$update && $link->checked) { mysql_query("UPDATE objects SET checked = '{$link->checked}' WHERE object_id = '{$item->object_id}' OR (link_id, link_s) = ('{$item->link_id}', '{$item->link_s}')") or die(mysql_error()); } $item->checked = $link->checked; $link->phone = serialize($link->phone); $link->price = serialize($link->price); $data = (array) $link; unset($data["city"]); // unset($data["category"]); foreach ($data as &$_) $_ = mysql_real_escape_string($_); unset($_); $sql = "REPLACE INTO objects_links (`" . implode("`, `", array_keys($data)) . "`) VALUES('" . implode("', '", $data) . 
"')"; mysql_query($sql) or $this->error(mysql_error()); return $res; } public function link_unlinked(){ $this->__log('Started link_unlinked'); $dbc = mysql_query(" SELECT COUNT(*) as cnt FROM grabber.objects WHERE (link_id,link_s) NOT IN (SELECT link_id,link_s FROM grabber.objects_links) AND !(display = 0 AND checked = 0); "); $cnt = mysql_fetch_object($dbc); $this->__log(''); $this->__log('-------------------------------'); $this->__log("Founded unlinked items - {$cnt->cnt}"); $this->__log('-------------------------------'); $dbc = mysql_query(" SELECT * FROM grabber.objects WHERE (link_id,link_s) NOT IN (SELECT link_id,link_s FROM grabber.objects_links) AND !(display = 0 AND checked = 0); "); while($item = mysql_fetch_object($dbc)){ $this->__log("({$item->link_id},{$item->link_s}) - {$item->url}"); $this->link_object($item); mysql_query("UPDATE objects SET link_id = '{$item->link_id}', link_s = '{$item->link_s}', street_id = '{$item->street_id}', streets = '{$item->streets}', house_number = '{$item->house_number}' WHERE object_id = '{$item->object_id}'"); mysql_query("UPDATE objects_phones SET link_id = '{$item->link_id}' WHERE object_id = '{$item->object_id}'"); } } public function saveUrl($item) { $item->site_id = $this->site->id; $item->url = mysql_real_escape_string($item->url); if(!$item->city) $item->city = $this->city; if(!$item->deal_type) $item->deal_type = $this->deal_type; if(!$item->category) $item->category = $this->category; mysql_query("INSERT INTO objects_errors (site_id,id,url,date_add,date_check,fails,city,deal_type,category) VALUES('{$item->site_id}','{$item->id}','{$item->url}',NOW(),NOW(),0,'{$item->city}','{$item->deal_type}','{$item->category}') ON DUPLICATE KEY UPDATE fails = fails + 1, date_check = NOW()") or $this->__log("saveUrl(): " . 
mysql_error());
    }

    /**
     * Extracts the floor of an object from free text.
     *
     * All numbers captured by the patterns below are collected and the
     * smallest one is returned, so in "Этаж 4 из 16" both 4 and 16 are
     * captured and 4 wins.
     *
     * @param string $s Ad text.
     * @return int|string Smallest captured number (as matched text), 0 if none.
     */
    public static function parse_level($s)
    {
        $mathes = [];
        $count = 0;
        $patterns = [
            # "2-х этажный"
            '/\b(\d+)\D{0,4}\bэт(?:\b|аж)/ismu',
            # "кол-во этажей 5" (number of floors: 5)
            '/\bэтаж(?:ей?|ность)?\b\D{0,4}(\d+)\b/ismu',
            # "Этаж 4 из 16", "Этаж 4/17"
            '/\bэтаж\b\D{1,4}\b\d+\s*(?:\bиз\b|\/)\s*(\d+)\b/ismu'
        ];

        foreach ($patterns as $rx) {
            $r = preg_match_all($rx, $s, $tmp);
            if ($r) {
                $mathes = array_merge($mathes, $tmp[1]);
                $count += $r;
            }
        }

        # fallback for bare fractions, e.g. "1-к вартиру, 6/18 30/12/7"
        if (!$count && $r = preg_match_all('/\b(\d+)[^\d\/]{0,3}\/[^\d\/]{0,3}(\d+)[^\d\/]{3,}/ismu', $s . "___", $tmp)) {
            for ($i = 0; $i < $r; $i++) {
                // only "a/b" with a <= b can be "floor/floors"; keep the floor
                if ($tmp[1][$i] <= $tmp[2][$i]) {
                    $mathes[] = $tmp[1][$i];
                    $count++;
                }
            }
        }

        return $count ? min($mathes) : 0;
    }

    /**
     * Extracts the number of floors (or levels) of a building from free text.
     *
     * Same approach as parse_level(), but the largest captured number is
     * returned and the "a/b" fallback keeps the second number.
     *
     * @param string $s Ad text.
     * @return int|string Largest captured number (as matched text), 0 if none.
     */
    public static function parse_levels($s)
    {
        $mathes = [];
        $count = 0;
        $patterns = [
            # "2-х этажный/уровневый"
            '/\b(\d+)\D{0,4}\b(?:эт(?:\b|аж)|уров)/ismu',
            # "кол-во этажей/уровней 5", "этажей в здании"
            '/
                \b(?:этаж(?:ей?|ность)?|уровн?е(?:нь|й|вый)?)\b
                (?:\s+в\s+(?:доме|здании|помещении|коттедже))?
                \D{0,4}
                (\d+)\b
            /ismux',
            # "Этаж 4 из 16", "Этаж 4/17"
            '/\bэтаж\b\D{1,4}\b\d+\s*(?:\bиз\b|\/)\s*(\d+)\b/ismu'
        ];

        foreach ($patterns as $rx) {
            $r = preg_match_all($rx, $s, $tmp);
            if ($r) {
                $mathes = array_merge($mathes, $tmp[1]);
                $count += $r;
            }
        }

        # fallback for bare fractions, e.g. "1-к вартиру, 6/18 30/12/7"
        if (!$count && $r = preg_match_all('/\b(\d+)[^\d\/]{0,3}\/[^\d\/]{0,3}(\d+)[^\d\/]{3,}/ismu', $s . "___", $tmp)) {
            for ($i = 0; $i < $r; $i++) {
                // only "a/b" with a <= b can be "floor/floors"; keep the total
                if ($tmp[1][$i] <= $tmp[2][$i]) {
                    $mathes[] = $tmp[2][$i];
                    $count++;
                }
            }
        }

        return $count ? max($mathes) : 0;
    }

    /**
     * Extracts an area from free text.
     *
     * Every captured number is converted with __floatval() and the largest
     * one is returned.
     *
     * @param string $s Ad text.
     * @return mixed Largest converted value, or 0 when nothing matched.
     */
    public static function parse_square($s)
    {
        $mathes = [];
        $count = 0;
        $patterns = [
            // "54/30/9" or "54/-/9": the first number is captured
            '/
                \b(\d+(?:[,.]\d+)?)
                \s*\/\s*
                (?:\d+(?:[,.]\d+)?|-)
                \s*\/\s*
                (?:\d+(?:[,.]\d+)?|-)\b
            /ismx',
            // number followed by a square-metre unit
            '/
                \b(\d+(?:[.,]\d+)?)
                \D{0,3}
                (?:кв[.\s]\s*м|м\s*[2²]\b|квадрат\w{0,2})
            /ismux',
            // "площадь ... кв. м ... <number>"
            '/
                \bпл(?:ощадь)?\b
                (?:\s+(?:общая|жилая|дома|квартиры|комнаты|коттеджа|офиса|помещения|здания)\b)?
                [\s.,:;\(\)]+
                (?:в\s*)?
                (?:кв[.\s]\s*м|м\s*[2²]\b)
                [\s.,:;\(\)]+
                (\d+(?:[,.]\d+)?)
            /ismux',
            // object noun, then a number followed by "м"
            '/
                \b(?:комнат[ау]|квартир[ау]|дом|помещение|офис)\b
                [\s.,:;\(\)]+
                (\d+(?:[,.]\d+)?)
                \D{0,3}м
            /ismux',
        ];

        foreach ($patterns as $rx) {
            $r = preg_match_all($rx, $s, $tmp);
            if ($r) {
                foreach ($tmp[1] as &$_) $_ = __floatval($_);
                unset($_);
                $mathes = array_merge($mathes, $tmp[1]);
                $count += $r;
            }
        }

        return $count ? max($mathes) : 0;
    }

    /**
     * Extracts a land area from free text: a number next to "сот", "га" or
     * "гектар". Every captured number is converted with __floatval() and the
     * largest one is returned; the unit itself is not taken into account.
     *
     * @param string $s Ad text.
     * @return mixed Largest converted value, or 0 when nothing matched.
     */
    public static function parse_land($s)
    {
        $mathes = [];
        $count = 0;
        $patterns = [
            '/\b(\d+(?:[,.]\d+)?)\b\D{0,4}\b(?:сот|га|гектар)/ismu',
            '/
                \b(?:участ(?:ок|к[ае])|земл[иея])
                [\s.,:;\(\)]+
                (?:в\s*)?
                (?:сот(?:ок|ках)?|га|гектар(?:ах)?)
                [\s.,:;\(\)]+
                (\d+(?:[,.]\d+)?)
            /ismux',
        ];

        foreach ($patterns as $rx) {
            $r = preg_match_all($rx, $s, $tmp);
            if ($r) {
                foreach ($tmp[1] as &$_) $_ = __floatval($_);
                unset($_);
                $mathes = array_merge($mathes, $tmp[1]);
                $count += $r;
            }
        }

        return $count ? max($mathes) : 0;
    }

    /**
     * Extracts the number of rooms from free text.
     *
     * Tried in order: a number word ("двухкомнатная", "трёшка", ...), a
     * single digit before "к"/"ком"/"комнат...", a number after "комн...".
     *
     * @param string $s Ad text.
     * @param string $c Category code; "kvartiry" and "komnaty" default to 1.
     * @return int|string Room count, or the default (1 or 0) when not found.
     */
    public static function parse_rooms($s, $c = "")
    {
        $arr = [
            "одн" => 1,
            "дву" => 2,
            "тр" => 3,
            "четыр" => 4,
            "пяти" => 5,
            "шести" => 6,
        ];

        if (preg_match('/
            \b(
                (?:одн[уеё]|дву|тр[её]|четыр[её])шк
                |
                (?:одно?|двух?|тр[её]х?|четыр[её]х?|пяти?|шести?)\W{0,3}комн
            )
        /ismxu', $s, $tmp)) {
            // The pattern is case-insensitive while $arr holds lowercase keys,
            // so a capitalised "Двухкомнатная" used to fall through unmatched.
            $word = mb_strtolower($tmp[1], "utf-8");
            foreach ($arr as $k => $v)
                if (strpos($word, $k) !== false) return $v;
        }

        if (preg_match('/\b(\d)\s*[\D\S]{0,3}\s*к(?:ом(?:н(?:ат\w{0,3})?)?)?\b/ismu', $s, $tmp))
            return $tmp[1];

        if (preg_match('/\bкомн\w{0,2}\D{0,3}(\d+)\D/ismu', $s, $tmp))
            return $tmp[1];

        return ($c == "kvartiry" || $c == "komnaty" ? 1 : 0);
    }

    /**
     * Placeholder that site-specific grabbers are expected to override;
     * this base version only reports an error through $this->error().
     */
    public function isValidItemHtml($html, $item)
    {
        return $this->error("isValidItemHtml: pseudo abstract method, must be overriden");
    }

    /**
     * Selects the next entry of $this->mirrors (wrapping to the first one)
     * and stores it in $this->mirror.
     *
     * @param bool $deleteCurrent Remove the previously selected mirror from the list.
     */
    protected function get_mirror($deleteCurrent = false)
    {
        if(!count($this->mirrors)) return $this->error("get_mirror: mirror list is empty");
        $mirror = $this->mirror ?
next($this->mirrors) : current($this->mirrors);
        if(!$mirror){
            reset($this->mirrors);
            $mirror = current($this->mirrors);
        }
        if($deleteCurrent){
            $this->__log("deleting mirror: {$this->mirror}");
            reset($this->mirrors);
            foreach($this->mirrors as $k => $v){
                if($v == $this->mirror){
                    unset($this->mirrors[$k]);
                    break;
                }
            }
        }
        $this->mirror = $mirror;
        return $this->mirror;
    }

    /**
     * Fetches a URL through PHP's http stream wrapper and then sleeps for
     * 3 seconds. The request line is echoed to stdout before it is sent.
     *
     * @param string $url    Target URL.
     * @param array  $params Query parameters (GET) or form fields (POST).
     * @param string $method "GET" or "POST"; anything else terminates the script.
     * @return string|false Response body, false when file_get_contents() fails.
     */
    protected static function url_get_contents($url, $params = [], $method = "GET")
    {
        $stamp = "[" . date("Y-m-d H:i:s") . "]";

        // The "http" wrapper key is quoted: a bare word is an undefined constant.
        $http = [
            "protocol_version" => 1.1,
            "method" => $method,
            "timeout" => 60,
        ];

        if ($method == "GET") {
            if (is_array($params) && count($params))
                $url = $url . "?" . http_build_query($params);
            echo "{$stamp} url_get_contents: {$url}\n";
            $http["header"] = [
                "Connection: close"
            ];
        } elseif ($method == "POST") {
            echo "{$stamp} url_get_contents: {$url}\n" . print_r($params, 1);
            $content = http_build_query($params);
            $http["header"] = [
                "Content-Type: application/x-www-form-urlencoded",
                "Content-Length: " . strlen($content),
                "Connection: close",
            ];
            $http["content"] = $content;
        } else {
            die ("{$stamp} Unknown request method: \"{$method}\"\n");
        }

        $context = stream_context_create(["http" => $http]);
        $data = file_get_contents($url, false, $context);
        sleep(3);

        return $data;
    }

    /**
     * Requests data through the current mirror and gunzips the response.
     * When the response is empty or cannot be decoded, the mirror is dropped
     * from the list and the request is retried with another one; get_mirror()
     * reports an error once the list is empty.
     *
     * @param array|string $params Parameter array, a numeric item id (sent as
     *                             "item_id") or an http(s) URL (sent as "url");
     *                             any other scalar results in no parameters.
     * @param string       $method HTTP method passed to url_get_contents().
     * @return string Decoded response body.
     */
    protected function mirror_get_contents($params, $method = "GET")
    {
        $this->get_mirror();

        if (!is_array($params)) {
            // keys are quoted: bare words are undefined constants
            if (preg_match("@^\d+$@", $params))
                $params = ["item_id" => $params];
            else if (preg_match("@^https?://@is", $params))
                $params = ["url" => $params];
            else
                $params = [];
        }

        $data = @gzdecode(self::url_get_contents($this->mirror, $params, $method));
        if (!$data) {
            $this->__log($this->mirror . " failed, trying next...");
            $this->get_mirror(true);
            return $this->mirror_get_contents($params, $method);
        }

        return $data;
    }

    /**
     * Sends a body-less request (CURLOPT_NOBODY) to a URL and returns cURL's
     * transfer info.
     *
     * Without a proxy the method sleeps for 3 seconds before the request and
     * clears $this->proxy / $this->__proxy.
     * NOTE(review): TLS peer verification is switched off for this request.
     *
     * @param string       $url   Target URL.
     * @param bool|string  $proxy true to let get_proxy() choose, a proxy
     *                            string to request that one, false for none.
     * @param array|string $opts  Extra request header(s).
     * @return array Result of curl_getinfo().
     */
    protected function get_url_head($url, $proxy = false, $opts = [])
    {
        $headers = is_array($opts) ? $opts : [$opts];
        $headers[] = "Accept-Language: ru-RU,ru;q=0.8,en-US;q=0.5,en;q=0.3";
        $headers[] = "Cache-Control: max-age=0";
        $headers[] = "Connection: keep-alive";
        $headers[] = "User-Agent: Mozilla/5.0 (Windows NT 5.1; rv:31.0) Gecko/20100101 Firefox/31.0";

        $curl = curl_init();

        if ($proxy) {
            $this->get_proxy($proxy === true ? "" : $proxy);
            curl_setopt($curl, CURLOPT_PROXY, $this->proxy);
            if ($this->__proxy->pass)
                curl_setopt($curl, CURLOPT_PROXYUSERPWD, $this->__proxy->login . ":" . preg_replace("/\s/","",$this->__proxy->pass));
            // $headers[] = "X-Forwarded-For: {$this->__proxy->ip}";
        } else {
            $this->__proxy = null;
            $this->proxy = null;
            sleep(3);
        }

        curl_setopt($curl, CURLOPT_HTTPHEADER, $headers);
        curl_setopt($curl, CURLOPT_TIMEOUT, $this->timeout);
        curl_setopt($curl, CURLOPT_URL, $url);
        curl_setopt($curl, CURLOPT_NOBODY, true);
        curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($curl, CURLOPT_SSL_VERIFYPEER, false);
        curl_exec($curl);
        $info = curl_getinfo($curl);
        curl_close($curl);

        return $info;
    }

    /**
     * Stores an alternative code on the current site record.
     *
     * @param string $code Value assigned to $this->site->code_alias.
     */
    public function setSiteCodeAlias($code)
    {
        $this->site->code_alias = $code;
    }
}