session_end();
        die($this->__log($err));
    }

    /**
     * Loads grabber settings and the site registry from the database and binds
     * this instance to the site identified by $site_id.
     *
     * Calls $this->error() when the site row is missing or not active.
     *
     * @param int|string $site_id Value of `sites.id`; cast to int before use.
     */
    public function __construct($site_id)
    {
        // $num = mysql_result(mysql_query("SELECT COUNT(*) FROM grabber.objects WHERE link_id = 0 AND link_s = 0 AND is_agency = 0 AND checked = 0 AND display = 0 AND category = 'kvartiry';"),0);
        // mysql_query("DELETE FROM grabber.objects WHERE link_id = 0 AND link_s = 0 AND is_agency = 0 AND checked = 0 AND display = 0 AND category = 'kvartiry';");
        // $this->__log("\n\n=========\n INCORRECT ITEMS CLEARED: {$num}\n=========\n\n");

        // The config row may be absent (or the query may fail); only read ->value from a real row.
        $confQuery = mysql_query("SELECT value FROM oris.oris_conf WHERE name = 'GrabberFailsCount';");
        $res = $confQuery ? mysql_fetch_object($confQuery) : false;
        $failsValueFromDb = $res ? $res->value : null;
        if ($failsValueFromDb) {
            $this->fails = $failsValueFromDb;
        }

        // Registry of all sites, keyed by id.
        $this->sites = [];
        $dbc = mysql_query("SELECT * FROM sites ORDER BY id");
        while ($rt = mysql_fetch_object($dbc)) {
            $this->sites[$rt->id] = $rt;
        }

        $site_id = (int) $site_id;
        $this->site = &$this->sites[$site_id];
        if (!$this->site) {
            $this->error("site (id = {$site_id}) not found");
        }
        if (!$this->site->is_active) {
            $this->error("site «" . $this->site->name . "» (id = " . $this->site->id . ") disabled");
        }
        $this->site_id = $this->site->id;

        $this->cities = conf::$cities;
        $this->timeout = conf::$timeout;
        $this->mirrors = conf::$mirrors;

        // Relative day labels ("yesterday", "today") as they appear in scraped dates.
        $this->days = [
            "вчера" => date("Y-m-d",time() - 86400),
            "сегодня" => date("Y-m-d"),
        ];
        //$this->datelimit = date("Y-m-d", time() - 2 * 86400) . " 00:00:00";
        $this->datelimit = date("Y-m-d", time() - 7 * 86400) . " 00:00:00";

        // Month number => Russian genitive month name, used by get_date().
        $this->month = [
            "01" => "января",
            "02" => "февраля",
            "03" => "марта",
            "04" => "апреля",
            "05" => "мая",
            "06" => "июня",
            "07" => "июля",
            "08" => "августа",
            "09" => "сентября",
            "10" => "октября",
            "11" => "ноября",
            "12" => "декабря",
        ];
        $this->year = date("Y");
        $this->dS = 0.1;

        // Keys are quoted: as bare words they were undefined constants that only
        // fell back to strings with a notice.
        $this->deals = [
            'prodam' => 1,
            'sdam' => 2,
        ];
        $this->categories = [
            'kvartiry' => 1,                    # Apartments
            'komnaty' => 2,                     # Rooms
            'doma_dachi_kottedzhi' => 3,        # Houses, dachas, cottages
            'zemelnye_uchastki' => 4,           # Land plots
            'kommercheskaya_nedvizhimost' => 5, # Commercial real estate
            'garazhi_i_mashinomesta' => 6,      # Garages and parking spaces
            'nedvizhimost_za_rubezhom' => 7,    # Real estate abroad
        ];

        // $this->__proxy = (object) [ip => "", port => ""];
        // $this->proxy = "10.0.0.1:8800";
        // list($this->__proxy->ip,$this->__proxy->port) = explode(":",$this->proxy);

        $this->cookies = "{$_ENV[ROOT]}/../logs/{$this->site->code}.cookie";
    }

    /**
     * Records the outcome of a request made through the current proxy in `site_proxy`.
     *
     * @param mixed  $n      Truthy: increment fails_counter. Falsy: reset fails_counter,
     *                       increment success_counter and stamp last_success.
     * @param string $status Stored in last_status.
     * @param string $error  Stored in last_error.
     * @return false|null false when no proxy is currently selected.
     */
    public function update_proxy($n, $status = "", $error = "")
    {
        if (!$this->__proxy) {
            return false;
        }
        $status = mysql_real_escape_string($status);
        $error = mysql_real_escape_string($error);
        $counters = $n
            ? "fails_counter + 1"
            : "0, success_counter = success_counter + 1, last_success = NOW()";
        $sql = "UPDATE site_proxy SET date_check = NOW(), last_status = '{$status}', last_error = '{$error}', fails_counter = " . $counters . " WHERE site_id = '{$this->site->id}' AND proxy_id = '{$this->__proxy->id}'";
        mysql_query($sql);
    }

    /**
     * Selects the proxy for subsequent requests and stores it in $this->__proxy / $this->proxy.
     *
     * With $proxy given, that exact row of `proxy` is loaded ($this->error() if absent).
     * Otherwise a random active proxy of this site is taken, skipping proxies whose last
     * status is 403 unless that check is older than 15 minutes. When none qualifies the
     * fail counters are reset and the selection is retried once before erroring.
     *
     * @param string   $proxy Specific "ip:port" value to load, or "" for a random one.
     * @param int|bool $reset Truthy to reset the site's proxy counters before selecting.
     * @return string The selected proxy string.
     */
    public function get_proxy($proxy = "", $reset = 0)
    {
        // $this->__proxy = (object) [ip => "", port => ""];
        // $this->proxy = "10.0.0.1:8800";
        // list($this->__proxy->ip,$this->__proxy->port) = explode(":",$this->proxy);
        // return $this->proxy;
        if ($proxy) {
            $this->__proxy = mysql_fetch_object(mysql_query("SELECT * FROM proxy WHERE proxy = '" . mysql_real_escape_string($proxy) . "'")) or $this->error("proxy {$proxy} not found!");
        } else {
            if ($reset) {
                mysql_query("UPDATE site_proxy SET fails_counter = 0, last_status = '' WHERE site_id = '{$this->site->id}' AND is_active");
            }
            $this->__proxy = mysql_fetch_object(mysql_query("SELECT p.* FROM proxy p INNER JOIN site_proxy s ON s.site_id = '{$this->site->id}' AND p.id = s.proxy_id WHERE s.is_active AND (s.last_status != '403' OR s.last_status = '403' AND s.date_check + INTERVAL 15 MINUTE < NOW()) ORDER BY RAND() LIMIT 1"));
            if (!$this->__proxy) {
                return $reset ? $this->error("no active proxy in proxy-list!") : $this->get_proxy("", 1);
            }
        }
        $this->proxy = $this->__proxy->proxy;
        list($this->__proxy->ip,$this->__proxy->port) = explode(":",$this->proxy);
        return $this->proxy;
    }

    /** @var string[]|null Cached proxy list; null until get_proxylist() has run. */
    public $proxylist;

    /**
     * Returns the proxy list, one entry per line of the stored value.
     *
     * The list is read from `oris.oris_conf` (name = 'ProxyListAvito'); when that value
     * is empty it is downloaded from the provider URL instead. The result is cached.
     *
     * @return string[]
     */
    public function get_proxylist()
    {
        if (is_null($this->proxylist)) {
            $que = "SELECT value FROM oris.oris_conf WHERE name = 'ProxyListAvito';";
            $result = mysql_result(mysql_query($que),0);
            if (!$result) {
                // NOTE(review): the access hash is hard-coded in the URL; consider moving it to config.
                $result = file_get_contents('https://my.virty.io/proxy_list/proxies.php?hash=19e43d5e1f5af6a6db7d6d9de7b377f3&type=http&format=format1');
            }
            // explode() replaces split(), which is deprecated and treats its delimiter as a regex.
            $this->proxylist = explode("\n", (string) $result);
        }
        return $this->proxylist;
    }

    /**
     * Converts a scraped date label and an optional "HH:MM" time into "Y-m-d HH:MM:00".
     *
     * Labels found in $this->days are mapped directly; anything else is parsed as
     * "<day><month name><year>" with the month resolved by prefix against $this->month.
     *
     * @param string $date Date label from the page.
     * @param string $time "HH:MM"; defaults to "00:00".
     * @return string
     */
    public function get_date($date, $time = "")
    {
        if(!$time) $time = "00:00";
        $time .= ":00";
        $date = mb_strtolower(trim($date),"utf-8");
        if($this->days[$date]) $date = $this->days[$date];
        else{
            preg_match("/^(\d*)(\D*)(\d*)$/i",$date,$date);
            if(!$date[3]) $date[3] = $this->year;
            elseif(mb_strlen($date[3],"utf-8") < 4) $date[3] = "20" . $date[3];
            if(!isset($this->monthcache[$date[2]])){
                $this->monthcache[$date[2]] = preg_replace("/\p{P}+$/","",trim($date[2]));
                foreach($this->month as $k => $v){
                    if(preg_match("/^" . preg_quote($this->monthcache[$date[2]],"/") . "/ui",$v)){
                        $this->monthcache[$date[2]] = $k;
                        break;
                    }
                }
            }
            if(mb_strlen($date[1],"utf-8") < 2) $date[1] = "0" .
$date[1];
            $date[2] = $this->monthcache[$date[2]];
            if($date[2] > date("m") && $date[3] >= $this->year) $date[3] = $this->year - 1;
            $date = "{$date[3]}-{$date[2]}-{$date[1]}";
        }
        return $date . " " . $time;
    }

    /**
     * Fetches $url through the remote relay script instead of a local cURL request.
     *
     * The method, URL and serialized options are POSTed as form fields; the relay's
     * response body is returned as is (false when file_get_contents() fails).
     *
     * @param string $url    Target URL.
     * @param mixed  $opts   Extra options, sent to the relay as serialize($opts).
     * @param string $method HTTP method the relay should use.
     * @return string|false
     */
    private function __get_url($url, $opts, $method)
    {
        sleep(4);
        $log = "get_url (oris-proxy): {$method} : {$url}";
        $this->__log($log);

        // Keys are quoted: as bare words they were undefined constants.
        $params = [
            'METHOD' => $method,
            'URL' => $url,
            // NOTE(review): the relay presumably unserialize()s this value — confirm it is trusted.
            'OPTS' => serialize($opts),
        ];
        $content = http_build_query($params);
        $context = stream_context_create([
            'http' => [
                "protocol_version" => 1.1,
                "method" => "POST",
                "timeout" => 60,
                "header" => [
                    "Content-Type: application/x-www-form-urlencoded",
                    "Content-Length: " . strlen($content),
                    "Connection: close",
                ],
                "content" => $content,
            ]]);
        $data = file_get_contents("https://office.oris-info.ru/proxy/index.php", false, $context);
        return $data;
    }

    /**
     * Downloads $url with cURL, optionally through a proxy, and returns the body.
     *
     * Once $this->http_tunnel is set (or when called without proxy and $n > 2) every
     * request is delegated to __get_url(). A failed proxied request is retried once
     * without proxy; a failed direct request yields "".
     *
     * @param string          $url   Absolute http(s) URL.
     * @param bool|string     $proxy true: pick a proxy via get_proxy(); string: use that proxy;
     *                               false: connect directly.
     * @param string[]|string $opts  Extra request header line(s).
     * @param int             $n     Attempt counter compared against $this->fails / 3.
     * @return string Response body, or "" on failure.
     */
    public function get_url($url, $proxy = true, $opts = [], $n = 1)
    {
        if (!$url || !preg_match("/^https?[:]\/\//",$url)) {
            return $this->error("get_url: incorrect url = {$url}");
        }
        if ($this->http_tunnel || !$proxy && $n > 2) {
            $this->http_tunnel = true;
            return $this->__get_url($url, $opts, "GET");
        }
        if ($n > $this->fails / 3) {
            if ($proxy) {
                $proxy = false;
            } else {
                $proxy = true;
                $n = 1;
            }
        }

        $separator = '------------------------------------------------------------------------------------------------------------';
        $log = "get_url: url = {$url}";

        $ch = curl_init();
        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_HEADER, false);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
        curl_setopt($ch, CURLOPT_ENCODING, "");
        curl_setopt($ch, CURLOPT_TIMEOUT, $this->timeout);
        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
        curl_setopt($ch, CURLOPT_MAXREDIRS, 2);
        // NOTE(review): certificate verification is disabled for every request.
        curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
        //curl_setopt ($ch, CURLOPT_POST, 1);
        // curl_setopt($ch, CURLOPT_SSLVERSION, 2);
        if ($this->cookies) {
            curl_setopt($ch, CURLOPT_COOKIEFILE, $this->cookies);
            curl_setopt($ch, CURLOPT_COOKIEJAR, $this->cookies);
        }

        $headers = is_array($opts) ? $opts : [$opts];
        $headers[] = "Accept-Language: ru-RU,ru;q=0.8,en-US;q=0.5,en;q=0.3";
        $headers[] = "Cache-Control: max-age=0";
        $headers[] = "Connection: keep-alive";
        // Add a default User-Agent only when the caller did not supply one.
        $hasUserAgent = count(array_filter($headers, function($v) { return strpos($v, 'User-Agent') !== false; })) > 0;
        if (!$hasUserAgent) {
            // $headers[] = "User-Agent: Mozilla/5.0 (Windows NT 5.1; rv:31.0) Gecko/20100101 Firefox/31.0";
            $headers[] = "User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.72 Safari/537.36";
        }

        if ($proxy) {
            $this->get_proxy($proxy === true ? "" : $proxy);
            curl_setopt($ch, CURLOPT_PROXY, $this->proxy);
            if ($this->__proxy->pass) {
                curl_setopt($ch, CURLOPT_PROXYUSERPWD, $this->__proxy->login . ":" . preg_replace("/\s/","",$this->__proxy->pass));
            }
            $log .= ", proxy = {$this->proxy}";
            $this->__log($separator);
            $this->__log($this->proxy);
            $this->__log($this->__proxy->login);
            // The proxy password must never reach the log file; record only whether one is set.
            $this->__log($this->__proxy->pass ? "proxy password: ***" : "proxy password: (none)");
            $this->__log($separator);
            // $headers[] = "X-Forwarded-For: {$this->__proxy->ip}";
        } else {
            $this->__proxy = null;
            $this->proxy = null;
            sleep(3);
        }
        $this->__log($log);

        curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);
        curl_setopt($ch, CURLINFO_HEADER_OUT, true);

        // Debug dump of the handle state before the request.
        $this->__log($separator);
        $this->__log(json_encode(curl_getinfo($ch)));
        $this->__log($separator);
        $this->__log(json_encode(curl_getinfo($ch, CURLINFO_COOKIELIST)));
        $this->__log($separator);

        $ss = curl_exec($ch);
        $er = curl_error($ch);
        $ci = curl_getinfo($ch);

        // Debug dump of the handle state after the request.
        $this->__log($separator);
        $this->__log(json_encode($ci));
        $this->__log($separator);
        curl_close($ch);

        $f = $er || $ci["http_code"] != 200;
        if ($f) {
            $this->__log("{$log} fails (err = {$er}, http_code = {$ci["http_code"]})");
            // NOTE(review): $n is not forwarded, so the retry always runs with $n = 1.
            return $proxy ? $this->get_url($url, false, $opts) : "";
        }
        return $ss;
    }

    public function post_url($url, $proxy = true, $opts = [], $form = [])
    {
        //$ss = $this->__get_url($url, $opts, "POST");
        //return $ss ?
// $ss : $this->error("post_url: oris-proxy-error");
        // (the line above is the second half of a commented-out statement)

        // Only absolute http(s) URLs are accepted.
        if(!$url || !preg_match("/^https?[:]\/\//",$url)) return $this->error("post_url: incorrect url = {$url}");
        $query = http_build_query($form);
        $log = "post_url: url = {$url}&{$query}";
        $ch = curl_init();
        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_POST, 1);
        curl_setopt($ch, CURLOPT_POSTFIELDS, $query);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
        curl_setopt($ch, CURLOPT_TIMEOUT, $this->timeout);
        curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
        if ($this->cookies) {
            curl_setopt($ch, CURLOPT_COOKIEFILE, $this->cookies);
            curl_setopt($ch, CURLOPT_COOKIEJAR, $this->cookies);
        }
        $headers = [];
        // $opts carries extra header lines: either an array or a single string.
        $headers = is_array($opts) ? $opts : [$opts];
        $headers[] = "Accept-Language: ru-RU,ru;q=0.8,en-US;q=0.5,en;q=0.3";
        $headers[] = "Cache-Control: max-age=0";
        $headers[] = "Connection: keep-alive";
        $headers[] = "User-Agent: Mozilla/5.0 (Windows NT 5.1; rv:31.0) Gecko/20100101 Firefox/31.0";
        $headers[] = "Content-length: " . strlen($query);
        // The $proxy argument is not consulted here; whatever $this->proxy currently holds is used.
        curl_setopt($ch, CURLOPT_PROXY, $this->proxy);
        $log .= ", proxy = {$this->proxy}";
        $this->__log($log);
        curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);
        sleep(4);
        $ss = curl_exec($ch);
        $er = curl_error($ch);
        $ci = curl_getinfo($ch);
        curl_close($ch);
        // Any cURL error or non-200 status counts as failure and yields "".
        $f = $er || $ci["http_code"] != 200;
        if ($f) {
            $this->__log("{$log} fails (err = {$er}, http_code = {$ci["http_code"]})");
            return "";
        }
        return $ss;
    }

    /** Hook called by grab() before work starts; empty in this class. */
    public function session_start() { }

    /**
     * Hook called when a grab run ends: removes the pid file named after the site code
     * and command-line arguments 2-4. unlink() errors are suppressed.
     */
    public function session_end()
    {
        @unlink("{$_ENV[ROOT]}/../pids/{$this->site->code}-{$_SERVER[argv][2]}-{$_SERVER[argv][3]}-{$_SERVER[argv][4]}.pid");
    }

    // Site-specific scraping steps. grab() uses getPages() as the upper page bound,
    // iterates the result of getItems(), and calls saveUrl() when getItem() is falsy.
    abstract public function getPages($url, $n = 0);
    abstract public function getItems($url, $n = 0);
    abstract public function getItem($item, $n = 0);
    // Builds the listing URL for a section suffix and page number.
    abstract public function create_url($urlsfx, $p = 1);

    /**
     * Decides whether a listed item should be processed.
     *
     * @param object $item Item with at least ->id and ->date.
     * @return int 1 to process the item, 0 to skip it.
     */
    final public function checkItem($item)
    {
        // Items dated at or before the date limit are skipped.
        if($item->date && $item->date <= $this->datelimit) return 0;
        $olditem = mysql_fetch_object(mysql_query("SELECT * FROM objects WHERE site_id = '{$this->site->id}' AND id = 
'{$item->id}'"));
        if(!$this->check_olditems) return $olditem ? 0 : 1;
        if( !$olditem ) return 1;
        if(
            !$olditem->display
            || $olditem->is_agency == 1
            || $item->date <= $olditem->date
            || $olditem->category != $this->category
            || $olditem->deal_type != $this->deal_type
            // || $olditem->category != "kvartiry" && $olditem->category != "komnaty"
            // && $olditem->category != "doma_dachi_kottedzhi"
            // && $olditem->category != "zemelnye_uchastki"
        ) return 0;
        $d1 = date_create($item->date);
        $d2 = date_create($olditem->date);
        $r = date_diff($d1, $d2);
        if(!$r->days) return 0;
        $olditem->date_diff = $r->days;
        $olditem->price = __floatval(preg_replace('/[^\d.,]+/', '', $olditem->price));
        $this->olditems[$item->id] = $olditem;
        return 1;
    }

    /**
     * Stores a newly grabbed item in `objects`, its phones in `objects_phones`, and
     * links it to street / house number / object group.
     *
     * Items that checkItem() registered in $this->olditems are handed to updateItem().
     * Items from a blocklisted seller, or whose phone is a recently changed agency phone,
     * are stored with display = 0 and are not linked.
     *
     * @param object $item Scraped item; modified in place.
     * @return mixed Result of updateItem() for known items, otherwise nothing.
     */
    public function saveItem($item)
    {
        if (isset($this->olditems[$item->id])) {
            return $this->updateItem($item);
        }

        $item->site_id = $this->site->id;
        $item->date_add = date("Y-m-d H:i:s");
        if (!$item->city) $item->city = $this->city;
        if (!$item->deal_type) $item->deal_type = $this->deal_type;
        if (!$item->category) $item->category = $this->category;

        // Sellers whose listings are always hidden. Names use plain double quotes: the
        // previous \" inside single-quoted literals was a literal backslash and never matched.
        $blockedSellers = [
            'АРЕВЕРА-Недвижимость',
            'Компания «Этажи»',
            'ГРАНТА-недвижимость',
            'Lacrum-Недвижимость',
            'Агентство недвижимости "Астория"',
            'Паритет-инвест',
            'Исполин Недвижимость',
            'Красноярское Инвестиционное Агентство Недвижимости',
            'Novostroy Invest эксперт на рынке Недвижимости Красноярска!',
            'Century21Smart',
            'Агентство недвижимости «Фрегат»',
            'ООО "Алекс"',
            'Центр обмена жилья',
            'Агентство недвижимости "СВОЁ"',
            'AN BROOKLIN',
            'ООО ГАРАНТ',
            'АНГОР',
            'Агентство недвижимости и оценки "Профессионал"',
            'АН ДОМ',
            'Красноярский Центр Недвижимости',
            'Центр Загородной недвижимости "Кедр"',
            'Ремесленникъ',
            'Перспектива24-Красноярск-Федеральный оператор недвижимости',
            'АН "СИТИ"',
            'АН "Stolica-24"',
            'ООО "ЦЕНТР ОФОРМЛЕНИЯ НЕДВИЖИМОСТИ"',
            '"Делегат Групп" недвижимость',
            'АН "Экспонента"',
            'АН Территория 24',
            'АКТИВ НЕДВИЖИМОСТЬ',
            'АН Городские районы',
            'АН ИнвестКристал',
            'ЖилДом - Недвижимость !',
            'Новый Красноярск',
            'АН СССР',
            'Агентство Недвижимости Ангерона',
            'Ankom-недвижимость',
            'Сибирский дом',
            'Бюро Недвижимости "МеЧтА"',
            'Авангард+',
            'АН "Вариант Центр"',
            'Риэлт Сибирь',
            'агентство недвижимости "Ермак"',
            'Группа компаний РиМиР',
            'Дельта Гарант',
        ];
        // Strict membership test. The old index loop ran one step past the end of the list,
        // so an item with a null seller compared equal to the missing element and was hidden.
        // Backslash-escaped quotes in the seller are normalised so earlier matches still hold.
        if (is_string($item->seller)
            && in_array(str_replace('\\"', '"', $item->seller), $blockedSellers, true)
        ) {
            $item->display = 0;
        }

        if ($item->is_agency === -1) {
            // Hide the item when its phone is not masked by the site and belongs to an
            // agency record changed within the last 8 months.
            $phoneSql = mysql_real_escape_string($item->phone);
            $isRecentAgencyPhone =
                !preg_match("/(Номер продавца защищён|Номер агентства защищён|Номер защищён)/ismU",$item->description)
                && mysql_fetch_object(mysql_query("SELECT COUNT(*) as cnt FROM oris.oris_agencies WHERE phone = '{$phoneSql}' AND date_change > DATE_ADD(NOW(), INTERVAL -8 MONTH);"))->cnt > 0;
            if ($isRecentAgencyPhone) {
                $item->display = 0;
            }
        }

        // Every remaining property becomes a column of the INSERT.
        $data = (array) $item;
        unset($data["city"]);
        unset($data["date_begin"]);
        unset($data["date_end"]);
        unset($data["proxy"]);
        unset($data["session_id"]);
        unset($data["raw_params"]);
        foreach ($data as &$_) $_ = mysql_real_escape_string($_);
        unset($_);

        # TODO: ON DUPLICATE KEY UPDATE ...
        $sql = "INSERT INTO objects (`" . implode("`, `", array_keys($data)) . "`) VALUES('" . implode("', '", $data) . "')";
        mysql_query($sql) or $this->error(mysql_error());

        if ($item->display) {
            $item->object_id = mysql_insert_id();
            if ($item->phone > "") {
                // One phone per line; a leading 7 is rewritten to 8.
                $tmp = explode("\n",preg_replace("/^7/sm","8",$item->phone));
                $sql = "INSERT INTO objects_phones (object_id,phone) VALUES ";
                foreach ($tmp as $_) {
                    $_ = mysql_real_escape_string(trim($_));
                    $sql .= "('{$item->object_id}','{$_}'), ";
                }
                $sql = preg_replace("/,\s+$/","",$sql);
                $sql .= " ON DUPLICATE KEY UPDATE object_id = object_id";
                mysql_query($sql) or $this->__log("saveItem(): " . mysql_error());
            }
            $this->get_street($item);
            $this->get_house_number($item);
            $this->link_object($item);
            mysql_query("UPDATE objects SET link_id = '{$item->link_id}', link_s = '{$item->link_s}', street_id = '{$item->street_id}', streets = '{$item->streets}', house_number = '{$item->house_number}' WHERE object_id = '{$item->object_id}'");
            mysql_query("UPDATE objects_phones SET link_id = '{$item->link_id}' WHERE object_id = '{$item->object_id}'");
        }
    }

    /**
     * Refreshes an already stored item (found by checkItem()) with newly scraped data.
     *
     * Items whose URL contains cian.ru or irr.ru are left untouched.
     *
     * @param object $item Scraped item; modified in place.
     */
    public function updateItem($item)
    {
        $olditem = $this->olditems[$item->id];
        $item->site_id = $this->site->id;
        $item->s = number_format($item->s, 2, '.', '');
        $date_add = date("Y-m-d H:i:s");
        if(!$item->city) $item->city = $this->city;
        if(!$item->deal_type) $item->deal_type = $this->deal_type;
        if(!$item->category) $item->category = $this->category;
        unset($item->city);
        unset($item->date_begin);
        unset($item->date_end);
        unset($item->proxy);
        unset($item->session_id);
        unset($item->raw_params);
        $urlContainsCian = strpos($item->url, 'cian.ru') !== false;
        $urlContainsIrr = strpos($item->url, 'irr.ru') !== false;
        $excludeUpdate = $urlContainsCian || $urlContainsIrr;
        if($excludeUpdate){
            return;
        }
        if($item->is_agency === -1){
            // The phone is escaped before being placed into the query.
            $phoneNotProtectedAndInOrisAgenciesInLastFourMonths = !preg_match("/(Номер продавца защищён|Номер агентства защищён|Номер защищён)/ismU",$item->description) && mysql_fetch_object(mysql_query("SELECT COUNT(*) as cnt FROM oris.oris_agencies WHERE phone = '" . mysql_real_escape_string($item->phone) . "' AND date_change > DATE_ADD(NOW(), INTERVAL -8 MONTH);"))->cnt > 0;
            if($phoneNotProtectedAndInOrisAgenciesInLastFourMonths){
                $item->display = 0;
            }
            else{
                // A stored note mentioning an agency also hides the item.
                $note = mysql_fetch_object(mysql_query("SELECT note as note FROM grabber.objects_notes WHERE (link_id, link_s) IN (SELECT link_id,link_s FROM grabber.objects WHERE object_id = '{$olditem->object_id}');"))->note;
                if($note && preg_match("/([Аа]гентство)/ismU",$note)){
                    $item->display = 0;
                }
            }
        }
        $cmpfields = ['date' => 0, 'price' => 0, 'phone' => 0, 'level' => 0, 'levels' => 0, 'rooms' => 0, 's' => 0,
'address' => 0];
        // $cmpfields marks which tracked fields changed; it is later passed to link_object().
        $log = "UPDATED: [{$olditem->object_id}] : ";
        $sql = "UPDATE objects SET date_add = '{$date_add}'";
        // Every property of $item becomes a SET clause; differences are collected into $log.
        foreach((array) $item as $k => $v) {
            if(isset($cmpfields[$k])) {
                if($k == 'phone') {
                    // Phones are compared as sets of lines after rewriting a leading 7 to 8;
                    // the flag holds the number of phones present now but not before.
                    $t1 = explode("\n",preg_replace("/^7/sm","8", $item->phone));
                    $t2 = explode("\n",preg_replace("/^7/sm","8", $olditem->phone));
                    $d = array_diff($t1, $t2);
                    $cmpfields[$k] = count($d);
                }
                else if($item->$k != $olditem->$k) {
                    $log .= "{$k}: {$olditem->$k} --> {$item->$k}; ";
                    $cmpfields[$k] = 1;
                }
            }
            else if($item->$k != $olditem->$k) {
                // Untracked fields are only named in the log, without values.
                $log .= "{$k}; ";
            }
            $sql .= ", `{$k}` = '" . mysql_real_escape_string($v) . "'";
        }
        $sql .= " WHERE object_id = '{$olditem->object_id}'";
        mysql_query($sql) or $this->error(mysql_error());
        $this->__log($log);
        if($item->display) {
            $item->date_add = $date_add;
            $item->object_id = $olditem->object_id;
            if($item->phone > ""){
                // Replace the stored phone rows with the freshly scraped ones.
                $tmp = explode("\n",preg_replace("/^7/sm","8",$item->phone));
                mysql_query("DELETE FROM objects_phones WHERE object_id = '{$item->object_id}'");
                $sql = "INSERT INTO objects_phones (object_id,phone) VALUES ";
                foreach($tmp as $_){
                    $_ = mysql_real_escape_string(trim($_));
                    $sql .= "('{$item->object_id}','{$_}'), ";
                }
                // Strip the trailing ", " left by the loop.
                $sql = preg_replace("/,\s+$/","",$sql);
                $sql .= " ON DUPLICATE KEY UPDATE object_id = object_id";
                mysql_query($sql) or $this->__log("updateItem(): " .
mysql_error());
            }
            $this->get_street($item);
            $this->get_house_number($item);
            $this->link_object($item, $cmpfields);
            mysql_query("UPDATE objects SET link_id = '{$item->link_id}', link_s = '{$item->link_s}', street_id = '{$item->street_id}', streets = '{$item->streets}', house_number = '{$item->house_number}' WHERE object_id = '{$item->object_id}'");
            mysql_query("UPDATE objects_phones SET link_id = '{$item->link_id}' WHERE object_id = '{$item->object_id}'");
            mysql_query("UPDATE objects_links SET checked = '{$item->checked}' WHERE (link_id, link_s) = ('{$item->link_id}', '{$item->link_s}')") or die(mysql_error());
            mysql_query("UPDATE objects SET checked = '{$item->checked}' WHERE (link_id, link_s) = ('{$item->link_id}', '{$item->link_s}')") or die(mysql_error());
            mysql_query("INSERT INTO objects_notes (link_id, link_s, note) VALUES('{$item->link_id}','{$item->link_s}','" . mysql_real_escape_string($log) . "') ON DUPLICATE KEY UPDATE note = TRIM(CONCAT(note, '" . mysql_real_escape_string("\n" . $log) . "'))");
        }
    }

    /**
     * Checks whether none of the given phones belongs to a known agency.
     *
     * Each phone is looked up in `oris.oris_agencies` twice: without the country digit
     * and city code, and as a LIKE pattern whose first digit is a single-character wildcard.
     *
     * @param string|string[] $phones One phone or a list of phones.
     * @return int 1 when no phone matches an agency; 0 when one matches or no phone was given.
     */
    public function checkPhone($phones)
    {
        if (!is_array($phones)) {
            $phones = [$phones];
        }
        $arr = [];
        foreach ($phones as $_) {
            $_ = trim($_);
            if ($_) $arr[] = $_;
        }
        if (!count($arr)) {
            return 0;
        }

        // City code without its first character, quoted so it is matched literally in the regex.
        $tailLength = mb_strlen(conf::$city->code, "utf-8") - 1;
        $code = preg_quote(mb_substr(conf::$city->code, 1, $tailLength, "utf-8"), "/");

        foreach ($arr as $phone) {
            /*
            $rt = mysql_fetch_object(mysql_query("SELECT * FROM phone_cache WHERE phone = '" . mysql_real_escape_string($phone) . "'"));
            if ($rt && $rt->is_agency) return 0;
            else if (!$rt) {
                $is_agency = 0;
                $url = "http://rent-scaner.ru/check-phone";
                $data = $then->get_url($url);
                if ($data) {
                    if (preg_match('@*\bname="csrf-token"])(?=[^>*\bcontent="(.*)"])[^>]*>@isU', $data, $tmp)) {
                        $data = $this->post_url($url, false, [], ["_csrf" => $tmp[1], "CheckPhone[phone]" => $phone]);
                        if ($data && preg_match())
                    }
                }
                mysql_query("INSERT INTO phone_cache (phone, is_agency) VALUES('" . mysql_real_escape_string($phone) . "', '{$is_agency}')");
                if ($is_agency) return 0;
            }
            */
            $phone = preg_replace("/^\+7/","7",$phone);
            $tmp = mysql_real_escape_string(preg_replace("/^[78]{$code}/","",$phone));
            // "_" is the single-character LIKE wildcard, so 7... and 8... both match.
            $phone = mysql_real_escape_string(preg_replace("/^\d/","_",$phone));
            $matches = mysql_result(mysql_query("SELECT COUNT(*) FROM oris.oris_agencies WHERE phone > '' AND !deleted AND (phone = '{$tmp}' OR phone LIKE '{$phone}')"),0);
            if ($matches) {
                return 0;
            }
        }
        return 1;
    }

    /**
     * Runs one grab pass for a city / deal type / category combination: collects new
     * items from every section page, then fetches and stores each of them.
     *
     * @param string $city      Key of $this->cities.
     * @param string $deal_type Key of $this->deals.
     * @param string $category  Key of $this->categories.
     * @return bool false when the combination is not configured, true otherwise.
     */
    public function grab($city, $deal_type, $category)
    {
        if(
            !isset($this->cities[$city]) ||
            !isset($this->deals[$deal_type]) ||
            !isset($this->categories[$category]) ||
            !isset($this->sections[$deal_type][$category])
        ) {
            return false;
        }
        $this->city = $city;
        $this->deal_type = $deal_type;
        $this->category = $category;
        $this->session_start();
        $this->items = [];
        $sections = is_array($this->sections[$deal_type][$category]) ? $this->sections[$deal_type][$category] : [$this->sections[$deal_type][$category]];
        foreach($sections as $urlsfx){
            $url = $this->create_url($urlsfx);
            $pages = $this->getPages($url);
            // $limit counts consecutive rejected items; reaching zero ends the whole scan.
            $limit = $this->limit;
            for ($p = $this->startpage; $p <= $pages; $p++){
                $url = $this->create_url($urlsfx,$p);
                $items = $this->getItems($url);
                foreach($items as $item){
                    if(!$this->checkItem($item)) $limit--;
                    else{
                        $limit = $this->limit;
                        $this->items[$item->id] = $item;
                    }
                    if(!$limit) break 2;
                }
            }
        }
        $n = count($this->items);
        $this->__log("\n\n=========\n ITEMS FOUND: {$n}\n=========\n\n");
        $this->items = array_reverse($this->items);
        foreach($this->items as $item){
            $this->getItem($item) ?
null : $this->saveUrl($item);
        }
        $this->session_end();
        return true;
    }

    /**
     * Launches one background grabber.php process per city / deal type / category,
     * each wrapped in a non-blocking flock and appending its output to its own log file.
     */
    public function cron()
    {
        foreach($this->cities as $city => $city_id){
            foreach($this->sections as $deal_type => $params){
                foreach($params as $category => $v){
                    $logfile = "{$_ENV[ROOT]}/../logs/{$this->site->code}-{$city}-{$deal_type}-{$category}.log";
                    /*
                    $pidfile = "{$_ENV[ROOT]}/../pids/{$this->site->code}-{$city}-{$deal_type}-{$category}.pid";
                    if (is_file($pidfile)) {
                        $pid = file_get_contents($pidfile);
                        @shell_exec("kill -9 {$pid} > /dev/null 2>&1");
                    }
                    $cmd = "php {$_ENV[ROOT]}/grabber.php {$this->site->code} {$city} {$deal_type} {$category} >> {$logfile} 2>&1 & echo $!";
                    // echo "[",date("Y-M-d H:i:s"),"] ", $cmd ,"\n";
                    $pid = shell_exec($cmd);
                    file_put_contents($pidfile,$pid);
                    */
                    $cmd = "php {$_ENV[ROOT]}/grabber.php {$this->site->code} {$city} {$deal_type} {$category} >> {$logfile} 2>&1 &";
                    // NOTE(review): the lock file name omits {$city}, unlike the log file, so
                    // jobs for different cities share one lock — confirm this is intended.
                    $cmd = "flock -n /tmp/{$this->site->code}-{$deal_type}-{$category}.flock -c \"{$cmd}\"";
                    shell_exec($cmd);
                }
            }
        }
    }

    /**
     * Launches a single background grabber.php process for this site under a
     * non-blocking per-site flock.
     */
    protected function __cron()
    {
        $logfile = "{$_ENV[ROOT]}/../logs/{$this->site->code}.log";
        $cmd = "php {$_ENV[ROOT]}/grabber.php {$this->site->code} >> {$logfile} &";
        $cmd = "flock -n /tmp/{$this->site->code}.flock -c \"{$cmd}\"";
        shell_exec($cmd);
    }

    /**
     * Resolves the item's street from its address, title and description and stores
     * the result in $item->street_id (best match) and $item->streets (comma-separated ids).
     *
     * @param object $item Item; street_id and streets are overwritten.
     * @return array Array holding references to $item->street_id and $item->streets.
     */
    public function get_street(&$item)
    {
        $item->street_id = 0;
        $item->streets = "";
        $res = [street_id => &$item->street_id, streets => &$item->streets];
        if(!$item->category) return $res;

        // Remove the city name; fall back to the title when too little of the address remains.
        $re = "/\b" . preg_quote(conf::$city->name,"/") . "\b/ui";
        $address = trim(preg_replace($re,"",$item->address));
        if(mb_strlen($address,"utf-8") < 4) $address = preg_replace($re,"",$item->title);

        // Normalisation: Latin "c" to Cyrillic "с", "ё" to "е", street-type words to
        // abbreviations, district prefixes dropped, numbers surrounded by spaces.
        $address = preg_replace("/c/i","с",$address);
        $address = preg_replace("/ё/i","е",$address);
        $address = preg_replace("/\bлет.*\b/Uui","лет",$address);
        $address = preg_replace("/\bпереулок\b/Uui","пер",$address);
        $address = preg_replace("/\bпроспект\b/Uui","пр",$address);
        $address = preg_replace("/\bтракт\b/Uui","тр",$address);
        $address = preg_replace("/\b.*\bр(?:айо|\-о?)н\b/Uui","",$address);
        $address = preg_replace("/(\d+(?:\-я)?)/ui"," $1 ",$address);
        $address = mysql_real_escape_string($address);

        // A configured stop word found in address or title decides the street immediately.
        $__address = $item->address . PHP_EOL . $item->title;
        foreach (conf::$stopwords as $a => $id) {
            if(preg_match("/\b{$a}/isu", $__address)) {
                $item->street_id = $id;
                $item->streets = $id;
                return $res;
            }
        }
        $item->street_id = 0;
        $item->streets = "";

        // Compound prefixes: $r = 1 when the prefix is directly followed by a Cyrillic
        // letter, 2 when a separator followed it (the separator is then removed).
        $re = "/\b(ново|старо|дальне|нижне|верхне|северо|юго|южно|западно|восточно|средне)(.)/ui";
        $r = 0;
        if(preg_match($re,$address,$tmp)){
            $r = 1;
            if(preg_match("/[^а-я]/ui",$tmp[2])){
                $r = 2;
                $address = preg_replace("/\b{$tmp[1]}[^а-я]+/ui",$tmp[1],$address);
            }
        }

        // Attempt 1: full-text match of the normalised address.
        $sql = "SELECT id, MATCH(name, prefix) AGAINST ('%s') AS rank FROM oris_streets WHERE !deleted HAVING rank > 0 ORDER BY rank DESC LIMIT 8";
        $db = mysql_query(sprintf($sql,mysql_real_escape_string($address)));
        // Attempt 2: the same search with the compound prefix hyphenated.
        if(!mysql_num_rows($db) && $r == 2){
            $address = preg_replace($re,"$1-$2",$address);
            $db = mysql_query(sprintf($sql,mysql_real_escape_string($address)));
        }
        // Attempt 3: search the description with common listing words removed.
        $re = "/\b(?:прода.*|аренд.*|сда|сниму|квартир.*|комнат.*|адрес.*|цена|окн.*|лоджи.*|балкон|совм.*|разд.*|ремонт|договор|торг)\b/Uui";
        if(!mysql_num_rows($db) && $item->description){
            $tmp = preg_replace($re,"",$item->description);
            $db = mysql_query(sprintf($sql,mysql_real_escape_string($tmp)));
        }
        // Attempt 4: boolean-mode search on truncated word stems of the address.
        if(!mysql_num_rows($db)){
            $address = preg_replace($re,"",$address);
            $tmp = preg_split("/[^а-я]+/ui",$address);
            $address = "";
            foreach($tmp as $_) {
                $n = mb_strlen($_,"utf-8");
                if($n
>= 4) $address .= mb_substr($_,0,$n - ($n < 6 ? 1 : 2),"utf-8") . "* ";
            }
            $db = mysql_query("SELECT id, MATCH(name, prefix) AGAINST ('>" . mysql_real_escape_string($address) . "' IN BOOLEAN MODE) AS rank FROM oris_streets WHERE !deleted HAVING rank > 0 ORDER BY rank DESC LIMIT 8");
        }
        $r = 0;
        while($_ = mysql_fetch_object($db)){
            if(!$item->street_id){
                $item->street_id = $_->id;
                $r = $_->rank;
            }
            // if($r != $_->rank) break;
            $item->streets .= $_->id . ",";
        }
        $item->streets = preg_replace("/,$/","",$item->streets);
        return $res;
    }

    /**
     * Extracts the house number from the end of $item->address into $item->house_number
     * (SQL-escaped). The address itself is trimmed in place.
     *
     * @param object $item Item; house_number is overwritten.
     */
    public function get_house_number(&$item)
    {
        $item->house_number = "";
        $item->address = trim($item->address);
        // Only addresses ending in a number plus at most six non-digit characters qualify.
        if (preg_match("/\D([1-9]\d*\D{0,6})$/iu",$item->address)) {
            preg_match_all("/\D(\d+)/iu",$item->address,$tmp,PREG_OFFSET_CAPTURE);
            $tmp = $tmp[1];
            // Walk backwards from the last number while the previous number starts at most
            // 6 bytes earlier, so multi-part numbers are taken as a whole.
            $e2 = array_pop($tmp);
            while ($e1 = array_pop($tmp)) {
                if ($e2[1] - $e1[1] > 6) break;
                $e2 = $e1;
            }
            $item->house_number = substr($item->address, $e2[1]);
            // Backslashes become forward slashes.
            $item->house_number = preg_replace("/\\\/","/",$item->house_number);
        }
        $item->house_number = mysql_real_escape_string($item->house_number);
    }

    /**
     * Maps an area onto a logarithmic bucket index whose width is set by $this->dS.
     *
     * @param float|int $s Area; falsy values give bucket 0.
     * @return float|int
     */
    public function get_link_s($s)
    {
        return $s ? ceil(log($s/(1 - $this->dS)) / log((1 + $this->dS)/(1 - $this->dS))) : 0;
    }

    /**
     * Sets `objects_phones.status` for each phone of the item according to whether the
     * phone occurs in ORIS objects of the matching deal type / category table.
     *
     * @param object $item Item with ->phone, ->deal_type, ->category and ->object_id.
     */
    public static function colorize_phones($item)
    {
        // The city code is quoted so it is matched literally.
        $rx = "/^" . preg_quote(conf::$city->code, "/") . "/";
        // Keys are quoted: as bare words they were undefined constants.
        $tables = [
            'prodam' => [
                'kvartiry' => "oris_flat_sale",
                'komnaty' => "oris_flat_sale",
                'doma_dachi_kottedzhi' => "oris_house_sale",
                'zemelnye_uchastki' => "oris_ground_sale",
                'kommercheskaya_nedvizhimost' => "oris_nonres_sale",
            ],
            'sdam' => [
                'kvartiry' => "oris_flat_rent",
                'komnaty' => "oris_flat_rent",
                'doma_dachi_kottedzhi' => "oris_house_rent",
                'zemelnye_uchastki' => "oris_ground_rent",
                'kommercheskaya_nedvizhimost' => "oris_nonres_rent",
            ]
        ];
        $fields = ["phone_home", "phone_work", "phone_cell"];
        $table = isset($tables[$item->deal_type][$item->category])
            ? $tables[$item->deal_type][$item->category]
            : null;
        if (!$table) return;

        // phone => [escaped phone, escaped phone without city code (when it has one)]
        $phones = [];
        foreach (explode("\n", preg_replace("/^7/sm", "8", trim($item->phone))) as $_) {
            $_ = trim($_);
            if ($_ > "" && !isset($phones[$_])) {
                $phones[$_] = [mysql_real_escape_string($_)];
                if (preg_match($rx, $_)) $phones[$_][] = mysql_real_escape_string(preg_replace($rx, "", $_));
            }
        }
        // Without phones the IN (...) lists would be empty, which is a SQL syntax error.
        if (!$phones) return;

        // Build the IN list and reset each entry to "" (meaning: no status yet).
        $s = "";
        foreach ($phones as &$_) {
            $s .= "'" . implode("','", $_) . "', ";
            $_ = "";
        }
        unset($_);
        $s = preg_replace("/,\s+$/", "", $s);

        $sql = "SELECT t1.object_id, t1.phone_home, t1.phone_work, t1.phone_cell, t1.deleted FROM oris.oris_objects t1 INNER JOIN oris.{$table} t2 ON t1.object_id = t2.object_id WHERE t1.phone_home IN ({$s}) OR t1.phone_work IN ({$s}) OR t1.phone_cell IN ({$s})";
        $dbc = mysql_query($sql);
        while ($_ = mysql_fetch_object($dbc)) {
            foreach($fields as $f) {
                $p = $_->$f;
                if (!$p) continue;
                // Short numbers are stored without the city code; add it back for the lookup.
                if (mb_strlen($p, "utf-8") < 11) $p = conf::$city->code . $p;
                if (isset($phones[$p])) $phones[$p] = $phones[$p] === "" ?
$_->deleted : $phones[$p] && $_->deleted;
            }
        }
        foreach ($phones as $phone => $status) {
            // print "$item->object_id; $phone -> '$status'\n";
            // The phone key is raw scraped text, so it is escaped; object_id is forced to an integer.
            mysql_query("UPDATE objects_phones SET status = '{$status}' WHERE object_id = " . (int) $item->object_id . " AND phone = '" . mysql_real_escape_string($phone) . "'");
        }
    }

    # TODO: rename "link" to "group", more correct
    public function link_object(&$item, $update = false)
    {
        $item->link_s = $this->get_link_s($item->s);
        $item->link_id = $item->object_id;
        $UpdateActualDateQuery = "UPDATE oris.GrabberObjectLinks SET ActualDate = '".$item->date."' WHERE url = '".$item->url."';";
        mysql_query($UpdateActualDateQuery) or $this->__log(mysql_error());
        $res = [link_id => &$item->link_id, link_s => &$item->link_s];
        // if (!$item->category || $item->category != "kvartiry" && $item->category != "komnaty") {
        // self::colorize_phones($item);
        // return $res;
        // }
        if($item->street_id && $item->phone != "Нет телефона"){
            # TODO: review this
            # use link_s in search, make link_id as single key whitout link_s
            $filter = "";
            if($item->category == 'doma_dachi_kottedzhi'){
                $filter = " AND o.s IS NOT NULL AND o.s > 0 AND ABS(o.s - {$item->s}) < 1 ";
            }else if($item->category == 'zemelnye_uchastki'){
                if($item->s && $item->s > 0){
                    $filter = " AND o.s IS NOT NULL AND o.s > 0 AND ABS(o.s - {$item->s}) < 1 ";
                }else if($item->land && $item->land > 0){
                    $filter = " AND o.land IS NOT NULL AND o.land > 0 AND ABS(o.land - {$item->land}) < 1 ";
                } else{
                    $filter = " AND 1!=1 ";
                }
            }
            $query = " SELECT o.link_id FROM objects o INNER JOIN objects_phones p ON o.object_id = p.object_id INNER JOIN objects_phones t ON p.phone = t.phone AND p.object_id != t.object_id WHERE o.display AND o.link_id AND t.object_id = '{$item->object_id}' AND o.category = '{$item->category}' AND o.deal_type = '{$item->deal_type}' AND o.street_id = '{$item->street_id}' AND o.level = '{$item->level}' AND o.rooms = '{$item->rooms}' {$filter} LIMIT 1; ";
            $tmp = mysql_fetch_assoc(mysql_query($query));
            if($tmp["link_id"]) $item->link_id =
$tmp["link_id"]; } $link = new stdClass(); foreach(["date","date_add","link_id","link_s","street_id","level","rooms","deal_type","category","is_agency"] as $f) $link->$f = $item->$f; $link->url = ""; $link->checked = 0; $link->number = "0"; $link->phone = []; $link->price = []; $link->images = []; $link->sites = []; $link->is_delayed = 0; $tmp = [ images => 0, url => 0, ]; $code = conf::$city->code; $re = "/^" . $code . "/"; $streets = explode(",", $item->streets); $streets = array_combine($streets, $streets); # TODO: review algorythm # select $link from `objects_links` first, then compare with $item (see previous todo) # so next loop can be removed # (???) races with other process -> rebuild `object_links` with another unique proccess/cron, NOT HERE!! -> so grabbers could work faster!! # store combined streets of linked objects in `objects_links` $dbc = mysql_query("SELECT * FROM objects WHERE object_id = '{$item->object_id}' OR link_id = '{$item->link_id}' AND link_s = '{$item->link_s}' ORDER BY date DESC, date_add DESC"); $link->count = mysql_num_rows($dbc); $checked = 0; while ($_ = mysql_fetch_object($dbc)) { foreach (["title","raion","address","distance","params","seller","is_agency"] as $f) if (mb_strlen($link->$f,"utf-8") < mb_strlen($_->$f,"utf-8")) $link->$f = $_->$f; if($_->date > $link->date) $link->date = $_->date; $link->sites[$_->site_id] = $this->sites[$_->site_id]->name; $_->phone = explode("\n",preg_replace("/^7/sm", "8", trim($_->phone))); foreach($_->phone as $f) { $f = trim($f); if($f > "" && !isset($link->phone[$f])){ $link->phone[$f] = [mysql_real_escape_string($f)]; if(preg_match($re, $f)) $link->phone[$f][] = mysql_real_escape_string(preg_replace($re, "", $f)); } } if(!count($link->images) && trim($_->images) > ""){ $_->images = preg_split("/(?:\r?\n){1,}/", trim($_->images)); $tmp["images"] = $_->site_id; $link->images = $_->images; } if(!$link->url && trim($_->url) > ""){ $tmp["url"] = $_->site_id; $link->url = trim($_->url); } 
if(!$link->description && trim($_->description) > "") { $link->description = trim($_->description); } if(!count($link->price)) $link->price = [$_->s => [$_->price]]; if($_->checked && !$checked) $checked = $_->checked; } # images $host = $this->sites[$tmp["images"]]->host; $host = "http" . (preg_match("/[:]443$/", $host) ? "s" : "") . "://" . $host; foreach($link->images as &$_){ if(!preg_match("/^https?\:\/\//is", $_)) $_ = $host . $_; } unset($_); $link->images = implode("\n", $link->images); # url if(!preg_match("/^https?\:\/\//is",$link->url)){ $host = $this->sites[$tmp["url"]]->host; $host = "http" . (preg_match("/[:]443$/",$host) ? "s" : "") . "://" . $host; $link->url = $host . $link->url; } $link->sites = implode(", ",$link->sites); # prices /*foreach($link->price as &$_) { $_ = array_values($_); sort($_, SORT_STRING); } unset($_); ksort($link->price, SORT_NUMERIC);*/ # phones $tmp = ""; foreach($link->phone as &$_){ $tmp .= "'" . implode("','",$_) . "', "; $_ = ""; } unset($_); $tmp = preg_replace("/,\s+$/","",$tmp); $allItemPhonesSqlInConcated = $tmp; $dbc = mysql_query(" SELECT oo.date_check, oo.object_id, oo.square, oo.rooms_count, oo.level, oo.price, oo.phone_home, oo.phone_work, oo.phone_cell, oo.deleted, COALESCE((SELECT ObjectId FROM oris.GrabberObjectLinks WHERE Url = '{$link->url}' LIMIT 1),'') = oo.object_id as haveSavedUrl, oo.street_id, oo.number, IF( ofs.object_id IS NULL AND ohs.object_id IS NULL AND ogs.object_id IS NULL AND ons.object_id IS NULL, IF(ofr.object_id IS NULL AND ohr.object_id IS NULL AND ogr.object_id IS NULL AND onr.object_id IS NULL, NULL, 'sdam'), 'prodam') AS deal_type, CASE WHEN ofs.object_id IS NOT NULL OR ofr.object_id IS NOT NULL AND LOWER((SELECT name FROM oris.oris_series_type s WHERE s.id = (SELECT series_id FROM oris.oris_flat WHERE object_id = oo.object_id))) NOT LIKE '%комната%' THEN 'kvartiry' WHEN ofs.object_id IS NOT NULL OR ofr.object_id IS NOT NULL AND LOWER((SELECT name FROM oris.oris_series_type s WHERE 
s.id = (SELECT series_id FROM oris.oris_flat WHERE object_id = oo.object_id))) LIKE '%комната%' THEN 'komnaty' WHEN ogs.object_id IS NOT NULL OR ogr.object_id IS NOT NULL THEN 'zemelnye_uchastki' WHEN ohs.object_id IS NOT NULL OR ohr.object_id IS NOT NULL THEN 'doma_dachi_kottedzhi' WHEN ons.object_id IS NOT NULL OR onr.object_id IS NOT NULL THEN 'kommercheskaya_nedvizhimost' END as category FROM oris.oris_objects oo LEFT JOIN oris.oris_flat_sale ofs ON oo.object_id = ofs.object_id LEFT JOIN oris.oris_flat_rent ofr ON oo.object_id = ofr.object_id LEFT JOIN oris.oris_house_sale ohs ON oo.object_id = ohs.object_id LEFT JOIN oris.oris_house_rent ohr ON oo.object_id = ohr.object_id LEFT JOIN oris.oris_ground_sale ogs ON oo.object_id = ogs.object_id LEFT JOIN oris.oris_ground_rent ogr ON oo.object_id = ogr.object_id LEFT JOIN oris.oris_nonres_sale ons ON oo.object_id = ons.object_id LEFT JOIN oris.oris_nonres_rent onr ON oo.object_id = onr.object_id WHERE oo.phone_home IN ({$tmp}) OR oo.phone_work IN ({$tmp}) OR oo.phone_cell IN ({$tmp}) OR oo.object_id = (SELECT ObjectId FROM oris.GrabberObjectLinks WHERE Url = '{$link->url}' LIMIT 1) ORDER BY oo.date_check DESC "); $tmp = ["phone_home", "phone_work", "phone_cell"]; $__cache = []; $__deal_type = []; $__category = []; unset($link->same, $link->archive, $__price); while($_ = mysql_fetch_object($dbc)){ $__deal_type[$_->deal_type] = 1; $__category[$_->category] = 1; if ((isset($streets[$_->street_id]) && $link->deal_type == $_->deal_type && $link->category == $_->category)) { $_->link_s = $this->get_link_s($_->square); if (!isset($link->same) || $link->same == 255) { $link->same = $_->deleted ? 255 : 1; } if (((is_null($link->level) || $link->level == $_->level) && (is_null($link->rooms) || $link->rooms == $_->rooms_count )&& ($link->link_s == $_->link_s || abs($item->s - $_->square) <= 1)) ) { $link->street_id = $_->street_id; $link->archive = isset($link->archive) ? 
$link->archive && $_->deleted : $_->deleted; if ($link->number === "0") $link->number = $_->number; if (!$_->deleted && !isset($__price)) $__price = $_->price; foreach($tmp as $f) { $f = $_->$f; if (!$f) continue; if (mb_strlen($f, "utf-8") < 11) $f = $code . $f; if (isset($link->phone[$f])) { $link->phone[$f] = $link->phone[$f] === "" ? $_->deleted : $link->phone[$f] && $_->deleted; $__cache[$f] = 1; } } continue; } } foreach($tmp as $f) { $f = $_->$f; if(!$f) continue; if(mb_strlen($f, "utf-8") < 11) $f = $code . $f; if(!isset($__cache[$f]) && isset($link->phone[$f])) $link->phone[$f] = $link->phone[$f] === "" ? $_->deleted : $link->phone[$f] && $_->deleted; } } $dbc = mysql_query("SELECT 1 as result FROM oris.GrabberObjectLinks WHERE Url LIKE '%{$item->url}%' AND ObjectId IN (SELECT object_id FROM oris.oris_objects WHERE !deleted) LIMIT 1;"); $haveSavedUrl = mysql_fetch_object($dbc); $haveSavedUrl = $haveSavedUrl->result; if($haveSavedUrl && strpos($item->url, 'avito') !== false) { $isPhoneAvitoBlocked = strpos($item->description, 'Номер продавца защищён Avito') !== false; $que = mysql_query(" SELECT SUM(b) as result FROM ( SELECT (SELECT COALESCE(phone_home,'') as phone FROM oris.oris_objects WHERE object_id = (SELECT ObjectId FROM oris.GrabberObjectLinks WHERE Url = '{$item->url}')) IN ({$allItemPhonesSqlInConcated}) as b UNION SELECT (SELECT COALESCE(phone_work,'') as phone FROM oris.oris_objects WHERE object_id = (SELECT ObjectId FROM oris.GrabberObjectLinks WHERE Url = '{$item->url}')) IN ({$allItemPhonesSqlInConcated}) as b UNION SELECT (SELECT COALESCE(phone_cell,'') as phone FROM oris.oris_objects WHERE object_id = (SELECT ObjectId FROM oris.GrabberObjectLinks WHERE Url = '{$item->url}')) IN ({$allItemPhonesSqlInConcated}) as b ) as l ;"); $isAnyPhoneInObject = mysql_fetch_object($que); $isAnyPhoneInObject = $isAnyPhoneInObject->result; $haveSavedUrl = $haveSavedUrl && ($isPhoneAvitoBlocked || (!$isPhoneAvitoBlocked && $isAnyPhoneInObject)); } # 
проверяем отсутствие телефона в категории + признак архива $__f = !isset($__deal_type[$link->deal_type]) || !isset($__category[$link->category]) || $link->archive; # проверяем наличие новых и архивных телефонов if (!$__f) foreach ($link->phone as $_) { if ($_ === "" || $_ === "1" || $_ === true) { $__f = 1; break; } } $update_backup = $update; # объявление - неархивный "полный дубль" без новых телефонов c ссылкой // if (!$__f && !$link->archive && $link->number !== "0" && isset($__price)) { if (!$__f && !$link->archive && ((($link->category == "komnaty" || $link->category == "kvartiry") && $link->number !== "0") || ($link->category !== "kvartiry" && $link->category !== "komnaty"))) { # сравниваем цены // $price = __floatval(current(current($link->price))); // $t = []; // $t[$price] = 1; // $t[$price / 1000] = 1; // $t[$price * 1000] = 1; // if(isset($t[$__price])) // $link->checked = 1; // else if($haveSavedUrl) $link->checked = 2; } else if (!$__f || ($haveSavedUrl)) { # повторное получение объявления сайта if ($update && isset($this->olditems[$item->id])) { $olditem = $this->olditems[$item->id]; $diff = ['sdam' => 30, 'prodam' => 90]; // if ($checked && ($olditem->date_diff >= $diff[$item->deal_type] || $update['price'])) { if ($checked && $olditem->date_diff && ($haveSavedUrl)) { $checked = 2; } unset($update['date'], $update['price'], $update['levels'], $update['s']); foreach($update as $_) { if($_) { $checked = 0; break; } } } $link->checked = $checked; } $que = mysql_query("SELECT checked FROM grabber.objects_links WHERE (link_id, link_s) = ('{$link->link_id}', '{$link->link_s}');"); $currentCheckedStatus = mysql_fetch_object($que); $currentCheckedStatus = $currentCheckedStatus->checked; $urlContainsCian = strpos($item->url, 'cian.ru') !== false; $urlContainsIrr = strpos($item->url, 'irr.ru') !== false; $urlContainsDomofond = strpos($item->url, 'domofond.ru') !== false; $excludeAutoUpdate = $urlContainsCian || $urlContainsIrr || $urlContainsDomofond; 
if(!$excludeAutoUpdate && $link->checked == 2 && $currentCheckedStatus != 2) { $doesOnlyDateOrPriceChanged = $update_backup['date'] || $update_backup['price']; unset($update_backup['date'], $update_backup['price']); foreach($update_backup as $_) { if($_) { $doesOnlyDateOrPriceChanged = 0; break; } } if($doesOnlyDateOrPriceChanged) { $que = mysql_query("SELECT ObjectId FROM oris.GrabberObjectLinks WHERE Url = '{$item->url}';"); $res = mysql_fetch_object($que); $objectId = $res->ObjectId; $que = mysql_query("SELECT date_check,IF(object_id IN ( SELECT object_id FROM oris.oris_flat_sale UNION ALL SELECT object_id FROM oris.oris_house_sale UNION ALL SELECT object_id FROM oris.oris_ground_sale UNION ALL SELECT object_id FROM oris.oris_nonres_sale ),price*1000,price) as price, is_on_handsafe,private_note FROM oris.oris_objects WHERE object_id = '{$objectId}';"); $res = mysql_fetch_object($que); $oldDateCheck = $res->date_check; $oldPrice = $res->price; $oldIsOnHandsafe = $res->is_on_handsafe; $oldPrivateNote = $res->private_note; $que = mysql_query("SELECT id FROM oris.oris_users_v3 WHERE login = 'Robot';"); $res = mysql_fetch_object($que); $robotUserId = $res->id; $que = mysql_query("SELECT NOW() as date2;"); $res = mysql_fetch_object($que); $fixedNow = $res->date2; if($objectId != null && $oldDateCheck != null && $oldPrice != null && $robotUserId != null && $fixedNow != null) { $currentPrice = __floatval(current(current($link->price))); $handsafeRegexReplace = "/((,)*( )*((н|Н)а задатке до)+( | )?([0-9]?[0-9]?(\ |\ )??((я|Я)нвар|(ф|Ф)еврал|(м|М)арт|(а|А)прел|(м|М)а|(и|И)юн|(и|И)юл|(а|А)август|(с|С)ентябр|(о|О)ктябр|(н|Н)оябр|(д|Д)екабр)(ь|я|а|й)?(\ |\ )?[0-9]?[0-9]?[0-9]?[0-9]?)?[\ \,]*\s*)|((,)*( )*((с|С)няли с продажи до)+( | )?([0-9]?[0-9]?(\ |\ )??((я|Я)нвар|(ф|Ф)еврал|(м|М)арт|(а|А)прел|(м|М)а|(и|И)юн|(и|И)юл|(а|А)август|(с|С)ентябр|(о|О)ктябр|(н|Н)оябр|(д|Д)екабр)(ь|я|а|й)?(\ |\ )?[0-9]?[0-9]?[0-9]?[0-9]?)?[\ \,]*\s*)|((,)*( )*((с|С)няли с продажи на 
неопределенный срок)+[\ \,]*\s*)|((,)*( )*((е|Е)сть потенциальный покупатель)+[\ \,]*\s*)/smi"; $changesToOrisObject = " date_check = '{$fixedNow}' "; $abortChanges = false; if($currentPrice != $oldPrice) { if(($link->deal_type == 'prodam' && abs($currentPrice - $oldPrice) <= 100000) || ($link->deal_type == 'sdam' && abs($currentPrice - $oldPrice) <= 1000)) { $priceToApply = $link->deal_type == 'prodam' ? $currentPrice / 1000 : $currentPrice; $oldPrice = $link->deal_type == 'prodam' ? $oldPrice / 1000 : $oldPrice; $changesToOrisObject = $changesToOrisObject . " , price = {$priceToApply} "; } else $abortChanges = true; } if($oldIsOnHandsafe == 1) { $changesToOrisObject = $changesToOrisObject . " , is_on_handsafe = 0 "; $newMemo = preg_replace($handsafeRegexReplace,'',$oldPrivateNote); if($newMemo !== $oldPrivateNote) { $changesToOrisObject = $changesToOrisObject . " , private_note = \"{$newMemo}\" "; } } if(!$abortChanges) { $this->__log($objectId); $this->__log($changesToOrisObject); $res = mysql_query("INSERT INTO `oris`.`oris_changes` (`object_id`, `date`, `table_name`, `user_id`, `flag`) VALUES ('{$objectId}', '{$fixedNow}', 'oris_objects', {$robotUserId}, 1);"); // $id = mysql_insert_id(); $res = mysql_query("UPDATE oris.oris_objects SET {$changesToOrisObject} WHERE object_id = '{$objectId}';"); $link->checked = 1; $res = mysql_query("UPDATE `oris`.`GrabberObjectLinks` SET `ActualDate` = NOW(), `DeleteDate` = NULL, `ExpiredDate` = NULL, `InspectorLastCheckDate` = NOW() WHERE `Url` = '{$item->url}';"); $que = mysql_query("SELECT Id FROM oris.GrabberObjectLinks WHERE url = '{$item->url}'"); $result = mysql_fetch_object($que); $urlId = $result->Id; $res = mysql_query("DELETE FROM oris.oris_incorrect_objects_request WHERE object_id = '{$objectId}';"); if($res && mysql_affected_rows() > 0) { $que = mysql_query("SELECT memo FROM oris.oris_objects WHERE object_id = '{$objectId}';"); $result = mysql_fetch_object($que); $memo = $result->memo; if(empty(memo) || 
is_null(memo)) $newMemo = 'Удалено из некорректного'; else $newMemo = $memo." Удалено из некорректного"; $changesToOrisObject = " memo = \"{$newMemo}\" "; $this->__log($objectId); $this->__log($changesToOrisObject); $res = mysql_query("INSERT INTO `oris`.`oris_changes` (`object_id`, `date`, `table_name`, `user_id`, `flag`) VALUES ('{$objectId}', '{$fixedNow}', 'oris_objects', {$robotUserId}, 1);"); $res = mysql_query("UPDATE oris.oris_objects SET {$changesToOrisObject} WHERE object_id = '{$objectId}';"); } $res = mysql_query("DELETE FROM oris.GrabberObjectLinkBadRequests WHERE GrabberObjectLinkId = {$urlId};"); } } } } if(!$update && $link->checked) { mysql_query("UPDATE objects SET checked = '{$link->checked}' WHERE object_id = '{$item->object_id}' OR (link_id, link_s) = ('{$item->link_id}', '{$item->link_s}')") or die(mysql_error()); } $item->checked = $link->checked; $link->phone = serialize($link->phone); $link->price = serialize($link->price); $data = (array) $link; unset($data["city"]); // unset($data["category"]); foreach ($data as &$_) $_ = mysql_real_escape_string($_); unset($_); $sql = "REPLACE INTO objects_links (`" . implode("`, `", array_keys($data)) . "`) VALUES('" . implode("', '", $data) . 
"')"; mysql_query($sql) or $this->error(mysql_error()); return $res; } public function link_unlinked(){ $this->__log('Started link_unlinked'); $dbc = mysql_query(" SELECT COUNT(*) as cnt FROM grabber.objects WHERE (link_id,link_s) NOT IN (SELECT link_id,link_s FROM grabber.objects_links) AND !(display = 0 AND checked = 0); "); $cnt = mysql_fetch_object($dbc); $this->__log(''); $this->__log('-------------------------------'); $this->__log("Founded unlinked items - {$cnt->cnt}"); $this->__log('-------------------------------'); $dbc = mysql_query(" SELECT * FROM grabber.objects WHERE (link_id,link_s) NOT IN (SELECT link_id,link_s FROM grabber.objects_links) AND !(display = 0 AND checked = 0); "); while($item = mysql_fetch_object($dbc)){ $this->__log("({$item->link_id},{$item->link_s}) - {$item->url}"); $this->link_object($item); mysql_query("UPDATE objects SET link_id = '{$item->link_id}', link_s = '{$item->link_s}', street_id = '{$item->street_id}', streets = '{$item->streets}', house_number = '{$item->house_number}' WHERE object_id = '{$item->object_id}'"); mysql_query("UPDATE objects_phones SET link_id = '{$item->link_id}' WHERE object_id = '{$item->object_id}'"); } } public function saveUrl($item) { $item->site_id = $this->site->id; $item->url = mysql_real_escape_string($item->url); if(!$item->city) $item->city = $this->city; if(!$item->deal_type) $item->deal_type = $this->deal_type; if(!$item->category) $item->category = $this->category; mysql_query("INSERT INTO objects_errors (site_id,id,url,date_add,date_check,fails,city,deal_type,category) VALUES('{$item->site_id}','{$item->id}','{$item->url}',NOW(),NOW(),0,'{$item->city}','{$item->deal_type}','{$item->category}') ON DUPLICATE KEY UPDATE fails = fails + 1, date_check = NOW()") or $this->__log("saveUrl(): " . 
mysql_error());
    }

    /**
     * Extracts a storey number from free text.
     *
     * All numbers captured by the storey patterns are collected and the smallest one is
     * returned. If none of them match, "a/b" fractions are tried and the first number of
     * each pair with a <= b is used.
     *
     * @param string $s ad text
     * @return int|string smallest captured number (as matched, i.e. a numeric string) or 0 when nothing matched
     */
    public static function parse_level($s) {
        $matches = [];
        $count = 0;
        $patterns = [
            # e.g. "2-х этажный" (2-storey)
            '/\b(\d+)\D{0,4}\bэт(?:\b|аж)/ismu',
            # e.g. "кол-во этажей 5" (number of storeys: 5)
            '/\bэтаж(?:ей?|ность)?\b\D{0,4}(\d+)\b/ismu',
            # e.g. "Этаж 4 из 16", "Этаж 4/17" (floor 4 of 16)
            '/\bэтаж\b\D{1,4}\b\d+\s*(?:\bиз\b|\/)\s*(\d+)\b/ismu'
        ];
        foreach($patterns as $rx){
            $r = preg_match_all($rx,$s,$tmp);
            if($r){
                $matches = array_merge($matches, $tmp[1]);
                $count += $r;
            }
        }
        # fallback for bare fractions, e.g. "1-к квартиру, 6/18" or "30/12/7"
        # ("___" is appended so the trailing {3,} can match at the end of the text)
        if(!$count && $r = preg_match_all('/\b(\d+)[^\d\/]{0,3}\/[^\d\/]{0,3}(\d+)[^\d\/]{3,}/ismu',$s . "___",$tmp)){
            for($i = 0; $i < $r; $i++){
                if($tmp[1][$i] <= $tmp[2][$i]){
                    $matches[] = $tmp[1][$i];
                    $count++;
                }
            }
        }
        return $count ? min($matches) : 0;
    }

    /**
     * Extracts the total number of storeys/levels from free text.
     *
     * Same approach as parse_level(), but also recognises "уровень" wording, takes the
     * second number of "a/b" fractions and returns the largest captured number.
     *
     * @param string $s ad text
     * @return int|string largest captured number (numeric string) or 0 when nothing matched
     */
    public static function parse_levels($s) {
        $matches = [];
        $count = 0;
        $patterns = [
            # e.g. "2-х этажный" / "2-х уровневый" (2-storey / 2-level)
            '/\b(\d+)\D{0,4}\b(?:эт(?:\b|аж)|уров)/ismu',
            # e.g. "кол-во этажей 5", "этажей в здании 5" (number of storeys/levels)
            '/ \b(?:этаж(?:ей?|ность)?|уровн?е(?:нь|й|вый)?)\b (?:\s+в\s+(?:доме|здании|помещении|коттедже))? \D{0,4} (\d+)\b /ismux',
            # e.g. "Этаж 4 из 16", "Этаж 4/17" (floor 4 of 16)
            '/\bэтаж\b\D{1,4}\b\d+\s*(?:\bиз\b|\/)\s*(\d+)\b/ismu'
        ];
        foreach($patterns as $rx){
            $r = preg_match_all($rx,$s,$tmp);
            if($r){
                $matches = array_merge($matches, $tmp[1]);
                $count += $r;
            }
        }
        # fallback for bare fractions, e.g. "1-к квартиру, 6/18" or "30/12/7"
        if(!$count && $r = preg_match_all('/\b(\d+)[^\d\/]{0,3}\/[^\d\/]{0,3}(\d+)[^\d\/]{3,}/ismu',$s . "___",$tmp)){
            for($i = 0; $i < $r; $i++){
                if($tmp[1][$i] <= $tmp[2][$i]){
                    $matches[] = $tmp[2][$i];
                    $count++;
                }
            }
        }
        return $count ? max($matches) : 0;
    }

    /**
     * Extracts an area value from free text.
     *
     * Every number captured by the area patterns is converted with __floatval()
     * (helper defined elsewhere in the project) and the largest one is returned.
     *
     * @param string $s ad text
     * @return mixed largest converted value, or 0 when nothing matched
     */
    public static function parse_square($s) {
        $matches = [];
        $count = 0;
        $patterns = [
            '/ \b(\d+(?:[,.]\d+)?) \s*\/\s* (?:\d+(?:[,.]\d+)?|-) \s*\/\s* (?:\d+(?:[,.]\d+)?|-)\b /ismx',
            '/ \b(\d+(?:[.,]\d+)?) \D{0,3} (?:кв[.\s]\s*м|м\s*[2²]\b|квадрат\w{0,2}) /ismux',
            '/ \bпл(?:ощадь)?\b (?:\s+(?:общая|жилая|дома|квартиры|комнаты|коттеджа|офиса|помещения|здания)\b)? [\s.,:;\(\)]+ (?:в\s*)? (?:кв[.\s]\s*м|м\s*[2²]\b) [\s.,:;\(\)]+ (\d+(?:[,.]\d+)?) /ismux',
            '/ \b(?:комнат[ау]|квартир[ау]|дом|помещение|офис)\b [\s.,:;\(\)]+ (\d+(?:[,.]\d+)?) \D{0,3}м /ismux',
        ];
        foreach($patterns as $rx){
            $r = preg_match_all($rx,$s,$tmp);
            if($r){
                foreach($tmp[1] as &$_) $_ = __floatval($_);
                unset($_);
                $matches = array_merge($matches, $tmp[1]);
                $count += $r;
            }
        }
        return $count ? max($matches) : 0;
    }

    /**
     * Extracts a land-plot size from free text ("сот", "га", "гектар" wording).
     *
     * Captured numbers are converted with __floatval() and the largest one is returned;
     * no unit conversion is performed here.
     *
     * @param string $s ad text
     * @return mixed largest converted value, or 0 when nothing matched
     */
    public static function parse_land($s) {
        $matches = [];
        $count = 0;
        $patterns = [
            '/\b(\d+(?:[,.]\d+)?)\b\D{0,4}\b(?:сот|га|гектар)/ismu',
            '/ \b(?:участ(?:ок|к[ае])|земл[иея]) [\s.,:;\(\)]+ (?:в\s*)? (?:сот(?:ок|ках)?|га|гектар(?:ах)?) [\s.,:;\(\)]+ (\d+(?:[,.]\d+)?) /ismux',
        ];
        foreach($patterns as $rx){
            $r = preg_match_all($rx,$s,$tmp);
            if($r){
                foreach($tmp[1] as &$_) $_ = __floatval($_);
                unset($_);
                $matches = array_merge($matches, $tmp[1]);
                $count += $r;
            }
        }
        return $count ? max($matches) : 0;
    }

    /**
     * Extracts the number of rooms from free text.
     *
     * Tries, in order: a Russian number word ("двушка", "трёхкомнатная", ...), a single digit
     * followed by a "к"/"комн..." abbreviation, and "комн... N". Falls back to 1 for the
     * 'kvartiry' / 'komnaty' categories and 0 otherwise.
     *
     * @param string $s ad text
     * @param string $c category code used only for the fallback value
     * @return int|string room count (a numeric string when taken from a digit match)
     */
    public static function parse_rooms($s, $c = "") {
        $arr = [
            "одн" => 1,
            "дву" => 2,
            "тр" => 3,
            "четыр" => 4,
            "пяти" => 5,
            "шести" => 6,
        ];
        if(preg_match('/ \b( (?:одн[уеё]|дву|тр[её]|четыр[её])шк | (?:одно?|двух?|тр[её]х?|четыр[её]х?|пяти?|шести?)\W{0,3}комн ) /ismxu', $s, $tmp)) {
            // The regex is case-insensitive, so fold the match before the case-sensitive
            // strpos() lookup; otherwise capitalised words ("Двухкомнатная") are missed.
            $word = mb_strtolower($tmp[1], "utf-8");
            foreach($arr as $k => $v)
                if(strpos($word,$k) !== false) return $v;
        }
        if(preg_match('/\b(\d)\s*[\D\S]{0,3}\s*к(?:ом(?:н(?:ат\w{0,3})?)?)?\b/ismu', $s, $tmp))
            return $tmp[1];
        if(preg_match('/\bкомн\w{0,2}\D{0,3}(\d+)\D/ismu', $s, $tmp))
            return $tmp[1];
        return ($c == "kvartiry" || $c == "komnaty" ? 1 : 0);
    }

    /**
     * Placeholder that subclasses are expected to override; this base version only
     * reports an error through $this->error().
     *
     * @param string $html fetched page
     * @param object $item ad descriptor
     */
    public function isValidItemHtml($html, $item) {
        return $this->error("isValidItemHtml: pseudo abstract method, must be overriden");
    }

    /**
     * Advances to the next mirror in $this->mirrors (wrapping to the first one) and stores
     * it in $this->mirror.
     *
     * @param bool $deleteCurrent when true, the mirror that was current before the call is
     *                            removed from the list
     * @return mixed the newly selected mirror; an empty list is reported via $this->error()
     */
    protected function get_mirror($deleteCurrent = false) {
        if(!count($this->mirrors))
            return $this->error("get_mirror: mirror list is empty");
        $mirror = $this->mirror ?
next($this->mirrors) : current($this->mirrors);
        if(!$mirror){
            // Ran past the end of the list: wrap around.
            reset($this->mirrors);
            $mirror = current($this->mirrors);
        }
        if($deleteCurrent){
            $this->__log("deleting mirror: {$this->mirror}");
            reset($this->mirrors);
            foreach($this->mirrors as $k => $v){
                if($v == $this->mirror){
                    unset($this->mirrors[$k]);
                    break;
                }
            }
        }
        $this->mirror = $mirror;
        return $this->mirror;
    }

    /**
     * Fetches a URL with file_get_contents() over an HTTP/1.1 stream context, echoes the
     * request to stdout and sleeps 3 seconds afterwards.
     *
     * @param string $url
     * @param array  $params query parameters (GET) or form fields (POST)
     * @param string $method "GET" or "POST"; any other value terminates the script
     * @return string|false response body, or false on failure
     */
    protected static function url_get_contents($url, $params = [], $method = "GET") {
        if($method == "GET") {
            if(is_array($params) && count($params))
                $url = $url . "?" . http_build_query($params);
            echo "[" . date("Y-m-d H:i:s") . "] url_get_contents: {$url}\n";
            $context = stream_context_create([
                "http" => [
                    "protocol_version" => 1.1,
                    "method" => "GET",
                    "timeout" => 60,
                    "header" => [
                        "Connection: close"
                    ],
                ]]);
        } elseif($method == "POST") {
            echo "[" . date("Y-m-d H:i:s") . "] url_get_contents: {$url}\n" . print_r($params, 1);
            $content = http_build_query($params);
            $context = stream_context_create([
                "http" => [
                    "protocol_version" => 1.1,
                    "method" => "POST",
                    "timeout" => 60,
                    "header" => [
                        "Content-Type: application/x-www-form-urlencoded",
                        "Content-Length: " . strlen($content),
                        "Connection: close",
                    ],
                    "content" => $content,
                ]]);
        } else {
            die ("[" . date("Y-m-d H:i:s") . "] Unknown request method: \"{$method}\"\n");
        }
        $data = file_get_contents($url, false, $context);
        sleep(3);
        return $data;
    }

    /**
     * Requests data through the current mirror and gunzips the response; on an empty or
     * undecodable answer the failing mirror is dropped and the request is retried.
     *
     * A scalar $params is converted to ['item_id' => N] when it is all digits, to
     * ['url' => ...] when it starts with http(s)://, and to an empty array otherwise.
     *
     * @param array|string $params
     * @param string       $method "GET" or "POST"
     * @return string decoded payload
     */
    protected function mirror_get_contents($params, $method = "GET") {
        $this->get_mirror();
        if(!is_array($params)) {
            if(preg_match("@^\d+$@", $params))
                $params = ["item_id" => $params];
            else if(preg_match("@^https?://@is", $params))
                $params = ["url" => $params];
            else
                $params = [];
        }
        $data = @gzdecode(self::url_get_contents($this->mirror, $params, $method));
        if(!$data) {
            $this->__log($this->mirror . " failed, trying next...");
            $this->get_mirror(true);
            return $this->mirror_get_contents($params, $method);
        }
        return $data;
    }

    /**
     * Issues a body-less (CURLOPT_NOBODY) request and returns curl_getinfo() for it.
     *
     * @param string       $url
     * @param bool|string  $proxy false: no proxy (clears proxy state and sleeps 3 s);
     *                            true: get_proxy("") picks one; string: passed to get_proxy()
     * @param array|string $opts  extra request header line(s)
     * @return array curl_getinfo() result
     */
    protected function get_url_head($url, $proxy = false, $opts = []) {
        $headers = is_array($opts) ?
$opts : [$opts];
        $headers[] = "Accept-Language: ru-RU,ru;q=0.8,en-US;q=0.5,en;q=0.3";
        $headers[] = "Cache-Control: max-age=0";
        $headers[] = "Connection: keep-alive";
        $headers[] = "User-Agent: Mozilla/5.0 (Windows NT 5.1; rv:31.0) Gecko/20100101 Firefox/31.0";
        $curl = curl_init();
        if($proxy){
            $this->get_proxy($proxy === true ? "" : $proxy);
            curl_setopt($curl, CURLOPT_PROXY, $this->proxy);
            if ($this->__proxy->pass)
                curl_setopt($curl, CURLOPT_PROXYUSERPWD, $this->__proxy->login . ":" . preg_replace("/\s/","",$this->__proxy->pass));
            // $headers[] = "X-Forwarded-For: {$this->__proxy->ip}";
        } else{
            $this->__proxy = null;
            $this->proxy = null;
            sleep(3);
        }
        curl_setopt($curl, CURLOPT_HTTPHEADER, $headers);
        curl_setopt($curl, CURLOPT_TIMEOUT, $this->timeout);
        curl_setopt($curl, CURLOPT_URL, $url);
        curl_setopt($curl, CURLOPT_NOBODY, true);
        curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);
        // NOTE(review): TLS peer verification is disabled here.
        curl_setopt($curl, CURLOPT_SSL_VERIFYPEER, false);
        curl_exec($curl);
        $info = curl_getinfo($curl);
        curl_close($curl);
        return $info;
    }

    /**
     * Sets the code alias on the current site record.
     *
     * @param string $code
     */
    public function setSiteCodeAlias($code){
        $this->site->code_alias = $code;
    }
}