getItem($item, $n + 1);
}
$item->display = 1;
$item->is_agency = 0;
$item->seller = "";
if(preg_match('/Страница не найдена/',$data))
{
$this->__log('Sibdom: Такой страницы на нашем сайте нет');
return $this->getItem($item, $n + 1);
}
$r2 = preg_match("/добавлено([^<]*)|обновлено([^<]*)/ism", $tmp[0], $tmp2);
var_dump($tmp2);
// if ($item->date==""){
if ($tmp2[1]==0){
$item->date = date ("Y-m-d", strtotime($tmp2[2]))." "."00:00:00";}
else $item->date = date ("Y-m-d", strtotime($tmp2[1]))." "."00:00:00";
print_r("DATAAAAAAAAAAAAAAAAAAAA ".$item->date);
// }
#
$item->images = [];
$r = preg_match_all("/Фото\s\d*.[^<]*
images[] = $tmp2[1][$i];
}
$item->images = implode("\n", $item->images);
var_dump($item->images);
#
#
$r = preg_match_all("/card-description[^<]*([^s]*)/ism", $tmp[0], $tmp2);
$item->description = strip_tags($tmp2[1][0]);
var_dump( $item->description);
#
//preg_match_all('/Площадь кухни[^\d]*([\S]*)/', $tmp[0], $tmp3);
//TEST Item
// $item = new stdClass();
// $item->id = $this->item->id
#
preg_match_all('/data-owner=\"([^\"]*)/ism', $tmp[0], $owner);
preg_match_all('/data-key=\"([^\"]*)/ism', $tmp[0], $key);
preg_match_all('/data-url_key=\"([^\"]*)/ism', $tmp[0], $url_key);
/*$item->phone = $this->post_url(
"https://www.sibdom.ru/api/get_phone",
[
//'okey' => $item->SEC_CODE . ";",
"owner" => $owner[1][0],
"id" => $item->id,
"key" => $key[1][0],
"url_key" => $url_key[1][0],
"mobile" => 0,
],
TRUE,
[
"Content-type: application/x-www-form-urlencoded; charset=UTF-8",
"X-Requested-With: XMLHttpRequest",
"Referer: {$url}",
]
);
*/
/* $send_data = array(
"id" => $item->id,
"key" => $key[1][0],
"mobile" => 0,
"owner" => $owner[1][0],
"url_key" => $url_key[1][0],
);
// var_dump($send_data); */
$item->phone = $this->getPhone($send_data);
//var_dump("!!!!!!!!!!!!!! {$item->phone}");
#
isset($this->oldItem[$item-id]) ? $this->updateItem($item) : $this->saveItem($item);
return $item;
}
/**
 * Sends a form-encoded POST request with cURL and returns the response body.
 *
 * A request counts as failed when cURL reports an error or the HTTP status is not 200.
 * Failed requests are retried by calling this method again with $n + 1; the number of
 * attempts is capped so that a permanently failing endpoint cannot recurse forever.
 *
 * @param string       $url   Absolute http(s) URL; anything else is reported through $this->error().
 * @param array        $form  Form fields, encoded with http_build_query().
 * @param mixed        $proxy Truthy: route the request through a proxy picked by get_proxy().
 *                            Falsy: connect directly after a 2-second pause.
 * @param array|string $opts  Extra HTTP header line(s) sent in addition to the defaults below.
 * @param int          $n     Attempt counter (1-based); used only for the retry cap.
 * @return string|false Response body on HTTP 200; false on a 40x status without a proxy,
 *                      or when all attempts have failed.
 */
public function post_url($url, $form = [], $proxy = true, $opts = [], $n = 1)
{
    if (!$url || !preg_match("/^https?[:]\/\//", $url)) {
        $this->error("post_url: incorrect url = {$url}");
    }

    // NOTE(review): assumes $this->fails holds the class-wide failure budget - confirm.
    // Falls back to 10 attempts when the property is unset or not positive.
    $maxAttempts = (isset($this->fails) && (int) $this->fails > 0) ? (int) $this->fails : 10;

    $query = http_build_query($form);
    $log = "post_url: url = {$url}&{$query}";

    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_POST, 1);
    curl_setopt($ch, CURLOPT_POSTFIELDS, $query);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_TIMEOUT, $this->timeout);
    // Read cookies from the file when it already exists, otherwise have cURL create it.
    curl_setopt($ch, file_exists($this->cookies) ? CURLOPT_COOKIEFILE : CURLOPT_COOKIEJAR, $this->cookies);

    $headers = is_array($opts) ? $opts : [$opts];
    $headers[] = "Accept-Language: ru-RU,ru;q=0.8,en-US;q=0.5,en;q=0.3";
    $headers[] = "Cache-Control: max-age=0";
    $headers[] = "Connection: keep-alive";
    $headers[] = "User-Agent: Mozilla/5.0 (Windows NT 5.1; rv:31.0) Gecko/20100101 Firefox/31.0";
    $headers[] = "Content-length: " . strlen($query);

    if ($proxy) {
        $this->get_proxy($proxy === true ? "" : $proxy);
        curl_setopt($ch, CURLOPT_PROXY, $this->proxy);
        if ($this->__proxy->pass) {
            curl_setopt($ch, CURLOPT_PROXYUSERPWD, $this->__proxy->login . ":" . $this->__proxy->pass);
        }
        $log .= ", proxy = {$this->proxy}";
        $headers[] = "X-Forwarded-For: {$this->__proxy->ip}";
    } else {
        $this->__proxy = null;
        $this->proxy = null;
        sleep(2);
    }

    $this->__log($log);
    curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);

    $ss = curl_exec($ch);
    $er = curl_error($ch);
    $ci = curl_getinfo($ch);
    curl_close($ch);

    $f = $er || $ci["http_code"] != 200;

    if ($proxy) {
        $this->update_proxy($f, $ci["http_code"], $er);
    } else if (preg_match("/^40\d$/", $ci["http_code"])) {
        // A 40x answer on a direct connection is treated as final: no retry.
        $this->__log("{$log} fails (err = {$er}, http_code = {$ci["http_code"]})");
        return false;
    }

    if ($f) {
        if ($n >= $maxAttempts) {
            // Give up instead of recursing without bound.
            $this->__log("{$log} fails (err = {$er}, http_code = {$ci["http_code"]}), giving up after {$n} attempts");
            return false;
        }
        $this->__log("{$log} fails (err = {$er}, http_code = {$ci["http_code"]}), retry...");
        return $this->post_url($url, $form, $proxy, $opts, $n + 1);
    }

    return $ss;
}
/**
 * Converts a textual room description into a room count.
 *
 * The text is lower-cased and reduced to its digits and +/- signs; if that yields a
 * non-zero integer it is returned. Otherwise a few known words are mapped to a count,
 * and anything unrecognised yields 0.
 *
 * @param string $strRooms Room description, e.g. a number or a word such as "студия".
 * @return int Room count.
 */
public function getRooms($strRooms)
{
    $normalized = mb_strtolower($strRooms);
    $digits = filter_var($normalized, FILTER_SANITIZE_NUMBER_INT);
    $count = (int) $digits;

    // A numeric description wins outright.
    if ($count !== 0) {
        return $count;
    }

    // Word-only descriptions with a fixed room count.
    $named = [
        'студия' => 1,
        'гостинка' => 1,
        'комната' => 0,
    ];

    return isset($named[$normalized]) ? $named[$normalized] : 0;
}
/**
 * Posts the given payload to the sibdom.ru "get_phone" endpoint.
 *
 * The response is only dumped for debugging and is not parsed, so the method always
 * returns the placeholder string "Нет телефона".
 *
 * @param mixed $send_data Payload; it is JSON-encoded and sent as the POST body.
 * @return string Always "Нет телефона".
 */
public function getPhone($send_data)
{
    // Fixed 2-second pause before every request.
    sleep(2);

    $data = json_encode($send_data, JSON_UNESCAPED_UNICODE);
    var_dump($data);

    $headers = [
        'cache-control: max-age=0',
        'upgrade-insecure-requests: 1',
        'user-agent: Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.97 Safari/537.36',
        'sec-fetch-user: ?1',
        'accept: application/json, text/javascript, text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3',
        'x-compress: null',
        'sec-fetch-site: none',
        'sec-fetch-mode: navigate',
        'accept-encoding: deflate, br',
        'accept-language: ru-RU,ru;q=0.9,en-US;q=0.8,en;q=0.7',
        'Content-Type: application/x-www-form-urlencoded; charset=utf-8',
        'X-Requested-With: XMLHttpRequest',
    ];

    $ch = curl_init('sibdom.ru/api/get_phone');
    curl_setopt($ch, CURLOPT_COOKIEFILE, __DIR__ . '/cookie.txt');
    curl_setopt($ch, CURLOPT_COOKIEJAR, __DIR__ . '/cookie.txt');
    curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    // NOTE(review): certificate verification is disabled here - confirm this is intended.
    curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
    // Response headers are included in the returned string.
    curl_setopt($ch, CURLOPT_HEADER, true);
    curl_setopt($ch, CURLOPT_POST, 1);
    curl_setopt($ch, CURLOPT_POSTFIELDS, $data);

    $html = curl_exec($ch);
    if ($html === false) {
        // Record the transport failure instead of silently dropping it.
        $this->__log("getPhone: request failed: " . curl_error($ch));
    }
    curl_close($ch);
    var_dump($html);

    // The response is not parsed yet; define the placeholder explicitly so the log
    // line below no longer reads an undefined variable.
    $phone = "Нет телефона";
    $this->__log("Phone parsed: " . $phone);

    return $phone;
}
/**
 * Decodes "\uXXXX" escape sequences in a string into UTF-8 characters.
 *
 * Each escape is rewritten as the hexadecimal HTML entity "&#xXXXX;" and the result is
 * passed through html_entity_decode(), so any HTML entities already present in the
 * input are decoded as well (quotes are left untouched).
 *
 * @param string $str Text containing "\uXXXX" escapes (XXXX = four hex digits).
 * @return string The decoded UTF-8 text.
 */
function unicode_escape_decode($str) {
    // Only real hex digits are accepted, so sequences like "\uZZZZ" are left alone.
    $entities = preg_replace('~\\\\u([0-9a-fA-F]{4})~', '&#x$1;', $str);

    // ENT_NOQUOTES is the explicit spelling of the 0 that the previous null argument
    // was coerced to.
    return html_entity_decode($entities, ENT_NOQUOTES, 'UTF-8');
}
/**
 * Runs a full scraping pass over every configured section.
 *
 * For each section it walks the listing pages from $this->startpage to the page count
 * reported by getPages(), collects the items accepted by checkItem() into $this->items,
 * then calls getItem() for each collected item; items for which getItem() returns a
 * falsy value are passed to saveUrl(). The session is closed with _session_end().
 *
 * @return bool Always true.
 */
public function grab()
{
    print_r("start GRAB");

    $city = conf::$city->id;
    $this->session_start();

    // Keys are quoted: bare words here are undefined constants (a fatal Error on PHP 8).
    $this->sections = [["deal_type" => "prodam", "category" => "kvartiry"]];

    foreach ($this->sections as $section) {
        $this->deal_type = $section["deal_type"];
        $this->category = $section["category"];

        $this->__log("\n\n=========\n grab($city, $this->deal_type, $this->category);\n=========\n");

        $url = $this->create_url($this->category);
        $pages = $this->getPages($url);

        // NOTE(review): $limit is decremented for rejected items and reset for accepted
        // ones, but nothing ever reads it - presumably an early-exit check is missing.
        $limit = $this->limit;

        for ($p = $this->startpage; $p <= $pages; $p++) {
            // Pass the category as in the first call; the previous argument was an undefined variable.
            $url = $this->create_url($this->category, $p);
            $items = $this->getItems($url);

            foreach ($items as $item) {
                if (!$this->checkItem($item)) {
                    $limit--;
                } else {
                    $limit = $this->limit;
                    $this->items[$item->id] = $item;
                }
            }
        }

        $n = count($this->items);
        $this->__log("\n\n===========\n ITEMS END: {$n} \n==========\n\n");

        foreach ($this->items as $item) {
            if (!$this->getItem($item)) {
                $this->saveUrl($item);
            }
        }
    }

    $this->_session_end();

    return true;
}
/**
 * Builds the listing URL for a given page number.
 *
 * The address is fixed to one sibdom.ru listing path and sort query; only the page
 * number varies.
 *
 * @param mixed $urlsfx Currently ignored.
 * @param int   $p      Page number placed in the "page" query parameter.
 * @return string The listing URL.
 */
public function create_url($urlsfx, $p = 1)
{
    return 'https://www.sibdom.ru/kvartiry/prodam_krasnoyarsk_ot-sobstvennika/?q=20_dateadd_desc&page=' . $p;
}
/**
 * Session-end hook with an empty body: calling it does nothing.
 *
 * NOTE(review): presumably kept as a no-op so that cleanup only happens through
 * _session_end() - confirm against the parent class.
 */
public function session_end() { }
/**
 * Removes the per-site pid file and the cookie file at the end of a run.
 *
 * Both deletions are best-effort: errors (for example a file that does not exist) are
 * suppressed.
 *
 * @return void
 */
public function _session_end()
{
    // The original looked up $_ENV[ROOT] with a bare word. Resolve it explicitly: use the
    // ROOT constant when one is defined, otherwise the literal key 'ROOT'. That is what
    // PHP 7 did implicitly, whereas PHP 8 throws an Error for an undefined constant.
    $rootKey = defined('ROOT') ? constant('ROOT') : 'ROOT';
    $root = isset($_ENV[$rootKey]) ? $_ENV[$rootKey] : '';

    @unlink("{$root}/../pids/{$this->site->code}.pid");
    @unlink($this->cookies);
}
/**
 * Picks a random proxy from get_proxylist() and stores it on the instance.
 *
 * Each list entry is split on ":" into ip, port, login and pass, which are stored in
 * $this->__proxy; $this->proxy is set to "ip:port". Missing fields become empty strings.
 * When the list is empty, $this->proxy is set to "" and $this->__proxy keeps empty fields.
 *
 * @param mixed $proxy Currently ignored; the proxy always comes from get_proxylist().
 * @param int   $reset Currently ignored.
 * @return string The selected "ip:port", or "" when no proxy is available.
 */
public function get_proxy($proxy = "", $reset = 0)
{
    $list = $this->get_proxylist();

    // Keys are quoted: bare words here are undefined constants (a fatal Error on PHP 8).
    $this->__proxy = (object) ["ip" => "", "port" => "", "login" => "", "pass" => ""];

    if (!is_array($list) || count($list) === 0) {
        // mt_rand(0, -1) would fail on an empty list; report it and select nothing.
        $this->__log("get_proxy: proxy list is empty");
        $this->proxy = "";
        return $this->proxy;
    }

    // Re-index so the random offset is valid even if the list has gaps in its keys.
    $list = array_values($list);
    $entry = $list[mt_rand(0, count($list) - 1)];

    // Pad to four fields so entries without credentials do not hit undefined offsets.
    $parts = array_pad(explode(":", $entry), 4, "");
    $this->__proxy->ip = $parts[0];
    $this->__proxy->port = $parts[1];
    $this->__proxy->login = $parts[2];
    $this->__proxy->pass = $parts[3];

    $this->proxy = $this->__proxy->ip . ":" . $this->__proxy->port;

    return $this->proxy;
}
/**
 * Proxy-result hook; this implementation ignores all of its arguments and always
 * returns false.
 *
 * @param mixed  $n      Failure flag; post_url() passes a truthy value when the request
 *                       errored or the HTTP status was not 200.
 * @param mixed  $status HTTP status code of the request, as passed by post_url().
 * @param string $error  cURL error text of the request, as passed by post_url().
 * @return bool Always false.
 */
public function update_proxy($n, $status = "", $error = "")
{
return false;
}
}