php

php

PHP解析百度图片搜索结果json中objURL图片原始地址函数

孤魂 发表了文章 • 0 个评论 • 1847 次浏览 • 2016-11-17 09:01 • 来自相关话题

<?php
// Sample input: an objURL value in Baidu's obfuscated encoding; it is passed to baidtu_uncomplie() further down.
$str = 'ippr_z2C$qAzdH3FAzdH3Fojgojg_z&e3Bf5f5_z&e3Bv54AzdH3FrAzdH3Fda8a8a8nAzdH3Fda8a8a8n8m9bdc-8a0abn8cdb_z&e3B3r2';
/**
 * Decode an obfuscated Baidu image-search "objURL" value into a plain URL.
 *
 * The encoding is a fixed substitution: three multi-character tokens stand
 * for ":", "." and "/", and every character in [a-w0-9] maps to one other
 * character. Anything not in the table (for example "-") passes through.
 *
 * @param string $k Encoded objURL, or an already-plain URL.
 * @return string The decoded URL. $k is returned unchanged when it is empty
 *                or already contains "http".
 */
function baidtu_uncomplie($k) {
    // Substitution table; static so it is built only once per request.
    static $d = array(
        'w' => 'a', 'k' => 'b', 'v' => 'c', '1' => 'd', 'j' => 'e', 'u' => 'f',
        '2' => 'g', 'i' => 'h', 't' => 'i', '3' => 'j', 'h' => 'k', 's' => 'l',
        '4' => 'm', 'g' => 'n', '5' => 'o', 'r' => 'p', 'q' => 'q', '6' => 'r',
        'f' => 's', 'p' => 't', '7' => 'u', 'e' => 'v', 'o' => 'w', '8' => '1',
        'd' => '2', 'n' => '3', '9' => '4', 'c' => '5', 'm' => '6', '0' => '7',
        'b' => '8', 'l' => '9', 'a' => '0',
        '_z2C$q' => ':', '_z&e3B' => '.', 'AzdH3F' => '/',
    );

    // strpos() returns 0 when the input *starts* with "http". A bare truthiness
    // check treats that as "not found" and would scramble a plain URL, so
    // compare against false explicitly.
    if (!$k || strpos($k, 'http') !== false) {
        return $k;
    }

    // strtr() with an array tries the longest key first and never rescans
    // text it has already replaced, so the multi-character tokens and the
    // single characters are translated in one pass.
    return strtr($k, $d);
}
// Decode the sample value and print the result.
// NOTE(review): the text after the semicolon looks like page residue ("View all"), not PHP — confirm before running.
print_r(baidtu_uncomplie($str)); 查看全部
<?php
// Sample input: an objURL value in Baidu's obfuscated encoding; it is passed to baidtu_uncomplie() further down.
$str = 'ippr_z2C$qAzdH3FAzdH3Fojgojg_z&e3Bf5f5_z&e3Bv54AzdH3FrAzdH3Fda8a8a8nAzdH3Fda8a8a8n8m9bdc-8a0abn8cdb_z&e3B3r2';
/**
 * Decode an obfuscated Baidu image-search "objURL" value into a plain URL.
 *
 * The encoding is a fixed substitution: three multi-character tokens stand
 * for ":", "." and "/", and every character in [a-w0-9] maps to one other
 * character. Anything not in the table (for example "-") passes through.
 *
 * @param string $k Encoded objURL, or an already-plain URL.
 * @return string The decoded URL. $k is returned unchanged when it is empty
 *                or already contains "http".
 */
function baidtu_uncomplie($k) {
    // Substitution table; static so it is built only once per request.
    static $d = array(
        'w' => 'a', 'k' => 'b', 'v' => 'c', '1' => 'd', 'j' => 'e', 'u' => 'f',
        '2' => 'g', 'i' => 'h', 't' => 'i', '3' => 'j', 'h' => 'k', 's' => 'l',
        '4' => 'm', 'g' => 'n', '5' => 'o', 'r' => 'p', 'q' => 'q', '6' => 'r',
        'f' => 's', 'p' => 't', '7' => 'u', 'e' => 'v', 'o' => 'w', '8' => '1',
        'd' => '2', 'n' => '3', '9' => '4', 'c' => '5', 'm' => '6', '0' => '7',
        'b' => '8', 'l' => '9', 'a' => '0',
        '_z2C$q' => ':', '_z&e3B' => '.', 'AzdH3F' => '/',
    );

    // strpos() returns 0 when the input *starts* with "http". A bare truthiness
    // check treats that as "not found" and would scramble a plain URL, so
    // compare against false explicitly.
    if (!$k || strpos($k, 'http') !== false) {
        return $k;
    }

    // strtr() with an array tries the longest key first and never rescans
    // text it has already replaced, so the multi-character tokens and the
    // single characters are translated in one pass.
    return strtr($k, $d);
}
// Decode the sample value and print the result.
print_r(baidtu_uncomplie($str));

PHP通过pthreads扩展实现真正的多线程采集

孤魂 发表了文章 • 0 个评论 • 1748 次浏览 • 2015-12-25 09:11 • 来自相关话题

最近自己的项目采集,一直在使用PHP CURL的功能进行采集,使用命令行执行PHP文件,解决了PHP运行超时的问题,但只能单线程采集。最近找到了使用pthreads实现多线程采集的方法,这里安装方法就不再详细说明了,如果你使用Phpstudy的套件的话,需要注意以下三点:一是选择好正确的版本,php 5.x只能使用2.09以下的版本;其次是需要将php_pthreads.dll放在ext目录,然后在php.ini文件中加载此文件;最后需要将pthreadVC2.dll分别复制到./PHPa/目录和./Apache/bin/目录。下面分享一下我的采集源码。<?php
// Lift PHP's execution time limit (0 = no limit) so a long-running crawl is not cut off.
set_time_limit(0);

/**
 * Worker thread that fetches a single URL.
 *
 * The URL is supplied at construction time; run() stores whatever
 * model_http_curl_get() returns in $data for the parent to read.
 */
class new_thread_run extends Thread
{
    /** URL this worker will request. */
    public $url;

    /** Result of the request, written by run(). */
    public $data;

    public function __construct($url)
    {
        $this->url = $url;
    }

    /**
     * Thread entry point: fetch the URL unless it is empty/falsy.
     */
    public function run()
    {
        $target = $this->url;
        if (!$target) {
            return;
        }

        $this->data = model_http_curl_get($target);
    }
}
/**
 * Fetch every URL in $urls_array and return the results keyed like the input.
 *
 * When the pthreads `Thread` class exists, one new_thread_run worker is
 * started per URL so the requests overlap; otherwise the URLs are fetched
 * one after another with model_http_curl_get().
 *
 * @param array $urls_array URLs to fetch; the keys are preserved in the result.
 * @return array Whatever model_http_curl_get() produced for each key. In
 *               threaded mode a key is omitted when its worker's join()
 *               fails. Empty input yields an empty array.
 */
function model_thread_result_get($urls_array)
{
    // Initialise up front: with empty input the original read undefined
    // variables and returned null instead of an empty array.
    $variable_data = array();

    if (!class_exists('Thread')) {
        // pthreads is unavailable: fall back to sequential fetching.
        foreach ($urls_array as $key => $value) {
            $variable_data[$key] = model_http_curl_get($value);
        }
        return $variable_data;
    }

    // Start all workers first so the requests run concurrently.
    $thread_array = array();
    foreach ($urls_array as $key => $value) {
        $thread_array[$key] = new new_thread_run($value);
        $thread_array[$key]->start();
    }

    foreach ($thread_array as $key => $thread) {
        // join() blocks until the worker has finished, so polling
        // isRunning() beforehand is unnecessary.
        if ($thread->join()) {
            $variable_data[$key] = $thread->data;
        }
    }

    return $variable_data;
}
/**
 * Issue an HTTP GET request with cURL and return the result of curl_exec().
 *
 * The request uses a 20-second timeout and a fixed MSIE 7 user agent.
 * NOTE: TLS peer and host verification are switched off, so HTTPS
 * responses are not authenticated.
 *
 * @param string $url Address to request.
 * @return mixed The value returned by curl_exec() (CURLOPT_RETURNTRANSFER is enabled).
 */
function model_http_curl_get($url)
{
    // Options are applied in the listed order.
    $options = array(
        CURLOPT_URL            => $url,
        CURLOPT_RETURNTRANSFER => 1,
        CURLOPT_TIMEOUT        => 20,
        CURLOPT_USERAGENT      => 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.2)',
        CURLOPT_SSL_VERIFYPEER => false,
        CURLOPT_SSL_VERIFYHOST => false,
    );

    $handle = curl_init();
    foreach ($options as $option => $setting) {
        curl_setopt($handle, $option, $setting);
    }

    $response = curl_exec($handle);
    curl_close($handle);

    return $response;
}

// Practical example: build 50 Baidu search URLs with random keywords,
// fetch them all, and print how long the batch took.
$urls_array = array();
for ($i = 0; $i < 50; $i++) {
    // Append with []: a plain assignment overwrote the variable each time
    // and left a single string instead of a list of URLs.
    $urls_array[] = "http://www.baidu.com/s?wd=" . mt_rand(10000, 20000);
}
$t = microtime(true);
$result = model_thread_result_get($urls_array);
$e = microtime(true);
// Elapsed wall-clock seconds for the whole batch.
echo "多线程:" . ($e - $t) . "\n";
?>参考链接:
http://www.thinkphp.cn/topic/22676.html
http://zyan.cc/pthreads/ 查看全部
最近自己的项目采集,一直在使用PHP CURL的功能进行采集,使用命令行执行PHP文件,解决了PHP运行超时的问题,但只能单线程采集。最近找到了使用pthreads实现多线程采集的方法,这里安装方法就不再详细说明了,如果你使用Phpstudy的套件的话,需要注意以下三点:一是选择好正确的版本,php 5.x只能使用2.09以下的版本;其次是需要将php_pthreads.dll放在ext目录,然后在php.ini文件中加载此文件;最后需要将pthreadVC2.dll分别复制到./PHPa/目录和./Apache/bin/目录。下面分享一下我的采集源码。
<?php
// Lift PHP's execution time limit (0 = no limit) so a long-running crawl is not cut off.
set_time_limit(0);

/**
 * Worker thread that fetches a single URL.
 *
 * The URL is supplied at construction time; run() stores whatever
 * model_http_curl_get() returns in $data for the parent to read.
 */
class new_thread_run extends Thread
{
    /** URL this worker will request. */
    public $url;

    /** Result of the request, written by run(). */
    public $data;

    public function __construct($url)
    {
        $this->url = $url;
    }

    /**
     * Thread entry point: fetch the URL unless it is empty/falsy.
     */
    public function run()
    {
        $target = $this->url;
        if (!$target) {
            return;
        }

        $this->data = model_http_curl_get($target);
    }
}
/**
 * Fetch every URL in $urls_array and return the results keyed like the input.
 *
 * When the pthreads `Thread` class exists, one new_thread_run worker is
 * started per URL so the requests overlap; otherwise the URLs are fetched
 * one after another with model_http_curl_get().
 *
 * @param array $urls_array URLs to fetch; the keys are preserved in the result.
 * @return array Whatever model_http_curl_get() produced for each key. In
 *               threaded mode a key is omitted when its worker's join()
 *               fails. Empty input yields an empty array.
 */
function model_thread_result_get($urls_array)
{
    // Initialise up front: with empty input the original read undefined
    // variables and returned null instead of an empty array.
    $variable_data = array();

    if (!class_exists('Thread')) {
        // pthreads is unavailable: fall back to sequential fetching.
        foreach ($urls_array as $key => $value) {
            $variable_data[$key] = model_http_curl_get($value);
        }
        return $variable_data;
    }

    // Start all workers first so the requests run concurrently.
    $thread_array = array();
    foreach ($urls_array as $key => $value) {
        $thread_array[$key] = new new_thread_run($value);
        $thread_array[$key]->start();
    }

    foreach ($thread_array as $key => $thread) {
        // join() blocks until the worker has finished, so polling
        // isRunning() beforehand is unnecessary.
        if ($thread->join()) {
            $variable_data[$key] = $thread->data;
        }
    }

    return $variable_data;
}
/**
 * Issue an HTTP GET request with cURL and return the result of curl_exec().
 *
 * The request uses a 20-second timeout and a fixed MSIE 7 user agent.
 * NOTE: TLS peer and host verification are switched off, so HTTPS
 * responses are not authenticated.
 *
 * @param string $url Address to request.
 * @return mixed The value returned by curl_exec() (CURLOPT_RETURNTRANSFER is enabled).
 */
function model_http_curl_get($url)
{
    // Options are applied in the listed order.
    $options = array(
        CURLOPT_URL            => $url,
        CURLOPT_RETURNTRANSFER => 1,
        CURLOPT_TIMEOUT        => 20,
        CURLOPT_USERAGENT      => 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.2)',
        CURLOPT_SSL_VERIFYPEER => false,
        CURLOPT_SSL_VERIFYHOST => false,
    );

    $handle = curl_init();
    foreach ($options as $option => $setting) {
        curl_setopt($handle, $option, $setting);
    }

    $response = curl_exec($handle);
    curl_close($handle);

    return $response;
}

// Practical example: build 50 Baidu search URLs with random keywords,
// fetch them all, and print how long the batch took.
$urls_array = array();
for ($i = 0; $i < 50; $i++) {
    // Append with []: a plain assignment overwrote the variable each time
    // and left a single string instead of a list of URLs.
    $urls_array[] = "http://www.baidu.com/s?wd=" . mt_rand(10000, 20000);
}
$t = microtime(true);
$result = model_thread_result_get($urls_array);
$e = microtime(true);
// Elapsed wall-clock seconds for the whole batch.
echo "多线程:" . ($e - $t) . "\n";
?>
参考链接:

PHP解析百度图片搜索结果json中objURL图片原始地址函数

孤魂 发表了文章 • 0 个评论 • 1847 次浏览 • 2016-11-17 09:01 • 来自相关话题

<?php
// Sample input: an objURL value in Baidu's obfuscated encoding; it is passed to baidtu_uncomplie() further down.
$str = 'ippr_z2C$qAzdH3FAzdH3Fojgojg_z&e3Bf5f5_z&e3Bv54AzdH3FrAzdH3Fda8a8a8nAzdH3Fda8a8a8n8m9bdc-8a0abn8cdb_z&e3B3r2';
/**
 * Decode an obfuscated Baidu image-search "objURL" value into a plain URL.
 *
 * The encoding is a fixed substitution: three multi-character tokens stand
 * for ":", "." and "/", and every character in [a-w0-9] maps to one other
 * character. Anything not in the table (for example "-") passes through.
 *
 * @param string $k Encoded objURL, or an already-plain URL.
 * @return string The decoded URL. $k is returned unchanged when it is empty
 *                or already contains "http".
 */
function baidtu_uncomplie($k) {
    // Substitution table; static so it is built only once per request.
    static $d = array(
        'w' => 'a', 'k' => 'b', 'v' => 'c', '1' => 'd', 'j' => 'e', 'u' => 'f',
        '2' => 'g', 'i' => 'h', 't' => 'i', '3' => 'j', 'h' => 'k', 's' => 'l',
        '4' => 'm', 'g' => 'n', '5' => 'o', 'r' => 'p', 'q' => 'q', '6' => 'r',
        'f' => 's', 'p' => 't', '7' => 'u', 'e' => 'v', 'o' => 'w', '8' => '1',
        'd' => '2', 'n' => '3', '9' => '4', 'c' => '5', 'm' => '6', '0' => '7',
        'b' => '8', 'l' => '9', 'a' => '0',
        '_z2C$q' => ':', '_z&e3B' => '.', 'AzdH3F' => '/',
    );

    // strpos() returns 0 when the input *starts* with "http". A bare truthiness
    // check treats that as "not found" and would scramble a plain URL, so
    // compare against false explicitly.
    if (!$k || strpos($k, 'http') !== false) {
        return $k;
    }

    // strtr() with an array tries the longest key first and never rescans
    // text it has already replaced, so the multi-character tokens and the
    // single characters are translated in one pass.
    return strtr($k, $d);
}
// Decode the sample value and print the result.
// NOTE(review): the text after the semicolon looks like page residue ("View all"), not PHP — confirm before running.
print_r(baidtu_uncomplie($str)); 查看全部
<?php
// Sample input: an objURL value in Baidu's obfuscated encoding; it is passed to baidtu_uncomplie() further down.
$str = 'ippr_z2C$qAzdH3FAzdH3Fojgojg_z&e3Bf5f5_z&e3Bv54AzdH3FrAzdH3Fda8a8a8nAzdH3Fda8a8a8n8m9bdc-8a0abn8cdb_z&e3B3r2';
/**
 * Decode an obfuscated Baidu image-search "objURL" value into a plain URL.
 *
 * The encoding is a fixed substitution: three multi-character tokens stand
 * for ":", "." and "/", and every character in [a-w0-9] maps to one other
 * character. Anything not in the table (for example "-") passes through.
 *
 * @param string $k Encoded objURL, or an already-plain URL.
 * @return string The decoded URL. $k is returned unchanged when it is empty
 *                or already contains "http".
 */
function baidtu_uncomplie($k) {
    // Substitution table; static so it is built only once per request.
    static $d = array(
        'w' => 'a', 'k' => 'b', 'v' => 'c', '1' => 'd', 'j' => 'e', 'u' => 'f',
        '2' => 'g', 'i' => 'h', 't' => 'i', '3' => 'j', 'h' => 'k', 's' => 'l',
        '4' => 'm', 'g' => 'n', '5' => 'o', 'r' => 'p', 'q' => 'q', '6' => 'r',
        'f' => 's', 'p' => 't', '7' => 'u', 'e' => 'v', 'o' => 'w', '8' => '1',
        'd' => '2', 'n' => '3', '9' => '4', 'c' => '5', 'm' => '6', '0' => '7',
        'b' => '8', 'l' => '9', 'a' => '0',
        '_z2C$q' => ':', '_z&e3B' => '.', 'AzdH3F' => '/',
    );

    // strpos() returns 0 when the input *starts* with "http". A bare truthiness
    // check treats that as "not found" and would scramble a plain URL, so
    // compare against false explicitly.
    if (!$k || strpos($k, 'http') !== false) {
        return $k;
    }

    // strtr() with an array tries the longest key first and never rescans
    // text it has already replaced, so the multi-character tokens and the
    // single characters are translated in one pass.
    return strtr($k, $d);
}
// Decode the sample value and print the result.
print_r(baidtu_uncomplie($str));

PHP通过pthreads扩展实现真正的多线程采集

孤魂 发表了文章 • 0 个评论 • 1748 次浏览 • 2015-12-25 09:11 • 来自相关话题

最近自己的项目采集,一直在使用PHP CURL的功能进行采集,使用命令行执行PHP文件,解决了PHP运行超时的问题,但只能单线程采集。最近找到了使用pthreads实现多线程采集的方法,这里安装方法就不再详细说明了,如果你使用Phpstudy的套件的话,需要注意以下三点:一是选择好正确的版本,php 5.x只能使用2.09以下的版本;其次是需要将php_pthreads.dll放在ext目录,然后在php.ini文件中加载此文件;最后需要将pthreadVC2.dll分别复制到./PHPa/目录和./Apache/bin/目录。下面分享一下我的采集源码。<?php
// Lift PHP's execution time limit (0 = no limit) so a long-running crawl is not cut off.
set_time_limit(0);

/**
 * Worker thread that fetches a single URL.
 *
 * The URL is supplied at construction time; run() stores whatever
 * model_http_curl_get() returns in $data for the parent to read.
 */
class new_thread_run extends Thread
{
    /** URL this worker will request. */
    public $url;

    /** Result of the request, written by run(). */
    public $data;

    public function __construct($url)
    {
        $this->url = $url;
    }

    /**
     * Thread entry point: fetch the URL unless it is empty/falsy.
     */
    public function run()
    {
        $target = $this->url;
        if (!$target) {
            return;
        }

        $this->data = model_http_curl_get($target);
    }
}
/**
 * Fetch every URL in $urls_array and return the results keyed like the input.
 *
 * When the pthreads `Thread` class exists, one new_thread_run worker is
 * started per URL so the requests overlap; otherwise the URLs are fetched
 * one after another with model_http_curl_get().
 *
 * @param array $urls_array URLs to fetch; the keys are preserved in the result.
 * @return array Whatever model_http_curl_get() produced for each key. In
 *               threaded mode a key is omitted when its worker's join()
 *               fails. Empty input yields an empty array.
 */
function model_thread_result_get($urls_array)
{
    // Initialise up front: with empty input the original read undefined
    // variables and returned null instead of an empty array.
    $variable_data = array();

    if (!class_exists('Thread')) {
        // pthreads is unavailable: fall back to sequential fetching.
        foreach ($urls_array as $key => $value) {
            $variable_data[$key] = model_http_curl_get($value);
        }
        return $variable_data;
    }

    // Start all workers first so the requests run concurrently.
    $thread_array = array();
    foreach ($urls_array as $key => $value) {
        $thread_array[$key] = new new_thread_run($value);
        $thread_array[$key]->start();
    }

    foreach ($thread_array as $key => $thread) {
        // join() blocks until the worker has finished, so polling
        // isRunning() beforehand is unnecessary.
        if ($thread->join()) {
            $variable_data[$key] = $thread->data;
        }
    }

    return $variable_data;
}
/**
 * Issue an HTTP GET request with cURL and return the result of curl_exec().
 *
 * The request uses a 20-second timeout and a fixed MSIE 7 user agent.
 * NOTE: TLS peer and host verification are switched off, so HTTPS
 * responses are not authenticated.
 *
 * @param string $url Address to request.
 * @return mixed The value returned by curl_exec() (CURLOPT_RETURNTRANSFER is enabled).
 */
function model_http_curl_get($url)
{
    // Options are applied in the listed order.
    $options = array(
        CURLOPT_URL            => $url,
        CURLOPT_RETURNTRANSFER => 1,
        CURLOPT_TIMEOUT        => 20,
        CURLOPT_USERAGENT      => 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.2)',
        CURLOPT_SSL_VERIFYPEER => false,
        CURLOPT_SSL_VERIFYHOST => false,
    );

    $handle = curl_init();
    foreach ($options as $option => $setting) {
        curl_setopt($handle, $option, $setting);
    }

    $response = curl_exec($handle);
    curl_close($handle);

    return $response;
}

// Practical example: build 50 Baidu search URLs with random keywords,
// fetch them all, and print how long the batch took.
$urls_array = array();
for ($i = 0; $i < 50; $i++) {
    // Append with []: a plain assignment overwrote the variable each time
    // and left a single string instead of a list of URLs.
    $urls_array[] = "http://www.baidu.com/s?wd=" . mt_rand(10000, 20000);
}
$t = microtime(true);
$result = model_thread_result_get($urls_array);
$e = microtime(true);
// Elapsed wall-clock seconds for the whole batch.
echo "多线程:" . ($e - $t) . "\n";
?>参考链接:
http://www.thinkphp.cn/topic/22676.html
http://zyan.cc/pthreads/ 查看全部
最近自己的项目采集,一直在使用PHP CURL的功能进行采集,使用命令行执行PHP文件,解决了PHP运行超时的问题,但只能单线程采集。最近找到了使用pthreads实现多线程采集的方法,这里安装方法就不再详细说明了,如果你使用Phpstudy的套件的话,需要注意以下三点:一是选择好正确的版本,php 5.x只能使用2.09以下的版本;其次是需要将php_pthreads.dll放在ext目录,然后在php.ini文件中加载此文件;最后需要将pthreadVC2.dll分别复制到./PHPa/目录和./Apache/bin/目录。下面分享一下我的采集源码。
<?php
// Lift PHP's execution time limit (0 = no limit) so a long-running crawl is not cut off.
set_time_limit(0);

/**
 * Worker thread that fetches a single URL.
 *
 * The URL is supplied at construction time; run() stores whatever
 * model_http_curl_get() returns in $data for the parent to read.
 */
class new_thread_run extends Thread
{
    /** URL this worker will request. */
    public $url;

    /** Result of the request, written by run(). */
    public $data;

    public function __construct($url)
    {
        $this->url = $url;
    }

    /**
     * Thread entry point: fetch the URL unless it is empty/falsy.
     */
    public function run()
    {
        $target = $this->url;
        if (!$target) {
            return;
        }

        $this->data = model_http_curl_get($target);
    }
}
/**
 * Fetch every URL in $urls_array and return the results keyed like the input.
 *
 * When the pthreads `Thread` class exists, one new_thread_run worker is
 * started per URL so the requests overlap; otherwise the URLs are fetched
 * one after another with model_http_curl_get().
 *
 * @param array $urls_array URLs to fetch; the keys are preserved in the result.
 * @return array Whatever model_http_curl_get() produced for each key. In
 *               threaded mode a key is omitted when its worker's join()
 *               fails. Empty input yields an empty array.
 */
function model_thread_result_get($urls_array)
{
    // Initialise up front: with empty input the original read undefined
    // variables and returned null instead of an empty array.
    $variable_data = array();

    if (!class_exists('Thread')) {
        // pthreads is unavailable: fall back to sequential fetching.
        foreach ($urls_array as $key => $value) {
            $variable_data[$key] = model_http_curl_get($value);
        }
        return $variable_data;
    }

    // Start all workers first so the requests run concurrently.
    $thread_array = array();
    foreach ($urls_array as $key => $value) {
        $thread_array[$key] = new new_thread_run($value);
        $thread_array[$key]->start();
    }

    foreach ($thread_array as $key => $thread) {
        // join() blocks until the worker has finished, so polling
        // isRunning() beforehand is unnecessary.
        if ($thread->join()) {
            $variable_data[$key] = $thread->data;
        }
    }

    return $variable_data;
}
/**
 * Issue an HTTP GET request with cURL and return the result of curl_exec().
 *
 * The request uses a 20-second timeout and a fixed MSIE 7 user agent.
 * NOTE: TLS peer and host verification are switched off, so HTTPS
 * responses are not authenticated.
 *
 * @param string $url Address to request.
 * @return mixed The value returned by curl_exec() (CURLOPT_RETURNTRANSFER is enabled).
 */
function model_http_curl_get($url)
{
    // Options are applied in the listed order.
    $options = array(
        CURLOPT_URL            => $url,
        CURLOPT_RETURNTRANSFER => 1,
        CURLOPT_TIMEOUT        => 20,
        CURLOPT_USERAGENT      => 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.2)',
        CURLOPT_SSL_VERIFYPEER => false,
        CURLOPT_SSL_VERIFYHOST => false,
    );

    $handle = curl_init();
    foreach ($options as $option => $setting) {
        curl_setopt($handle, $option, $setting);
    }

    $response = curl_exec($handle);
    curl_close($handle);

    return $response;
}

// Practical example: build 50 Baidu search URLs with random keywords,
// fetch them all, and print how long the batch took.
$urls_array = array();
for ($i = 0; $i < 50; $i++) {
    // Append with []: a plain assignment overwrote the variable each time
    // and left a single string instead of a list of URLs.
    $urls_array[] = "http://www.baidu.com/s?wd=" . mt_rand(10000, 20000);
}
$t = microtime(true);
$result = model_thread_result_get($urls_array);
$e = microtime(true);
// Elapsed wall-clock seconds for the whole batch.
echo "多线程:" . ($e - $t) . "\n";
?>
参考链接: