使用 phpQuery 抓取搜狗微信的文章内容
直接上代码:
<?php
/**
 * Scrapes a WeChat article from a given URL.
 *
 * The article's title, author, date, copyright marker and body HTML are
 * extracted with phpQuery; every image in the body is downloaded to local
 * storage and the returned HTML is rewritten to reference the local copy.
 */
class WxArticlesAction extends BaseAction {
    /**
     * Scrape the article at $_POST["wxurl"] and echo the result as JSON.
     *
     * Output keys on success: success, names, title, times, contents, orign,
     * links. When the URL is missing or is not an http(s) URL the response is
     * {"success":false} and the script exits.
     *
     * Side effects: inserts one row (memberid, wxurl) into the "wxurl" model
     * and stores the article's images under ueditor/wximg/.
     */
    public function getContent(){
        $wxurl = isset($_POST["wxurl"]) ? $_POST["wxurl"] : "";
        // The URL is client-supplied and is handed to phpQuery to open, so
        // only plain web URLs are accepted.
        if(!self::isHttpUrl($wxurl)){
            echo json_encode(array("success"=>false));
            exit;
        }

        // Record the scrape request; member id "0" means "not logged in".
        $memberid = "0";
        if(session("?homeuser")){
            $loginuser = session("homeuser");
            $memberid = $loginuser["id"];
        }
        $record = array(
            "memberid" => $memberid,
            "wxurl"    => $wxurl
        );
        M("wxurl")->add($record);

        // Load the page; the pq() calls below query this document.
        phpQuery::newDocumentFile($wxurl);
        $title      = trim(pq("#img-content")->find("#activity-name")->text());
        $authername = pq("#post-user")->text();
        $times      = pq(".rich_media_meta_list")->find("#post-date")->text();
        $origianl   = pq(".rich_media_meta_list")->find("#copyright_logo")->text();
        $con        = pq("#js_content");

        foreach($con->find("img") as $img){
            $node = pq($img);
            // The real image address is read from data-src, not src.
            $remote = $node->attr("data-src");
            if(!$remote){
                // Nothing to download for this tag; leave it untouched.
                continue;
            }
            $local = $this->getimg($remote);
            if($local === ""){
                // Download failed: point at the remote image rather than at
                // a local file that does not exist.
                $node->attr("src",$remote);
            }else{
                $node->attr("src","http://www.imeibian.com/".$local);
            }
            // Keep the original address available on the tag.
            $node->attr("title",$remote);
        }
        $cons = $con->html();

        echo json_encode(array("success"=>true,"names"=>$authername,"title"=>$title,"times"=>$times,"contents"=>$cons,"orign"=>$origianl,"links"=>$wxurl));
    }

    /**
     * Download an image and store it under ueditor/wximg/<Y-m-d>/.
     *
     * @param string $url Remote image address; must be http or https.
     * @return string Relative path of the saved file (always given a .jpg
     *                extension), or "" when the URL is rejected, the download
     *                fails, or the file cannot be written.
     */
    public function getimg($url){
        // Image URLs come from scraped HTML, so treat them as untrusted.
        if(!self::isHttpUrl($url)){
            return "";
        }
        $ch = curl_init();
        if($ch === false){
            return "";
        }
        // CURLOPT_HTTPHEADER takes a list of "Name: value" strings. Host is
        // left to cURL (derived from the URL) and Accept-Encoding is handled
        // by CURLOPT_ENCODING below so the body is decoded before saving.
        $httpheader = array(
            "Connection: keep-alive",
            "Pragma: no-cache",
            "Cache-Control: no-cache",
            "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
            "User-Agent: Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.89 Safari/537.36",
            "Accept-Language: zh-CN,zh;q=0.8,en;q=0.6,zh-TW;q=0.4"
        );
        $options = array(
            CURLOPT_HTTPHEADER      => $httpheader,
            CURLOPT_URL             => $url,
            CURLOPT_TIMEOUT         => 5,
            CURLOPT_FOLLOWLOCATION  => 1,
            // Never let the request or a redirect leave http/https.
            CURLOPT_PROTOCOLS       => CURLPROTO_HTTP | CURLPROTO_HTTPS,
            CURLOPT_REDIR_PROTOCOLS => CURLPROTO_HTTP | CURLPROTO_HTTPS,
            // "" = advertise every encoding cURL supports and decode it.
            CURLOPT_ENCODING        => "",
            // Treat HTTP status >= 400 as a failed transfer.
            CURLOPT_FAILONERROR     => true,
            CURLOPT_RETURNTRANSFER  => true
        );
        curl_setopt_array($ch,$options);
        $result = curl_exec($ch);
        curl_close($ch);
        if($result === false || $result === ""){
            return "";
        }

        $time = time();
        $dir  = "ueditor/wximg/".date("Y-m-d",$time);
        // Recursive so a missing parent directory is created too; the second
        // is_dir() covers another request creating the directory concurrently.
        if(!is_dir($dir) && !mkdir($dir,0777,true) && !is_dir($dir)){
            return "";
        }
        $imgurl = $dir."/".$time.rand().".jpg";
        if(file_put_contents($imgurl,$result) === false){
            return "";
        }
        return $imgurl;
    }

    /**
     * Tell whether a value is a non-empty string whose scheme is http or https.
     *
     * @param mixed $url Value to check.
     * @return bool
     */
    private static function isHttpUrl($url){
        if(!is_string($url) || $url === ""){
            return false;
        }
        $scheme = parse_url($url,PHP_URL_SCHEME);
        return is_string($scheme)
            && in_array(strtolower($scheme),array("http","https"),true);
    }
}
?>
- 上一篇: 如何实现自动采集微信公众号文章
- 下一篇: PHP基于标准的CBC模式的DES加密算法