Python模拟百度登录实例
原文:http://www.jb51.net/article/78406.htm
记录下来,慢慢学习:
要先获取token,然后再登录
# -*- coding: utf8 -*-
# NOTE: this is a Python 2 script; urllib2 and cookielib are Python 2 module names.
import cookielib
import re
import urllib
import urllib2

import bs4
# Baidu home page.
URL_BAIDU_INDEX = u"http://www.baidu.com/"
# Endpoint queried for the login token. According to the original author, this
# URL can be used instead:
# https://passport.baidu.com/v2/api/?getapi&class=login&tpl=mn&tangram=true
URL_BAIDU_TOKEN = "https://passport.baidu.com/v2/api/?getapi&tpl=pp&apiver=v3&class=login"
# Endpoint the login form is posted to.
URL_BAIDU_LOGIN = "https://passport.baidu.com/v2/api/?login"

# Account credentials: fill in the user name and password before running.
username = ""
password = ""
# Set up cookie handling: the CookieJar stores and resends cookies
# automatically, so none have to be specified by hand.
cj = cookielib.CookieJar()
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
# Install the opener globally so later urllib2.urlopen() calls go through it
# and therefore share the same cookie jar.
urllib2.install_opener(opener)

# Request the home page first; the response is not used in the visible code.
# NOTE(review): presumably this only collects Baidu's initial cookies -- confirm.
reqReturn = urllib2.urlopen(URL_BAIDU_INDEX)
# Fetch the login token; it has to be obtained before the login request is sent.
tokenReturn = urllib2.urlopen(URL_BAIDU_TOKEN)
# The pattern is single-quoted because it contains double quotes itself, and it
# tolerates any amount of whitespace around the colon.
matchVal = re.search(r'"token"\s*:\s*"(?P<tokenVal>.*?)"', tokenReturn.read())
if matchVal is None:
    # Fail with a clear message instead of an AttributeError on None below.
    raise RuntimeError("token not found in response from " + URL_BAIDU_TOKEN)
tokenVal = matchVal.group("tokenVal")
# Build the login form fields. According to the original author, these were
# obtained by capturing the request sent to
# https://passport.baidu.com/v2/api/?login
postData = {
    "username": username,
    "password": password,
    "u": "https://passport.baidu.com/",
    "tpl": "pp",
    "token": tokenVal,
    "staticpage": "https://passport.baidu.com/static/passpc-account/html/v3Jump.html",
    "isPhone": "false",
    "charset": "UTF-8",
    "callback": "parent.bd__pcbs__ra48vi",
}
# URL-encode the fields into the request body; postData is a string from here on.
postData = urllib.urlencode(postData)
# Send the login request. Passing a data argument makes urllib2 issue a POST.
loginRequest = urllib2.Request(URL_BAIDU_LOGIN, postData)
loginRequest.add_header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8")
loginRequest.add_header("Accept-Encoding", "gzip,deflate,sdch")
loginRequest.add_header("Accept-Language", "zh-CN,zh;q=0.8")
# The User-Agent value is one string; adjacent literals are concatenated.
loginRequest.add_header(
    "User-Agent",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 "
    "(KHTML, like Gecko) Chrome/28.0.1500.72 Safari/537.36",
)
loginRequest.add_header("Content-Type", "application/x-www-form-urlencoded")
sendPost = urllib2.urlopen(loginRequest)
# Fetch a Tieba page to test whether the login succeeded; because cookies are
# managed automatically, no extra handling is needed here.
# According to the original author, the Tieba personal home page
# http://tieba.baidu.com/home/main?un=XXXX&fr=index
# links to all of the account's information.
teibaUrl = "http://tieba.baidu.com/f/like/mylike?v=1387441831248"
content = urllib2.urlopen(teibaUrl).read()
# Transcode the page from GBK to UTF-8 before printing and parsing it.
content = content.decode("gbk").encode("utf8")
# Parenthesised so the line parses both as a print statement and as a call.
print(content)
# Parse the page with BeautifulSoup 4 and collect every <tr> element
# (the original author remarks that it feels less pleasant to use than jsoup).
soup = bs4.BeautifulSoup(content)
# NOTE: this name shadows the builtin ``list``; it is kept because the
# statement that follows refers to it.
list = soup.findAll("tr")
list
=
list[1:len
声明:该文观点仅代表作者本人,牛骨文系教育信息发布平台,牛骨文仅提供信息存储空间服务。
copyright © 2008-2019 亿联网络 版权所有 备案号:粤ICP备14031511号-2
