先上代码,后分析出现的问题:
# coding: utf-8
import gzip
import http.cookiejar
import re
import ssl
import time
import urllib.parse
import urllib.request

from bs4 import BeautifulSoup
def get_opener(heads):
    """Build a urllib opener that keeps cookies and sends fixed headers.

    Args:
        heads: dict mapping HTTP header names to header values.

    Returns:
        A ``urllib.request.OpenerDirector`` backed by an in-memory
        ``CookieJar`` (so the login session cookie survives between
        requests), with ``addheaders`` populated from *heads*.
    """
    cookie_jar = http.cookiejar.CookieJar()
    cookie_processor = urllib.request.HTTPCookieProcessor(cookie_jar)
    opener = urllib.request.build_opener(cookie_processor)
    # addheaders expects a list of (name, value) tuples; no need for a
    # manual append loop.
    opener.addheaders = list(heads.items())
    return opener
def ungzip(data):
    """Gunzip a response body if it is gzip-compressed, else pass it through.

    The server may or may not honor ``Accept-Encoding: gzip``, so we just
    try to decompress and fall back to the raw bytes.

    Args:
        data: raw HTTP response body (bytes), possibly gzip-compressed.

    Returns:
        Decompressed bytes, or *data* unchanged when it is not gzip data.
    """
    try:
        print("正在解压....")
        data = gzip.decompress(data)
        print("解压完成")
    except (OSError, EOFError):
        # gzip.decompress raises OSError (BadGzipFile) on a wrong magic
        # number and EOFError on truncated/empty input — the body was
        # simply not compressed. A bare except here would also hide
        # real bugs such as passing in a str instead of bytes.
        print("无需解压")
    return data
if __name__ == "__main__":
    import json

    # Work around self-signed-certificate failures (see the SSL note
    # below): disable certificate verification globally for urllib.
    ssl._create_default_https_context = ssl._create_unverified_context
    heads = {
        "Accept": "text/html, application/xhtml+xml, */*",
        "Accept-Language": "zh-CN",
        "User-Agent": "Mozilla/5.0 (Windows NT 6.3; WOW64; rv:43.0) Gecko/0101 Firefox/43.0",
        "Accept-Encoding": "gzip, deflate",
        "Host": "",
        "DNT": "1",
        "Connection": "Keep-Alive"
    }
    opener = get_opener(heads)
    # NOTE(review): the blog platform stripped the host from every URL
    # ("/", "/captcha.gif...", "/login/phone_num"); they originally
    # pointed at zhihu.com — restore the full URLs before running.
    url = "/"
    op = opener.open(url)
    data1 = op.read()
    data1 = ungzip(data1).decode('utf-8')

    # Pull the anti-CSRF token out of the login page's hidden input.
    soup = BeautifulSoup(data1, "html.parser")
    _xsrf = soup.find("input", {'type': 'hidden'}).get("value")

    # SECURITY: credentials hard-coded in source — move to env vars or
    # a config file before sharing this script.
    password = "hzc19911005"
    phone_num = "13267243809"

    # Download the captcha image, save it locally, and let a human
    # read it — the r= timestamp keeps the captcha session fresh.
    captcha_url = "/captcha.gif?r=%d&type=login" % (time.time() * 1000)
    captchadata = opener.open(captcha_url).read()
    with open("1.gif", 'wb') as fp:
        fp.write(captchadata)
    yanzhengma = input("captcha:")

    postdata = {
        "_xsrf": _xsrf,
        "password": password,
        # "captcha_type" must NOT be sent — the server rejects it.
        "phone_num": phone_num,
        "captcha": yanzhengma
    }
    postdata = urllib.parse.urlencode(postdata).encode()
    login_url = "/login/phone_num"
    op2 = opener.open(login_url, postdata)
    login_data = op2.read()
    data = ungzip(login_data).decode("utf-8")
    print(data)
    # The response is JSON; parse it with json.loads instead of eval().
    # eval() on a server-controlled string is arbitrary code execution,
    # and it also chokes on JSON literals like true/false/null.
    result = json.loads(data)
    if result["r"] == 0:
        print("登录成功")
1、出现“SSLError: [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed (_ssl.c:581)”:
Python 2.7.9 之后版本引入了一个新特性
当你用 urllib 去 urlopen 一个 https 地址时会验证一次 SSL 证书,
当目标站点使用的是自签名的证书时,
就会抛出一个 urllib.error.URLError 的错误消息。
处理方法:
import ssl
ssl._create_default_https_context = ssl._create_unverified_context
2、出现验证码错误,返回: 验证码过期:{ "r": 1, "errcode": 1991829, "data": {"captcha":"验证码回话无效 :(","name":"ERR_VERIFY_CAPTCHA_SESSION_INVALID"}, "msg": "验证码回话无效 :(" }:
发给服务器的post数据没有带验证码:"captcha",解决办法:postdata={
"_xsrf":_xsrf,
"password":password,
#"captcha_type":captcha_type,#不能带有这个字段
"phone_num":phone_num,
"captcha":yanzhengma
}
验证码过期,解决办法:先从url="/captcha.gif?r=%d&type=login"% (time.time() * 1000)下载图片保存在本地,然后人工识别,手动输入验证码
captcha_url = "/captcha.gif?r=%d&type=login" % (time.time() * 1000)
captchadata = opener.open(captcha_url).read()
with open("1.gif", 'wb') as file:
    file.write(captchadata)
yanzhengma = input("captcha:")
如果觉得《python爬带用户名密码的网页_python爬虫:使用账号 密码和验证码登录知乎网页...》对你有帮助,请点赞、收藏,并留下你的观点哦!