# 该楼层疑似违规已被系统折叠 | 隐藏此楼 | 查看此楼
#!/usr/bin/python3
#code=utf-8
import urllib.request
import re
# Directory the images are saved into. downloadImg appends the file name
# directly to this string, so it has to end with a path separator.
path = "C:\\Users\\Administrator\\Pictures\\python\\816\\" #download location for the images
#根据网址字符串,读取网页
def getHtml(url):
    """Fetch *url* and return the raw response body.

    Args:
        url: Address to open; passed unchanged to
            ``urllib.request.urlopen``.

    Returns:
        The undecoded response body as ``bytes``.
    """
    # The context manager closes the response even when ``read`` raises;
    # previously the response object was never closed.
    with urllib.request.urlopen(url) as page:
        return page.read()
def getUrllist(html):
    """Extract thread links from a forum listing page.

    Args:
        html: Raw page body as ``bytes``.

    Returns:
        A list of scheme-less addresses (``host/path``), one per match in
        page order. Callers are expected to prepend ``"http://"``.
    """
    # The pattern is pure ASCII, so undecodable bytes can be replaced
    # instead of aborting the whole run.
    text = html.decode('utf-8', errors='replace')
    # NOTE(review): the original pattern literal was truncated to "r'" and
    # the compile call was mangled to "pile(...)". This pattern is a
    # reconstruction that assumes thread links look like href="/p/<digits>";
    # confirm against the live page markup.
    thread_re = re.compile(r'href="(/p/\d+)')
    # NOTE(review): host assumed to be tieba.baidu.com -- confirm.
    urllist = ['tieba.baidu.com' + link for link in thread_re.findall(text)]
    print(urllist)
    return urllist
def downloadImg(urllist):
    """Download every image referenced by the given pages.

    Each entry of *urllist* is prefixed with ``"http://"``, fetched with
    ``getHtml`` and scanned for ``bpic="....jpg"`` attributes. Every match
    is saved under the module-level ``path`` as ``w<N>.jpg``, where N is a
    counter that keeps running across all pages.

    Args:
        urllist: Iterable of scheme-less page addresses.
    """
    # Compiled once: the pattern does not depend on the page being scanned.
    imgre = re.compile(r'bpic="(.+?\.jpg)"')
    x = 0
    for page_url in urllist:
        # Build the complete URL.
        page_url = "http://" + page_url
        print(page_url)
        raw = getHtml(page_url)
        try:
            html = raw.decode('utf-8')
        except UnicodeDecodeError as err:
            # Skip this page. Falling through would scan the previous
            # page's text again, or hit an unbound name on the first page.
            print(err)
            continue
        # Download every image found on the page.
        for imgurl in imgre.findall(html):
            urllib.request.urlretrieve(imgurl, path + 'w%s.jpg' % x)
            print('w%s.jpg' % x)
            x += 1
# Fetch the forum listing page, collect the thread links on it, then download
# the images of every thread.
# NOTE(review): the listing URL had lost its scheme and host, which makes
# urlopen raise ValueError (unknown url type). The host is assumed from the
# "/f?kw=" forum path -- confirm before running.
html = getHtml("http://tieba.baidu.com/f?kw=%D0%A3%BB%A8&fr=ala0&tpl=5")
urllist = getUrllist(html)
downloadImg(urllist)
#异常日志输出
# 如果觉得《python抓取贴吧_python抓取百度贴吧-校花吧 网页图片》对你有帮助,请点赞、收藏,并留下你的观点哦!