初学 Python 时自己实现的一个抓取网站图片的小爬虫。
import re
import urllib.request

# Captures the URL of every ``src="... .jpg"`` attribute that is immediately
# followed by a ``width`` attribute. Compiled once at import time.
_IMG_PATTERN = re.compile(r'src="(.*?\.jpg)" width')


def getResouce(url):
    """Fetch *url* and return the raw response body as bytes.

    The response object is closed as soon as the body has been read.
    Errors raised by ``urllib.request.urlopen`` propagate to the caller.
    """
    with urllib.request.urlopen(url) as response:
        return response.read()


def getimg(source, encoding='utf-8'):
    """Extract .jpg image URLs from a page's source.

    Args:
        source: Page content as bytes.
        encoding: Codec used to decode *source* before matching. Defaults to
            utf-8; pass e.g. ``'gb2312'`` for pages served in that encoding.

    Returns:
        A list of the captured URL strings, in document order.

    Raises:
        UnicodeDecodeError: If *source* is not valid in *encoding*.
    """
    return _IMG_PATTERN.findall(source.decode(encoding))


def download(urls):
    """Prompt for a filename prefix and save each image in *urls* to disk.

    The user is asked once for a prefix; files are then written as
    ``<prefix><n>.jpg`` with ``n`` counting up from 1.
    """
    num = input("picture编号:(a-)")
    # The first match is skipped on purpose, as in the original script.
    # NOTE(review): presumably it is a non-content image (logo/banner) - confirm.
    for number, url in enumerate(urls[1:], start=1):
        # urlretrieve downloads the resource and writes it to the given path.
        urllib.request.urlretrieve(
            url, 'E:/picture/litter_picture/%s%s.jpg' % (num, number))


def downloadbyurllib(url):
    """Download the .jpg images referenced by the page at *url*."""
    source = getResouce(url)   # raw page bytes
    urls = getimg(source)      # image URLs found in the page
    download(urls)             # save them to disk
    print("finish the download!")


if __name__ == '__main__':
    # NOTE(review): this URL has no scheme/host, so urlopen cannot fetch it as
    # written; the site prefix appears to have been lost - restore it.
    downloadbyurllib("/pc/8.html")
如果觉得《python 实现网站图片抓取小爬虫》对你有帮助,请点赞、收藏,并留下你的观点哦!