该楼层疑似违规,已被系统折叠。[隐藏此楼] [查看此楼]
报错信息:
UnboundLocalError: local variable 'text_list' referenced before assignment
代码如下:
import requests
from bs4 import BeautifulSoup
import datetime
import time
# Crawl listing pages 1-100 and print every extracted post record.
# NOTE(review): get_page and get_data are defined further down in this file;
# they must already be defined by the time this loop executes.
# NOTE(review): requests needs an absolute URL; the host below is assumed
# from the '/bxj-' path (Hupu forum) -- confirm before relying on it.
BASE_URL = 'https://bbs.hupu.com'
# Single source of truth for the pause so the status message cannot drift
# away from the delay that is actually applied.
PAUSE_SECONDS = 5
for i in range(1, 101):
    link = BASE_URL + '/bxj-' + str(i)
    post_link = get_page(link)
    data_list = get_data(post_link)
    for each in data_list:
        print(each)
    # Report before sleeping so the message shows up while the pause runs.
    print('第', i, '页获取完成', '休息' + str(PAUSE_SECONDS) + '秒')
    time.sleep(PAUSE_SECONDS)
def get_page(link):
    """Download a listing page and return the ``<li>`` tags of its post list.

    Args:
        link: URL of the listing page to fetch.

    Returns:
        The ``<li>`` tags found inside the last ``<ul class="for-list">``
        element of the page, or an empty list when the page contains no
        such element.
    """
    headers = {'user-agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.80 Safari/537.36'}
    r = requests.get(link, headers=headers)
    # r.content is the raw response bytes (r.text would be the string that
    # requests decodes itself); decode explicitly as UTF-8 here.
    html = r.content.decode('UTF-8')
    soup = BeautifulSoup(html, 'lxml')
    # Default to an empty result: when no <ul class="for-list"> exists the
    # loop body never runs, and without this default the return statement
    # raises UnboundLocalError.
    text_list = []
    for post in soup.find_all('ul', class_='for-list'):
        text_list = post.find_all('li')
    return text_list
def get_data(post_list, base_url=''):
    """Extract title and author details from post list items.

    Args:
        post_list: Iterable of parsed list-item tags; each must expose a
            BeautifulSoup-style ``find(name, class_=...)`` method.
        base_url: Prefix prepended to each post href. Defaults to an empty
            string, so post hrefs are returned exactly as found.

    Returns:
        A list of ``[title, post_url, author, author_url]`` lists, one for
        every item that contains both the title anchor (``a.truetit``) and
        the author anchor (``a.aulink``). Items missing either anchor are
        skipped.
    """
    data_list = []
    for post in post_list:
        # Look each anchor up once and reuse it for both text and href.
        title_tag = post.find('a', class_='truetit')
        user_tag = post.find('a', class_='aulink')
        # find() returns None when the anchor is absent; skip such items
        # instead of failing on attribute access.
        if title_tag is None or user_tag is None:
            continue
        data_list.append([
            title_tag.text.strip(),
            base_url + title_tag['href'],
            user_tag.text.strip(),
            user_tag['href'],
        ])
    return data_list
如果觉得《python爬取论坛帖子_python爬虫爬取虎扑论坛的帖子名称和链接 为什么只能爬10页就报...》对你有帮助,请点赞、收藏,并留下你的观点哦!