自己写的代码:
"""Save the first 50 hot short comments of Douban subject 26630480 to a file.

Also prints the mean of the first 50 rating values found on the same pages.
"""
import re
import time

import requests
from bs4 import BeautifulSoup

# NOTE(review): the scheme and host were missing from the URL in the original
# text, which makes requests raise MissingSchema. book.douban.com is assumed
# because the subject is a Douban book -- confirm before running.
COMMENTS_URL = "https://book.douban.com/subject/26630480/comments/hot?p={page}"
OUTPUT_PATH = "26630480comments.txt"
TARGET_COUNT = 50       # how many comments / ratings to collect
LAST_PAGE = 4           # pages 1..4 are requested at most
DELAY_SECONDS = 5       # pause between two page requests
REQUEST_TIMEOUT = 10    # seconds; avoids hanging forever on a dead connection

# Captures the numeric suffix of the "allstarNN" CSS class of a rating span.
RATING_RE = re.compile(r'<span class="user-stars allstar(.*?) rating"')


def scrape_to_file():
    """Fetch comment pages, append comments to OUTPUT_PATH, print the mean rating.

    At most TARGET_COUNT comments are written (one per line, UTF-8, append
    mode) and at most TARGET_COUNT rating values are averaged. The mean is
    printed over the ratings actually collected; nothing is printed when no
    rating was found.
    """
    saved = 0
    ratings = []

    # Open the output once instead of once per comment.
    with open(OUTPUT_PATH, "a", encoding="utf-8") as out:
        for page in range(1, LAST_PAGE + 1):
            response = requests.get(
                COMMENTS_URL.format(page=page), timeout=REQUEST_TIMEOUT
            )
            soup = BeautifulSoup(response.text, "lxml")

            for comment in soup.find_all("span", "short"):
                if saved >= TARGET_COUNT:
                    break
                # get_text() never returns None, unlike .string on a tag
                # that contains nested markup.
                out.write(comment.get_text() + "\n")
                saved += 1

            for star in RATING_RE.findall(response.text):
                if len(ratings) >= TARGET_COUNT:
                    break
                ratings.append(int(star))

            if saved >= TARGET_COUNT and len(ratings) >= TARGET_COUNT:
                break  # both targets met: no further request, no extra sleep
            time.sleep(DELAY_SECONDS)

    if ratings:
        # Divide by the number of ratings actually summed.
        print(sum(ratings) / len(ratings))


if __name__ == "__main__":
    scrape_to_file()
别人写的代码:
"""Print the first 50 hot short comments of Douban subject 26630480.

After the comments, prints the integer average of every rating value found
on the pages that were fetched.
"""
import re
import time

import requests
from bs4 import BeautifulSoup

# NOTE(review): the scheme and host were missing from the URL in the original
# text, which makes requests raise MissingSchema. book.douban.com is assumed
# because the subject is a Douban book -- confirm before running.
COMMENTS_URL = "https://book.douban.com/subject/26630480/comments/hot?p="
TARGET_COUNT = 50
DELAY_SECONDS = 5  # delay between requests, per douban's robots.txt

# Captures the numeric suffix of the "allstarNN" CSS class of a rating span.
RATING_RE = re.compile(r'<span class="user-stars allstar(.*?) rating"')


def scrape_and_print():
    """Fetch pages until TARGET_COUNT comments are printed, then print the average.

    Each comment is printed with its 1-based running number. Ratings are
    accumulated from every fetched page in full, including ratings that
    appear after the last printed comment on the final page. The average is
    printed only when TARGET_COUNT comments were reached and at least one
    rating was found.
    """
    printed = 0
    rating_sum = 0
    rating_count = 0
    page = 1

    while printed < TARGET_COUNT:
        try:
            response = requests.get(COMMENTS_URL + str(page))
        except requests.RequestException as err:
            print(err)
            break

        soup = BeautifulSoup(response.text, "lxml")
        comments = soup.find_all("span", "short")
        if not comments:
            # No comments on this page (end of list or blocked request):
            # stop instead of requesting further pages forever.
            break

        for item in comments:
            printed += 1
            # get_text() avoids printing "None" for spans with nested markup.
            print(printed, item.get_text())
            if printed == TARGET_COUNT:
                break

        for star in RATING_RE.findall(response.text):
            rating_count += 1
            rating_sum += int(star)

        page += 1
        if printed < TARGET_COUNT:
            # Only wait when another request will follow.
            time.sleep(DELAY_SECONDS)

    # Guard against division by zero when no rating span was matched.
    if printed == TARGET_COUNT and rating_count:
        print('\n平均分是:%d' % (rating_sum // rating_count))


if __name__ == "__main__":
    scrape_and_print()
如果觉得《爬取豆瓣〈毒木圣经〉短书评前50条及其评分》对你有帮助，请点赞、收藏，并留下你的观点哦！