将两个list合并成一个dict。从网站采集到的代理IP和端口格式如下:
# Sample data: two parallel lists; the examples that follow pair
# ip_nodes[i] with port_nodes[i].
ip_nodes=['177.220.136.22','195.178.157.216' ,'179.85.149.192']
port_nodes=['80','8080','3128']
方法一:
# zip() pairs the elements positionally (stopping at the shorter list) and
# dict() turns each (ip, port) pair into a key/value entry.
tmp = dict(zip(ip_nodes, port_nodes))
方法二(使用字典推导式):
# Dict comprehension over the zipped pairs. Pairing with zip() instead of
# indexing by len(port_nodes) avoids an IndexError when the two lists have
# different lengths and gives the same result as dict(zip(...)).
tmp = {ip: port for ip, port in zip(ip_nodes, port_nodes)}
方法三:
# Explicit index loop: build the dict one entry at a time.
combined = {}
# Bound the loop by the shorter list so a length mismatch cannot raise
# IndexError.
for i in range(min(len(ip_nodes), len(port_nodes))):
    combined[ip_nodes[i]] = port_nodes[i]
方法四(使用map和lambda):
# map() calls the lambda with one element from each list and dict() consumes
# the resulting [key, value] pairs.
# NOTE: with lists of unequal length, Python 2's map() pads the shorter one
# with None, whereas Python 3's map() stops at the shorter one.
dict(map(lambda x,y:[x,y],ip_nodes,port_nodes))
#!/usr/bin/env python
# coding: utf-8
import requests, urllib2, time
from lxml import etree
# Address of the listing page that is scraped for proxy candidates.
# NOTE(review): the value has no scheme or host, so it is not a complete URL
# as written -- presumably the site prefix was lost; confirm before running.
url = "/wt/"
# HTTP headers sent with the listing-page request.
headers = {'content-type': 'text/html',
'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:22.0) Gecko/0101 Firefox/22.0'}
# Page that check_proxy() fetches through each candidate proxy.
# NOTE(review): this value also lacks a scheme and host -- confirm.
ip_check_url = '/a/1027/149787.html'
# A second User-Agent string, different from the one in `headers`.
user_agent = 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:12.0) Gecko/0101 Firefox/12.0'
# Timeout setting; presumably in seconds -- TODO confirm.
socket_timeout = 10
# Check proxy
def check_proxy(ip, port):
    """Return True if ``ip_check_url`` can be fetched through the HTTP proxy.

    Args:
        ip: Proxy host address as a string.
        port: Proxy port (string or number); joined to ``ip`` as ``ip:port``.

    Returns:
        True when the request through the proxy succeeds and the body can be
        read; False on any error (the error is printed, never raised).
    """
    try:
        proxy_address = "%s:%s" % (ip, port)
        proxy_handler = urllib2.ProxyHandler({"http": proxy_address})
        # Use the opener directly instead of urllib2.install_opener(): the
        # global opener would otherwise keep routing every later urllib2
        # request through the last proxy that was checked.
        opener = urllib2.build_opener(proxy_handler)
        # NOTE: the original author reported that setting
        # opener.addheaders = [('User-agent', user_agent)] made the check
        # fail for an unknown reason, so no custom User-Agent is set here.
        request = urllib2.Request(ip_check_url)
        # Apply the module-level timeout so a dead proxy cannot block forever.
        conn = opener.open(request, timeout=socket_timeout)
        try:
            # Read the body so a proxy that accepts the connection but never
            # delivers content is still reported as failing.
            conn.read()
        finally:
            conn.close()
    except urllib2.HTTPError as e:
        print("ERROR: Code  %s" % (e.code,))
        return False
    except Exception as detail:
        # Deliberate catch-all: any failure simply means the proxy is unusable.
        print("ERROR:  %s" % (detail,))
        return False
    return True
# Fetch the proxy listing page, pair each scraped IP with its port, and test
# every pair with check_proxy().
try:
    # The timeout keeps an unresponsive listing site from hanging the script;
    # a timeout surfaces as a requests.RequestException and is handled below.
    r = requests.get(url, headers=headers, timeout=socket_timeout)
    # Raises requests.HTTPError when the response status is 4xx or 5xx.
    r.raise_for_status()
except requests.RequestException as e:
    print(e)
else:
    tree = etree.HTML(r.content)
    # Text of the 2nd and 3rd <td> of every table row; presumably the IP and
    # port columns of the listing page -- verify against the actual markup.
    ip_nodes = tree.xpath("//tr//td[2]/text()")
    port_nodes = tree.xpath("//tr//td[3]/text()")
    # NOTE: a dict keeps one port per IP, so an IP listed with several ports
    # retains only the last one.
    tmp = dict(zip(ip_nodes, port_nodes))
    print(tmp)
    for ip, port in tmp.items():
        proxy_detected = check_proxy(ip, port)
        if proxy_detected:
            print(" WORKING: " + ip + ":" + port)
        else:
            print(" FAILED: %s " % (ip,))
如果觉得《python两个list合并成字典_Python将两个list合并为一个字典》对你有帮助,请点赞、收藏,并留下你的观点哦!