Crawler:基于urllib库获取cn-proxy代理的IP地址
目录
- 输出结果
- 实现代码

输出结果
后期更新……
实现代码
# Standard-library imports for the proxy-list crawler, grouped per PEP 8.
import os
import re
import urllib.request

# Echo the working directory so the user can see where the script is running.
print(os.getcwd())
def open_url(url, timeout=10):
    """Download *url* and return the response body decoded as UTF-8.

    Sends a browser-like User-Agent header, presumably because the target
    site rejects urllib's default agent — TODO confirm against cn-proxy.

    Args:
        url: Address to fetch.
        timeout: Socket timeout in seconds (new parameter, default 10, so a
            dead host cannot hang the script forever; default keeps the call
            backward compatible).

    Returns:
        str: The page body decoded as UTF-8.

    Raises:
        urllib.error.URLError: On connection failure or timeout.
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    req = urllib.request.Request(url)
    req.add_header("User-Agent", "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.90 Safari/537.36 2345Explorer/9.2.1.17116")
    # Context manager closes the connection even if read/decode raises;
    # the original leaked the socket.
    with urllib.request.urlopen(req, timeout=timeout) as page:
        return page.read().decode("utf-8")
def get_img(html):
    """Find every IPv4 address in *html*, print each one, and return them.

    The original pattern was broken: unbalanced parentheses, the literal
    dot placed inside the octet alternation rather than after it, and the
    character class ``[0,1]`` accidentally admitting a comma. This version
    matches a dotted quad whose octets are each 0-255.

    Args:
        html: Text to scan (typically a downloaded proxy-list page).

    Returns:
        list[str]: Matched addresses in order of appearance; empty if none.
            (Returning the list is new and backward compatible — the
            original returned None, which callers ignored.)
    """
    # One octet: 250-255 | 200-249 | 0-199 (optional leading 0/1).
    # Longest alternatives first so "255" is not truncated to "25".
    octet = r'(?:25[0-5]|2[0-4]\d|[01]?\d?\d)'
    pattern = r'(?:' + octet + r'\.){3}' + octet
    iplist = re.findall(pattern, html)
    for each in iplist:
        print(each)
    return iplist
if __name__=="__mian__":
url="http://cn-proxy.com"
get_img(open_url(url))
url="http://cn-proxy.com"
get_img(open_url(url))