@nemos
2017-05-06T01:11:53.000000Z
字数 1461
阅读 757
py
import urllib2

# Basic fetch.  Signature: urllib2.urlopen(url[, data[, timeout]])
#   data    -- optional request body; must already be urlencoded
#              (e.g. with urllib.urlencode).  Supplying it turns the
#              request into a POST.
#   timeout -- optional timeout, in seconds, for blocking operations such
#              as the connection attempt.
# `url` is expected to be defined by the reader before running this snippet.
response = urllib2.urlopen(url)
html = response.read()  # raw body of the response
import urllib2

# Override request headers (here: the User-Agent) by wrapping the URL in a
# Request object.  `url` and `user_agent` are expected to be defined by the
# reader before running this snippet.
headers = {'User-agent': user_agent}
request = urllib2.Request(url, headers=headers)

# Send the request and read the body of the reply.
response = urllib2.urlopen(request)
html = response.read()
# Route HTTP traffic through a proxy.
proxy_handler = urllib2.ProxyHandler({"http": 'http://some-proxy.com:8080'})
opener = urllib2.build_opener(proxy_handler)
# install_opener() returns None; it makes `opener` the default used by
# every later urllib2.urlopen() call.
urllib2.install_opener(opener)
# Error handling.  HTTPError is a subclass of URLError, so it has to be
# caught first; otherwise the URLError clause would swallow it.
try:
    response = urllib2.urlopen(url)
except urllib2.HTTPError as e:
    error_code = e.code  # HTTP status code returned by the server
except urllib2.URLError as e:
    error_reason = e.reason  # why the request failed (no `code` attribute here)
CookieJar -> FileCookieJar -> MozillaCookieJar & LWPCookieJar
import cookielib
import urllib2

# Keep cookies in memory for the lifetime of the opener.
cookie = cookielib.CookieJar()                 # container that stores the cookies
handler = urllib2.HTTPCookieProcessor(cookie)  # handler that reads/writes the jar
opener = urllib2.build_opener(handler)         # used like urlopen, but cookie-aware
response = opener.open(url)
# `cookie` now holds whatever cookies the response from `url` set.
# Persist cookies to disk in the Mozilla cookies.txt format.
filename = 'cookie.txt'
cookie = cookielib.MozillaCookieJar(filename)
handler = urllib2.HTTPCookieProcessor(cookie)
opener = urllib2.build_opener(handler)
response = opener.open(url)
# save() writes to the filename given to the constructor, replacing the file
# if it already exists.
cookie.save(
    ignore_discard=True,  # also save cookies that are marked to be discarded
    ignore_expires=True,  # also save cookies that have already expired
)
import cookielib  # Python 2 module name (it is http.cookiejar on Python 3)

# Reuse cookies that were previously saved to cookie.txt.
cookie = cookielib.MozillaCookieJar()
cookie.load('cookie.txt', ignore_discard=True, ignore_expires=True)
request = urllib2.Request(url)
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookie))
response = opener.open(request)
import urlparse

# Resolve a (possibly relative) link against the page it was found on.
# `seed_url` and `link` are expected to be defined by the reader.
absolute_link = urlparse.urljoin(seed_url, link)