@nemos
2017-05-06T01:11:53.000000Z
字数 1461
阅读 718
py
import urllib2
# Basic fetch with urllib2.urlopen(url[, data[, timeout]]).
# `data` and `timeout` are optional and may be omitted; `url`, `data` and
# `timeout` are placeholders that the caller must define.
response = urllib2.urlopen(
    url,
    data=data,        # data to send with the request; must be urlencoded first
    timeout=timeout,  # timeout, in seconds, for the blocking request
)
html = response.read()
import urllib2
# Fetch a page while sending a custom User-agent header.
# `url` and `user_agent` are placeholders that are not defined in this snippet.
headers = {'User-agent' : user_agent} # override the request header information
request = urllib2.Request(url, headers = headers)# build the request carrying those headers
response = urllib2.urlopen(request) # issue the request and get the response
html = response.read()
# Route "http" requests through a proxy by building an opener around a
# ProxyHandler and installing it as the default opener.
proxy_handler = urllib2.ProxyHandler({"http" : 'http://some-proxy.com:8080'})
opener = urllib2.build_opener(proxy_handler)
# install_opener() returns None, so there is nothing to assign
# (and `None = ...` is a SyntaxError).
urllib2.install_opener(opener)
# Error handling for urlopen. HTTPError is a subclass of URLError, so it must
# be caught first; only HTTPError carries an HTTP status `code`.
# `url` is a placeholder that is not defined in this snippet.
try:
    response = urllib2.urlopen(url)
except urllib2.HTTPError as e:
    e.code    # error code (HTTP status)
    e.reason  # error message
except urllib2.URLError as e:
    e.reason  # error message
CookieJar -> FileCookieJar -> MozillaCookieJar & LWPCookieJar
import urllib2
import cookielib
# Collect cookies in an in-memory CookieJar while opening a URL.
# `url` is a placeholder that is not defined in this snippet.
cookie = cookielib.CookieJar() # create the jar instance that stores the cookies
handler = urllib2.HTTPCookieProcessor(cookie)# create the cookie handler
opener = urllib2.build_opener(handler) # the opener is used much like urlopen
response = opener.open(url)
# at this point `cookie` holds the cookies received from url
# Collect cookies in a MozillaCookieJar bound to 'cookie.txt' and save them.
# `url` is a placeholder that is not defined in this snippet.
filename = 'cookie.txt'
cookie = cookielib.MozillaCookieJar(filename)
handler = urllib2.HTTPCookieProcessor(cookie)
opener = urllib2.build_opener(handler)
response = opener.open(url)
cookie.save(ignore_discard = True, # save cookies even if they are marked to be discarded
ignore_expires = True) # save cookies even if they have already expired
# Load previously saved cookies from 'cookie.txt' and send them with a request.
# The rest of this snippet uses the Python 2 module urllib2, so the cookie jar
# must come from Python 2's `cookielib`; `http.cookiejar` is its Python 3 name
# and does not exist alongside urllib2.
# `url` is a placeholder that is not defined in this snippet.
import cookielib
cookie = cookielib.MozillaCookieJar()
cookie.load('cookie.txt', ignore_discard=True, ignore_expires=True)
request = urllib2.Request(url)
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookie))
response = opener.open(request)
import urlparse
# `seed_url` and `link` are placeholders that are not defined in this snippet.
urlparse.urljoin(seed_url, link) # convert a relative path into an absolute URL