# python网页信息: python获取网页信息的三种方法 (three ways to fetch web page content in Python)

import urllib.request

import http.cookiejar

# Target page requested by every fetch method in this script.
url = 'http://www.baidu.com/'

# Method 1: build a Request object so a custom User-Agent header is sent.
print('方法一')
req_one = urllib.request.Request(url)
req_one.add_header('User-Agent', 'Mozilla/6.0')
# The with-block closes the response even if reading or decoding raises.
with urllib.request.urlopen(req_one) as res_one:
    code_one = res_one.getcode()
    # The body is decoded as UTF-8; a page in another encoding would raise here.
    html_one = res_one.read().decode('utf-8')
print('方法一网页状态码：%s' % (code_one))
print('方法一网页内容：'+html_one)

# Method 2: pass the URL string straight to urlopen, with no custom headers.
print('方法二')
# The with-block guarantees the response is closed once it has been read.
with urllib.request.urlopen(url) as res_two:
    code_two = res_two.getcode()
    # The body is decoded as UTF-8; a page in another encoding would raise here.
    html_two = res_two.read().decode('utf-8')
print('方法二网页状态码：%s' % (code_two))
print('方法二网页内容：'+html_two)

# Method 3: route requests through an opener that records cookies in a jar.
print('方法三')
cj = http.cookiejar.LWPCookieJar()
opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(cj))
# install_opener makes this opener the default for later urlopen calls.
urllib.request.install_opener(opener)
# The with-block closes the response even if reading or decoding raises.
with urllib.request.urlopen(url) as res_three:
    # Print the jar after the request so any cookies set by the response show up.
    print(cj)
    code_three = res_three.getcode()
    # The body is decoded as UTF-8; a page in another encoding would raise here.
    html_three = res_three.read().decode('utf-8')
print('方法三网页状态码：%s' % (code_three))
print('方法三的网页内容：'+html_three)