1、抓取本地网页,解析其中的图片、标题、价格、星级和浏览量
经过查看和分析,每一项都是由一个div包裹
抓取数据的 Python 代码:
from bs4 import BeautifulSoup
# Location of the locally saved page that is parsed below.
path = r'G:/1_2_homework_required/index.html'

# Open in binary mode so BeautifulSoup detects the document's encoding
# itself instead of depending on the platform's default text codec.
# The whole document is parsed when the soup is constructed, so the file
# handle is only needed for this one statement.
with open(path, 'rb') as wb_data:
    soup = BeautifulSoup(wb_data, 'lxml')

# Every selector is anchored at "div.col-sm-4 > div.thumbnail"; the lists
# are consumed in lockstep afterwards, so they are expected to line up
# index by index.

# <img> elements that are direct children of the thumbnail.
imgs = soup.select('div.col-sm-4 > div.thumbnail > img')

# Title: the <a> inside the second <h4> of the caption.
titles = soup.select(
    'div.col-sm-4 > div.thumbnail > div.caption > h4:nth-of-type(2) > a'
)

# Price: the first <h4> of the caption.
prices = soup.select(
    'div.col-sm-4 > div.thumbnail > div.caption > h4:nth-of-type(1)'
)

# Star rating: the second <p> of the ratings container.
stars = soup.select(
    'div.col-sm-4 > div.thumbnail > div.ratings > p:nth-of-type(2)'
)

# View count: the <p class="pull-right"> of the ratings container.
views = soup.select(
    'div.col-sm-4 > div.thumbnail > div.ratings > p.pull-right'
)
for img,title,price,star,view in zip(imgs,titles,prices,stars,views):
data = {
‘title