# requests.get(url).content returns the response body as raw bytes (b'...');
# requests.get(url, headers=header).text returns it decoded as a str.
import os
import random
from urllib.parse import parse_qs, urlsplit

import requests
from lxml import etree
# 1. Playlist page whose song links are scraped.
url = 'https://music.163.com/playlist?id=2170930338'

# 5. Base address of the hhtjim external-link service; a track is requested as
#    <base_url><song id>.mp3.  The song id is the one shown in the web player's
#    address bar, e.g. https://music.163.com/#/song?id=156915
base_url = 'https://link.hhtjim.com/163/'

# 2. Pool of User-Agent headers; one is picked at random for the playlist
#    request.
header_list = [
    {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36"},
    {"User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:34.0) Gecko/20100101 Firefox/34.0"},
    {"User-Agent": "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; en) Opera 9.50"},
    {"User-Agent": "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Maxthon 2.0"},
    {"User-Agent": "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E)"},
    {"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11"},
]

# Directory the downloaded .mp3 files are written to.
OUTPUT_DIR = './music'

# Seconds to wait on any single HTTP request before giving up, so a stalled
# server cannot hang the script forever.
REQUEST_TIMEOUT = 30


def parse_song_id(href):
    """Return the song id carried by *href*, or ``None`` if it has none.

    The id is read from the ``id`` query parameter (``/song?id=156915``).
    Only ids made purely of ASCII digits are accepted.  This drops template
    placeholders such as ``/song?id=${x.id}`` and keeps anything unexpected
    (for example ``../``) out of the file name built from the id.
    """
    try:
        query = urlsplit(href).query
    except ValueError:
        # urlsplit rejects some malformed URLs; treat them as "no id".
        return None
    values = parse_qs(query).get('id')
    if not values:
        return None
    song_id = values[0]
    if song_id.isascii() and song_id.isdigit():
        return song_id
    return None


def collect_song_ids(hrefs):
    """Return the valid song ids found in *hrefs*, de-duplicated, in order."""
    parsed = (parse_song_id(href) for href in hrefs)
    # dict.fromkeys keeps first-seen order while removing repeats, so a song
    # linked several times on the page is downloaded only once.
    return list(dict.fromkeys(sid for sid in parsed if sid is not None))


def build_song_url(song_id):
    """Return the external-link download URL for *song_id*."""
    return f'{base_url}{song_id}.mp3'


def fetch_playlist_hrefs(playlist_url):
    """Download *playlist_url* and return the href of every song link in it.

    Raises:
        requests.RequestException: on connection errors, timeouts, or a
            non-success HTTP status.
    """
    header = random.choice(header_list)
    response = requests.get(playlist_url, headers=header,
                            timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    # 3. Select every <a> whose href contains "song?".
    dom = etree.HTML(response.text)
    if dom is None:
        # Defensive: no parse tree means there are no links to return.
        return []
    return dom.xpath('//a[contains(@href,"song?")]/@href')


def download_song(song_id):
    """Download one song into ``OUTPUT_DIR`` and return the file path.

    The file is named ``<song_id>.mp3``.

    Raises:
        requests.RequestException: on connection errors, timeouts, or a
            non-success HTTP status (so an error page is never saved as
            an .mp3 file).
    """
    response = requests.get(build_song_url(song_id), timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    target = os.path.join(OUTPUT_DIR, f'{song_id}.mp3')
    with open(target, 'wb') as file:
        file.write(response.content)
    return target


def main():
    """Scrape the playlist page and download every song it links to."""
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    # 4. Walk the song ids; a failed download skips that song only.
    for song_id in collect_song_ids(fetch_playlist_hrefs(url)):
        try:
            download_song(song_id)
        except requests.RequestException as exc:
            print(f'skipping song {song_id}: {exc}')


if __name__ == '__main__':
    main()