import requests
from bs4 import BeautifulSoup
import xlwt #写入Excel的库
def excel_write(MV_list):
    """Write scraped MV records to the Excel workbook ``MV.xls``.

    The sheet ``MV_name`` gets a bold header row followed by one row per
    record. The workbook is saved under the relative path ``MV.xls``.

    Args:
        MV_list: Iterable of records. Each record is an indexable sequence
            whose first five items are, in order: name, link, artist name,
            play count, description.

    Raises:
        IndexError: If a record has fewer than five items.
    """
    newtable = 'MV.xls'  # name of the Excel file to create
    wb = xlwt.Workbook(encoding='utf-8')  # create the workbook
    ws = wb.add_sheet('MV_name')  # create the worksheet
    headData = ['名称', '链接', '歌手名称', '播放次数', '简介']  # column titles

    # Build the bold style once and reuse it for every header cell.
    bold = xlwt.easyxf('font:bold on')
    for colnum, title in enumerate(headData):
        # Row 0 is the header row; colnum selects the column.
        ws.write(0, colnum, title, bold)

    # Iterate over the argument itself (not the module-level MV_lists), so the
    # function exports exactly what the caller passed in. Data rows start at
    # row 1, directly below the header.
    for index, record in enumerate(MV_list, start=1):
        for i in range(len(headData)):
            print(record[i])
            ws.write(index, i, record[i])

    wb.save(newtable)
# Scrape the MV listing pages (sorted by weekly views) and export the
# collected records to Excel.


def _clean_text(tag):
    """Return the text of *tag* without newlines or surrounding spaces.

    Returns an empty string when *tag* is None (element missing on the page).
    """
    if tag is None:
        return ''
    return tag.get_text().replace('\n', '').strip(' ')


page = range(1, 3)  # listing pages 1 and 2
MV_lists = []  # one (title, link, artist, play count, description) tuple per MV
for S in page:  # build the URL of each listing page from its page number
    url = 'http://mv.yinyuetai.com/all?pageType=page&sort=weekViews&page=%d&tab=allmv&parenttab=mv' % S
    print(url)
    # A timeout keeps an unresponsive server from hanging the script forever.
    html = requests.get(url, timeout=10)
    # Fail with an explicit HTTP error instead of parsing an error page.
    html.raise_for_status()
    soup = BeautifulSoup(html.text, 'html.parser')

    MV_container = soup.find('ul', {'class': 'clearfix'})
    if MV_container is None:
        # find() returns None when nothing matches; skip this page.
        continue
    MV_s_html = MV_container.find_all('div', {'class': 'info'})  # list of MV info nodes

    for MV in MV_s_html:  # extract the fields of each MV entry
        title_link = MV.a
        if title_link is None:
            continue  # no title link, so nothing to export for this entry
        MV_title = title_link.string  # MV name
        MV_href = title_link.get('href')  # MV link (None if the attribute is absent)

        artist_p = MV.find('p')
        artist_link = artist_p.a if artist_p is not None else None
        MV_name = artist_link.string if artist_link is not None else None  # artist name

        MV_count = _clean_text(MV.find('span', {'class': 'c6'}))  # play count
        MV_description = _clean_text(MV.find('p', {'class': 'description hid J_id'}))  # description

        MV_list_tuple = (MV_title, MV_href, MV_name, MV_count, MV_description)
        MV_lists.append(MV_list_tuple)

excel_write(MV_lists)  # write all collected MV records to the Excel file