爬取三寸人间
"""Download every chapter of the novel "三寸人间" from 81zw.com into one file.

The script fetches the book's index page, collects the chapter links,
then requests each chapter in order and appends its title and body text
to ``三寸人间.txt`` (UTF-8 with BOM) in the current working directory.
"""
import requests
from fake_useragent import UserAgent
from lxml import etree

# NOTE(review): the pasted original started with a GBK source-encoding
# declaration. It is dropped here because Python 3 reads source files as
# UTF-8 by default; restore it only if this file is really saved as GBK.

# Seconds to wait for the server before giving up, so a stalled connection
# cannot hang the whole download.
REQUEST_TIMEOUT = 30

# NOTE(review): both XPath expressions were truncated to '//div[@' in the
# source as received. The selectors below are presumed reconstructions
# (chapter hrefs start with "/", see the URL join further down) -- confirm
# them against the live page markup before relying on this script.
CHAPTER_LINK_XPATH = '//div[@id="list"]//dd/a/@href'
CHAPTER_TEXT_XPATH = '//div[@id="content"]/text()'

url = 'https://www.81zw.com/book/32934/'
headers = {
    'User-Agent': UserAgent().random
}

# Fetch the index page and collect the site-relative chapter links.
response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
response.raise_for_status()
e = etree.HTML(response.text)
txt_urls = e.xpath(CHAPTER_LINK_XPATH)
# Drop the leading "/" of each href and prefix the site root.
txt_urls = ['https://www.81zw.com/' + txt_url[1:] for txt_url in txt_urls]

# Open the output once; the context manager closes it even when a request
# or parse step raises part-way through the book.
with open('三寸人间.txt', 'a', encoding='utf-8-sig') as file:
    for num, txt_url in enumerate(txt_urls, start=1):
        response = requests.get(
            txt_url, headers=headers, timeout=REQUEST_TIMEOUT
        )
        response.raise_for_status()
        # Chapter pages are decoded explicitly as UTF-8 rather than
        # relying on the encoding guessed by requests.
        e = etree.HTML(response.content.decode('utf-8'))
        txt_title = e.xpath('//h1/text()')[0]
        txt_content = e.xpath(CHAPTER_TEXT_XPATH)

        # NOTE(review): the separator after the title and after each line
        # is a single space in the source as received; a newline may have
        # been intended -- confirm before changing.
        file.write(str(txt_title) + ' ')
        for line in txt_content:
            file.write(line + ' ')

        # Optional politeness delay, disabled in the original (it would
        # also need `import time` and `import random`):
        # time.sleep(random.randint(1,3))
        print("第 {} 章下载完毕".format(num))
2020-07-15