#!/usr/local/bin/python3.7
import re
import urllib.request
import urllib.parse
import os
import time
"""
@File : qiushibaike.py
@Time : 2020/04/06
@Author : Mozili
"""
"""
爬取糗事百科中指定页码的图片
"""
def handler_request(url):
    """Fetch ``url`` and return the response body as text.

    The request is sent with a desktop Safari ``User-Agent`` header. The
    body is decoded with the default codec of ``bytes.decode()`` (UTF-8).

    Args:
        url: Address to request.

    Returns:
        The decoded response body as a ``str``.

    Raises:
        urllib.error.URLError: If the request cannot be completed.
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.1 Safari/605.1.15'
    }
    # Build the request with the custom headers attached.
    req = urllib.request.Request(url=url, headers=headers)
    # Use the response as a context manager so the underlying connection is
    # closed even if reading or decoding raises.
    with urllib.request.urlopen(req) as rep:
        return rep.read().decode()
def download_image(content):
    """Download every thumbnail image referenced in an HTML page.

    Each ``<img src="...">`` found inside a ``<div class="thumb">`` element
    is saved under ``Reptile/images``, named after the last path segment of
    its URL. The function pauses one second before each download.

    Args:
        content: HTML text of a listing page.

    Returns:
        None. The images are written to disk as a side effect.

    Raises:
        urllib.error.URLError: If an image cannot be retrieved.
        OSError: If the target directory or file cannot be created.
    """
    pattern = re.compile(r'<div class="thumb">.*?<img src="(.*?)" .*?">.*?</div>', re.S)
    image_urls = pattern.findall(content)
    if not image_urls:
        # Nothing matched: leave the file system untouched.
        return

    # Create the target directory once, including missing parents;
    # os.mkdir would fail when 'Reptile' itself does not exist yet.
    dirname = 'Reptile/images'
    os.makedirs(dirname, exist_ok=True)

    for image_url in image_urls:
        # The page is expected to use scheme-less sources, so a scheme is
        # prepended; sources that already are absolute http(s) URLs are
        # left alone instead of being corrupted by a second scheme.
        if not image_url.startswith(('http://', 'https://')):
            image_url = 'https:' + image_url
        # The file name is the last path segment of the URL.
        image_name = image_url.split('/')[-1]
        image_path = dirname + '/' + image_name
        print('图片{}开始下载....'.format(image_name))
        # Pause between downloads to avoid hammering the server.
        time.sleep(1)
        urllib.request.urlretrieve(image_url, image_path)
def main():
    """Prompt for a page range and download the images of each page.

    Reads the first and last page numbers from standard input, then, for
    every page in that inclusive range, fetches the listing page, downloads
    its images, prints a completion message, waits one second and prints
    two blank lines.
    """
    # Ask which pages to crawl (inclusive range).
    first_page = int(input('请输入起始页码:'))
    last_page = int(input('请输入结束页码:'))

    for page in range(first_page, last_page + 1):
        page_url = 'https://www.qiushibaike.com/imgrank/page/{}/'.format(page)
        # Fetch the listing page and hand its HTML straight to the downloader.
        download_image(handler_request(page_url))
        print('第{}页下载结束...'.format(page))
        # Short pause before moving on to the next page.
        time.sleep(1)
        print()
        print()
# Run the crawler only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()