import os
import re
import urllib.request
3
4 # ------ 获取网页源代码的方法 ---
def getHtml(url):
    """Fetch *url* and return the raw response body.

    Args:
        url: Address to open; passed unchanged to ``urllib.request.urlopen``.

    Returns:
        The undecoded response body as ``bytes``. Decoding is left to the
        caller because the right charset depends on the page.

    Raises:
        urllib.error.URLError: Propagated from ``urlopen`` when the request
            cannot be completed.
    """
    # The context manager closes the response (and its underlying socket)
    # even if read() raises; the bare urlopen()/read() pair never closed it.
    with urllib.request.urlopen(url) as page:
        return page.read()
9
# ------ Pass any static page URL to getHtml(), then decode the returned ------
# ------ bytes to text: gbk here, or UTF-8 for pages using that encoding ------
html = getHtml("http://www.meizitu.com/a/5485.html").decode('gbk')
14
# ------ Collect the address of every image on the page ------

# ------ Use a regular expression to pull image addresses out of the HTML ------
# Matches absolute http:// URLs whose file name is exactly two digits followed
# by ".jpg". The URL body is restricted to characters that are neither
# whitespace nor quotes, so a match cannot run past the end of an attribute
# value (the previous class, [^s], excluded the letter "s" instead of
# whitespace and rejected any URL containing one). The dot before "jpg" is
# escaped so it matches only a literal ".".
reg = r'(http://[^\s"\']*/[0-9][0-9]\.jpg)'
mmurl = re.findall(reg, html)
# NOTE: earlier, unused attempts keyed on the src="....jpg" pic_ext="jpeg"
# attribute pair rather than on the shape of the URL itself.
26
# Install the browser-like User-Agent once, before any download.
# install_opener() sets the process-wide default opener that urlretrieve()
# goes through, so rebuilding and reinstalling it per image was redundant.
opener = urllib.request.build_opener()
opener.addheaders = [
    ('User-Agent',
     'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1941.0 Safari/537.36'),
]
urllib.request.install_opener(opener)

# urlretrieve() does not create missing directories, so make sure the target
# folder exists before the first file is written.
os.makedirs('d://1/5485', exist_ok=True)

# Save each image under its position in the match list: 0.jpg, 1.jpg, ...
for x, imgurl in enumerate(mmurl):
    urllib.request.urlretrieve(imgurl, 'd://1/5485/%s.jpg' % x)

print("All Done!")