1 import requests
2 import re
3 import pymysql
# 10 pages -- look carefully at the URL routing of the listing pages
# Listing pages live at BASE_URL + 'index_<n>.html'.
BASE_URL = 'http://*******8****'
# NOTE(review): the original loop was range(1, 10), i.e. pages 1-9, although
# the comment at the top of the file mentions 10 pages -- confirm whether page
# 10 should be fetched as well.
FIRST_PAGE = 1
LAST_PAGE = 9
REQUEST_TIMEOUT = 10  # seconds; without a timeout requests.get can block forever

# Patterns are compiled once at import time instead of once per page.
# The original patterns had lost their backslashes (d, s, '.', u4e00 were
# matched as literal characters) and the place pattern carried a stray ']'
# after the last alternative; both are repaired here.
_OCTET = r'(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})'
IP_PATTERN = re.compile(_OCTET + r'(?:\.' + _OCTET + r'){3}')
PORT_PATTERN = re.compile(r'\s+(\d{3,6})\s')
PLACE_PATTERN = re.compile(r'\s+([\u4e00-\u9fa5]{1,9})\s?(?:省|新疆|内蒙古|市|县|区)')
# A timestamp pattern (yyyy/mm/dd hh:mm:ss) was sketched in the original but
# never used, so it is not extracted here.

# Placeholders are filled in by the DB driver, never by string formatting, so
# scraped text cannot alter the statement.
INSERT_SQL = "INSERT INTO test (ip,port,place) VALUES (%s,%s,%s)"


def page_url(page):
    """Return the URL of listing page number *page*."""
    return BASE_URL + 'index_' + str(page) + '.html'


def extract_proxies(html):
    """Return a list of (ip, port, place) string tuples found in *html*.

    The three patterns are matched independently and their results are paired
    by position.  The original code paired every IP with the first port and
    the first place found on the page, so all rows of a page received the same
    port/place (and an IndexError was raised when either list was empty).

    NOTE(review): positional pairing assumes each listing row yields exactly
    one match per pattern, in order -- verify against the real page markup.
    When the match counts differ, the surplus matches are dropped.
    """
    ips = IP_PATTERN.findall(html)
    ports = PORT_PATTERN.findall(html)
    places = PLACE_PATTERN.findall(html)
    return list(zip(ips, ports, places))


def main():
    """Fetch every listing page and store the extracted rows in MySQL."""
    # Keyword arguments: positional connect() arguments are rejected by
    # PyMySQL >= 1.0.
    db = pymysql.connect(host="localhost", user="root",
                         password="root", database="testdb")
    try:
        cursor = db.cursor()
        for page in range(FIRST_PAGE, LAST_PAGE + 1):
            response = requests.get(page_url(page), timeout=REQUEST_TIMEOUT)
            for row in extract_proxies(response.text):
                print(row)
                cursor.execute(INSERT_SQL, row)
            # PyMySQL does not autocommit by default; without this the
            # inserts are discarded when the connection closes.
            db.commit()
    finally:
        # Release the connection even if a request or an insert fails.
        db.close()


if __name__ == "__main__":
    main()
28
# Use the cursor() method to obtain an operation cursor