怎么把爬取的数据保存到MongoDB中

怎么把爬取的数据保存到MongoDB中

问题描述:

img

img

img

`tree = etree.HTML(data)` 这行代码错了:你的 `data = response.json()` 返回的是 JSON 数据,不能交给 `etree.HTML` 解析。如果你确定网页返回的是 HTML 代码,应该写成 `data = response.text`。

这是我写的源码

import requests
import json
import pymongo
from lxml import etree

# Establish the MongoDB connection (server on localhost, port 27017).
client = pymongo.MongoClient(host='localhost', port=27017)
# Database "shuju" on that server.
db = client.shuju
# Collection "imgs"; sver() inserts the scraped documents into it.
a = db.imgs
def sver(dic):
    """Insert one scraped record into the module-level MongoDB collection ``a``.

    Args:
        dic: The document (a dict) to store.
    """
    a.insert_one(dic)
def hzp_a(url):
    """Fetch one page of licence records from *url* and save them to MongoDB.

    Sends a POST request with fixed paging/filter parameters, parses the
    response body as HTML, and stores one document (via ``sver``) for every
    element matched by ``//*[@id="gzlist"]``.

    Args:
        url: Address of the listing endpoint to query.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not answer within the timeout.
    """
    headers = {
        # The header name must be spelled exactly "User-Agent"; the pasted
        # version had stray spaces in both the name and the value.
        'User-Agent': (
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
            'AppleWebKit/537.36 (KHTML, like Gecko) '
            'Chrome/94.0.4606.81 Safari/537.36'
        ),
    }
    param = {
        'on': 'true',
        'page': 1,
        'pageSize': 15,
        'productName': '',
        'conditionType': 1,
        'applyname': '',
        'applysn': '',
    }
    response = requests.post(url=url, params=param, headers=headers, timeout=10)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page.
    response.raise_for_status()

    # etree.HTML() needs markup text. The original passed response.json(),
    # i.e. already-parsed JSON objects, which etree.HTML() cannot parse.
    # NOTE(review): if this endpoint actually answers with JSON, read the
    # records from response.json() directly instead of using XPath - confirm
    # against a live response.
    tree = etree.HTML(response.text)

    for li in tree.xpath('//*[@id="gzlist"]'):
        # NOTE(review): every XPath below reads only the first <li> under the
        # matched element; if one document per <li> is intended, iterate over
        # the <li> children instead - confirm against the page structure.
        cd = li.xpath('./li[1]/dl/a/text()')
        ab = li.xpath('./li[1]/ol/a/text()')
        ac = li.xpath('./li[1]/p/text()')
        ad = li.xpath('./li[1]/em/text()')
        ae = li.xpath('./li[1]/i/text()')
        # Each value is the list of text nodes returned by its XPath query.
        dic = {
            '企业名称': cd,
            '许可证编号': ab,
            '发证机关': ac,
            '有效期至': ad,
            '发证日期': ae,
        }
        sver(dic)

# The pasted URL ended in "%27", a URL-encoded single quote that replaced the
# closing quote and left the string literal unterminated.
url = 'http://scxk.nmpa.gov.cn:81/xk/itownet/portalAction.do?method=getXkzsList'
hzp_a(url)