python采集页面报错

python采集页面报错

本帖最后由 ecshop528 于 2014-06-05 00:23:43 编辑

import sys

import urllib

import re

from urllib.request import Request, urlopen

from urllib.error import URLError, HTTPError

class collect():
    """Minimal page fetcher: downloads a URL and returns its text."""

    def get_html(self, href):
        """Fetch ``href`` and return the response body decoded as UTF-8.

        Some servers send a gzip-compressed body even though the request
        did not ask for one. Such a body starts with the gzip magic number
        (bytes 0x1F 0x8B) and cannot be decoded as text directly, so it is
        decompressed first.

        Args:
            href: URL to request.

        Returns:
            The page text. An empty string is returned when the request
            fails with ``HTTPError`` or ``URLError``; in that case a
            diagnostic is printed to stdout instead of raising.

        Raises:
            UnicodeDecodeError: the (decompressed) body is not valid UTF-8.
            OSError, EOFError: the body starts with the gzip magic number
                but is not a complete, valid gzip stream.
        """
        # Imported locally so this method does not depend on the
        # module-level import list being extended.
        import gzip

        req = urllib.request.Request(href)
        try:
            response = urllib.request.urlopen(req)
        except HTTPError as e:
            # HTTPError is a subclass of URLError, so it must be caught first.
            print('The server couldn\'t fulfill the request.')
            print('Error code: ', e.code)
        except URLError as e:
            print('We failed to reach a server.')
            print('Reason: ', e.reason)
        else:
            print("good!")
            # Close the connection as soon as the body has been read.
            with response:
                the_page = response.read()
            # Sniff the payload rather than trusting headers: the magic
            # number is what actually determines whether decoding will work.
            if the_page[:2] == b'\x1f\x8b':
                the_page = gzip.decompress(the_page)
            return the_page.decode("utf-8")

        return ''


# Script driver: build a fetcher, download one article-listing page and
# print whatever text get_html returns for it.
collect_model = collect()
print(
    collect_model.get_html(
        'http://www.68ecshop.com/article_cat-30-2.html'
    )
)

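运行后提示以下错误（原帖中的错误信息未能保留；从下面的回复推断，应该是 UnicodeDecodeError: 'utf-8' codec can't decode byte 0x8b in position 1: invalid start byte）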

------解决方案--------------------
抓包看了一下，返回的头三个字节是1F 8B 08，是gzip压缩格式，所以不能直接按 utf-8 解码：需要先用 gzip.decompress() 解压 response.read() 得到的字节，再调用 decode("utf-8")。

python采集页面报错

相关推荐