1 lines (8 sloc) 333 Bytes
2 from urllib.request import urlopen
3 from bs4 import BeautifulSoup
4
# Download the Wikipedia article on Python and print the plain text of its
# main content <div>.
# Using the response as a context manager guarantees the HTTP connection is
# closed even if parsing fails.
with urlopen("http://en.wikipedia.org/wiki/Python_(programming_language)") as html:
    # The parser reads from the response object, so this must happen while
    # the connection is still open.
    bsObj = BeautifulSoup(html, "html.parser")

content_div = bsObj.find("div", {"id": "mw-content-text"})
if content_div is None:
    # find() yields None when nothing matches; fail with a clear message
    # instead of an AttributeError on the .get_text() call below.
    raise RuntimeError('no <div id="mw-content-text"> element found in the page')

# get_text() already returns a str, so no encode/decode round trip through
# UTF-8 bytes is needed before printing.
content = content_div.get_text()
print(content)
1 from urllib.request import urlopen
2
# Download a plain-text chapter and print it.
# The response body is raw bytes; decode it explicitly as UTF-8 to get a str.
# The context manager closes the connection once the body has been read.
with urlopen("http://www.pythonscraping.com/pages/warandpeace/chapter1.txt") as textPage:
    print(textPage.read().decode("utf-8"))