Python爬虫分析微博热搜关键词的实现代码
1,使用到的第三方库
requests
BeautifulSoup 美味汤
worldcloud 词云
jieba 中文分词
matplotlib 绘图
2,代码实现部分
import requests import wordcloud import jieba from bs4 import BeautifulSoup from matplotlib import pyplot as plt from pylab import mpl #设置字体 mpl.rcParams['font.sans-serif'] = ['SimHei'] mpl.rcParams['axes.unicode_minus'] = False url = 'https://s.weibo.com/top/summary?Refer=top_hot&topnav=1&wvr=6' try: #获取数据 r = requests.get(url) r.raise_for_status() r.encoding = r.apparent_encoding soup = BeautifulSoup(r.text,'html.parser') data = soup.find_all('a') d_list = [] for item in data: d_list.append(item.text) words = d_list[4:-11:] #中文分词 result = list(jieba.cut(words[0])) for word in words[1::]: result.extend(jieba.cut(word)) redata = [] for it in result: if len(it) <= 1: continue else: redata.append(it) result_str = ' '.join(redata) #输出词云图 font = r'C:\Windows\Fonts\simhei.ttf' w = wordcloud.WordCloud(font_path=font,width=600,height=400) w.generate(result_str) w.to_file('微博热搜关键词词云.png') key = list(set(redata)) x,y = [],[] #筛选数据 for st in key: count = redata.count(st) if count <= 1: continue else: x.append(st) y.append(count) x.sort() y.sort() #绘制结果图 plt.plot(x,y) plt.show() except Exception as e: print(e)
3,运行结果