python 匹配excel和txt文件,将匹配的关键词放在后面新建的一列
问题描述:
有两个文件,txt和excel
其中txt存放的是关键词,excel存放的是内容
txt存放关键词的形式是每行一个关键词,例如:
成都
北京
天津
上海
重庆
excel存放的是一行一行的内容
能否将txt中的关键词与excel的每一行内容进行匹配,并将匹配到的关键词写入excel末尾新增的一列?
答
试试这样
def get_key_list(txt_file):
    """Read keywords from a text file that stores one keyword per line.

    Surrounding whitespace is stripped from every line and blank lines are
    dropped, because an empty keyword is a substring of every string and
    would therefore "match" every row it is later compared against.

    Args:
        txt_file: Path of the UTF-8 encoded keyword file.

    Returns:
        The keywords in file order, or an empty list when the file cannot
        be opened or decoded (best-effort behaviour).
    """
    try:
        # 'utf-8-sig' reads plain UTF-8 too, but also drops a leading BOM
        # so that it does not end up glued to the first keyword.
        with open(txt_file, 'r', encoding='utf-8-sig') as file_data:
            stripped = (line.strip() for line in file_data)
            return [k for k in stripped if k]
    except (OSError, ValueError):
        # OSError: missing/unreadable file. ValueError covers decoding
        # errors (UnicodeDecodeError) and malformed paths.
        return []
def xls_select(xls_sr, xls_tg, key_list, key_flag=''):
    """Copy the rows whose first cell contains a keyword into a new workbook.

    Only the first worksheet of the source workbook is processed, and the
    text to match is taken from its first column. Every matching row is
    copied to the target sheet ("Data Select") followed by two extra
    columns: ``key_flag`` and the comma-joined list of matched keywords.

    Args:
        xls_sr: Path of the source .xlsx workbook.
        xls_tg: Path the result workbook is saved to.
        key_list: Iterable of keyword strings to search for.
        key_flag: Label written next to every copied row (default '').
    """
    from openpyxl import Workbook
    from openpyxl import load_workbook

    wb = load_workbook(xls_sr)
    wb_tg = Workbook()
    try:
        # Only the first worksheet is handled; match data is in column A.
        ws = wb[wb.sheetnames[0]]
        sheet_tg = wb_tg.active
        sheet_tg.title = "Data Select"

        for row in ws.rows:
            if not row:
                continue
            first_value = row[0].value
            # An empty cell holds None; str(None) would be the text "None"
            # and could spuriously match keywords such as "on".
            text = '' if first_value is None else str(first_value)
            # Empty keywords are ignored: '' is contained in every string.
            match_keys = [k for k in key_list if k and k in text]
            if not match_keys:
                continue
            values = [cell.value for cell in row]
            # append() addresses columns by index, so rows wider than 26
            # columns work (single-letter arithmetic breaks after 'Z').
            sheet_tg.append(values + [key_flag, ','.join(match_keys)])

        wb_tg.save(xls_tg)
    finally:
        wb.close()
        wb_tg.close()
def xls_select2(xls_sr, xls_tg, key_file_data):
    """Run xls_select once per keyword file, one result workbook each.

    Args:
        xls_sr: Path of the source .xlsx workbook.
        xls_tg: Prefix of the result file names.
        key_file_data: Sequence of ``[file_name, keywords]`` entries.
    """
    for entry in key_file_data:
        key_name = entry[0]
        keywords = entry[1]
        # Drop the last four characters of the file name (e.g. ".txt").
        stem = key_name[:-4]
        tg_file = "{}_{}.xlsx".format(xls_tg, stem)
        print('匹配中 =>', tg_file, '\t关键字:', key_name, ','.join(keywords))
        xls_select(xls_sr, tg_file, keywords, key_name)
# Keyword files to process; each one produces its own result workbook.
key_file = ['广东.txt', '河南.txt', '四川.txt']
key_file_data = []
try:
    # Pair every keyword file name with the keywords loaded from it.
    key_file_data.extend([name, get_key_list(name)] for name in key_file)
    xls_select2('文本.xlsx', '文本匹配结果', key_file_data)
except Exception as err:
    print('处理出错:\n', repr(err))
答
In [1]: import pandas as pd
In [2]: df = pd.DataFrame({'A':['AAAI','ICDM','SDM','WWW','KDD'],
'B':[0.88, 0.41,0.22, 0.33, 0.35]})
In [3]: type_dict = {"AAAI":"AI","ICDM":"DM","SDM":"DM","KDD":"DM","WWW":"NEW"}
In [4]: df["C"] = df['A'].map(type_dict)
In [5]: df
Out[5]:
      A     B    C
0  AAAI  0.88   AI
1  ICDM  0.41   DM
2   SDM  0.22   DM
3   WWW  0.33  NEW
4   KDD  0.35   DM
https://blog.csdn.net/csw19970124/article/details/90205047