python 匹配excel和txt文件,将匹配的关键词放在后面新建的一列

python 匹配excel和txt文件,将匹配的关键词放在后面新建的一列

问题描述:

有两个文件,txt和excel
其中txt存放的是关键词,excel存放的是内容

txt存放关键词的形式是

成都
北京
天津
上海
重庆

excel存放的是一行一行的内容。
能否用txt中的关键词去匹配excel每一行的内容,并把匹配到的关键词写入excel末尾新增的一列?

试试这样


def get_key_list(txt_file):
    """Load keywords from a text file that holds one keyword per line.

    The file is decoded as UTF-8; a leading byte-order mark (as written by
    some Windows editors) is dropped so it does not become part of the first
    keyword. Surrounding whitespace is stripped from every line and blank
    lines are skipped, because an empty keyword is a substring of every
    string and would therefore "match" everything.

    Args:
        txt_file: Path of the keyword file.

    Returns:
        A list of non-empty keyword strings in file order, or an empty list
        when the file cannot be opened or decoded.
    """
    try:
        # 'utf-8-sig' reads plain UTF-8 as well as UTF-8 with a BOM.
        with open(txt_file, 'r', encoding='utf-8-sig') as file_data:
            lines = file_data.read().splitlines()
    except (OSError, ValueError):
        # Best-effort loader: an unreadable or undecodable file yields no
        # keywords (UnicodeDecodeError is a ValueError subclass).
        return []
    stripped = (line.strip() for line in lines)
    return [key for key in stripped if key]


def xls_select(xls_sr, xls_tg, key_list, key_flag=''):
    """Copy rows whose first cell contains a keyword into a new workbook.

    Only the first worksheet of ``xls_sr`` is read and only the first column
    of each row is searched. Every matching row is written to a sheet named
    "Data Select" in a new workbook, followed by two extra columns: the
    ``key_flag`` label and the matched keywords joined with commas.

    Args:
        xls_sr: Path of the source .xlsx workbook.
        xls_tg: Path the result workbook is saved to.
        key_list: Keywords to look for (plain substring match). Empty
            keywords are ignored.
        key_flag: Label written after the copied cells of each matching row.
    """
    from openpyxl import Workbook
    from openpyxl import load_workbook

    # An empty string is a substring of everything; drop such keywords so a
    # stray blank entry does not select every row.
    keywords = [k for k in key_list if k]

    wb = load_workbook(xls_sr)
    try:
        # Only the first worksheet is processed; the text to match is in
        # its first column.
        ws = wb[wb.sheetnames[0]]

        wb_tg = Workbook()
        sheet_tg = wb_tg.active
        sheet_tg.title = "Data Select"

        for row in ws.rows:
            # Rows hold Cell objects, so emptiness must be tested on .value;
            # str(None) would otherwise be searched as the text 'None'.
            if not row or row[0].value is None:
                continue
            text = str(row[0].value)
            match_keys = [k for k in keywords if k in text]
            if not match_keys:
                continue
            out_row = [cell.value for cell in row]
            out_row.append(key_flag)
            out_row.append(','.join(match_keys))
            # append() writes to the next free row and is not limited to
            # columns A-Z the way hand-built 'A1'-style references are.
            sheet_tg.append(out_row)

        wb_tg.save(xls_tg)
        wb_tg.close()
    finally:
        # Release the source workbook even when processing or saving fails.
        wb.close()


def xls_select2(xls_sr, xls_tg, key_file_data):
    """Run ``xls_select`` once per keyword file, one result workbook each.

    Args:
        xls_sr: Path of the source .xlsx workbook.
        xls_tg: Prefix of the result file names; each result is saved as
            "<xls_tg>_<keyword file name without extension>.xlsx".
        key_file_data: Iterable of ``[keyword_file_name, keyword_list]``
            entries.
    """
    import os

    for kf in key_file_data:
        kf_name, keys = kf[0], kf[1]
        # splitext removes whatever extension is present (or none), instead
        # of blindly chopping the last four characters.
        stem = os.path.splitext(kf_name)[0]
        tg_file = "{}_{}.xlsx".format(xls_tg, stem)
        print('匹配中 =>', tg_file, '\t关键字:', kf_name, ','.join(keys))
        xls_select(xls_sr, tg_file, keys, kf_name)

# Keyword files to process; the source workbook is matched against each one.
key_file = ['广东.txt','河南.txt','四川.txt']
key_file_data = []
try:
    # Pair every keyword file name with the keywords loaded from it.
    key_file_data.extend([name, get_key_list(name)] for name in key_file)
    xls_select2('文本.xlsx', '文本匹配结果', key_file_data)
except Exception as err:
    # Report any failure instead of letting the script abort with a traceback.
    print('处理出错:\n',repr(err))


In [1]: import pandas as pd

In [2]: df = pd.DataFrame({'A':['AAAI','ICDM','SDM','WWW','KDD'],
   ...:                    'B':[0.88, 0.41,0.22, 0.33, 0.35]})

In [3]: type_dict = {"AAAI":"AI","ICDM":"DM","SDM":"DM","KDD":"DM","WWW":"NEW"}

In [4]: df["C"] = df['A'].map(type_dict)

In [5]: df
Out[5]:
      A     B    C
0  AAAI  0.88   AI
1  ICDM  0.41   DM
2   SDM  0.22   DM
3   WWW  0.33  NEW
4   KDD  0.35   DM
https://blog.csdn.net/csw19970124/article/details/90205047