python分析log

python分析log

最近做的一个项目,系统log下会生成如下的log(部分):

[2015-05-19 11:16:06] Processing File transfer configured from ship to shore....
[2015-05-19 11:16:06] File Limit is = 2048
[2015-05-19 11:16:06] Done processing File transfer configured from ship to shore....
[2015-05-19 11:16:06] Connection starts now
[2015-05-19 11:16:06] Connected Via FB
[2015-05-19 11:16:06] Before Setting Server URL
[2015-05-19 11:16:06] Setting Server URL is done.
[2015-05-19 11:16:06] SHOREMAIL: Start Preparing Shore Files
[2015-05-19 11:16:07] prepareShoreMail: The outboxDir at shore is empty.
[2015-05-19 11:16:07] SHOREMAIL: End Preparing Shore Files
[2015-05-19 11:16:07] Exchange File: toUpload= true
[2015-05-19 11:16:07] Exchange File: toDownload= false
[2015-05-19 11:16:07] Exchange File: toDelete= true
[2015-05-19 11:16:07] Exchange File: DownStatus= Done
[2015-05-19 11:16:07] Exchange File: emailStatus= 0
[2015-05-19 11:16:07] LogCon: Updating status: Partial
[2015-05-19 11:16:07] Before total_zip_size_sent= 0
....

然后需要去算它的响应时间,说白了就是给你一堆类似的log,然后你去算每个log运行的时间。有很多语言可以用,我用的python,很久不写,回忆一下
直接上代码:

import os,sys,csv,time,re,datetime;

class cur_env:
    """Locations shared by the log analyser.

    The class body runs once, when the class is defined: it resolves the
    base directory, makes it the current working directory and derives the
    folder the CSV reports are written to.
    """
    # sys.path[0] is the directory of the script that started the
    # interpreter; it is an empty string in an interactive session, and
    # os.chdir('') would raise, so fall back to the current directory.
    path = sys.path[0] or os.getcwd()
    os.chdir(path)
    # Build the path with os.path.join: in a plain string literal '\r' is a
    # carriage return, so concatenating '\results' produced a broken path.
    result = os.path.join(path, 'results')

class csv_writer:
    """Collect one result row per log file and write them to a CSV report."""

    def get_files(self):
        """Return the names of the entries in cur_env.path ending in '.log'."""
        return [name for name in os.listdir(cur_env.path)
                if name.endswith('.log')]

    def gen_filename(self):
        """Return a report name of the form 'res_<yymmdd>_<HHMMSS>.csv'.

        The timestamp is the local time at the moment of the call.
        """
        return 'res_' + time.strftime('%y%m%d_%H%M%S') + '.csv'

    def create_resFolder(self):
        """Create the result folder (cur_env.result) if it does not exist."""
        os.makedirs(cur_env.result, exist_ok=True)

    def write_csv(self, file_name, content):
        """Write the report *file_name* into the result folder.

        Args:
            file_name: name of the CSV file to create inside cur_env.result.
            content: iterable of rows; each row is a sequence whose items
                are paired positionally with the header columns.
        """
        headers = ['FILE NAME', 'TIMING(S)', 'LINE COUNT', 'DESC', 'WARNING']
        target = os.path.join(cur_env.result, file_name)
        # The with-block guarantees the file is flushed and closed even if a
        # row fails to serialise; newline='' is what the csv module expects.
        with open(target, 'w', newline='') as csvfile:
            writer = csv.DictWriter(csvfile, headers)
            writer.writeheader()
            for row in content:
                writer.writerow(dict(zip(headers, row)))

    def gen_contents(self, file_list):
        """Return one row per log file: the file name followed by its stats.

        Args:
            file_list: names of log files located in cur_env.path.
        """
        content = []
        for file in file_list:
            log = logger(file)
            log.generate_content()
            # Prefix the cells produced by logger with the file name.
            content.append([file] + list(log.content))
        return content

class logger:
    """Derive the 'TIMING', 'LINE COUNT', 'DESC' and 'WARNING' cells of a log.

    After generate_content() has run, self.content holds those four cells as
    a list of strings, in that order.
    """

    # A '[YYYY-MM-DD HH:MM:SS]' stamp; the group captures the text between
    # the brackets. The brackets must be escaped, otherwise '[.*]' is a
    # character class that matches single '.' or '*' characters.
    _STAMP = re.compile(r'\[(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2})\]')
    _ERROR = re.compile(r'[Ee][Rr][Rr][Oo][Rr]')
    _TIME_FORMAT = '%Y-%m-%d %H:%M:%S'
    _FORMAT_WARNING = 'Unknown issue. Please verify the format of your log file.'

    def __init__(self, file):
        """Open the log *file* (a name relative to cur_env.path) for reading."""
        # Read-only mode: the analyser never writes to the log.
        self.logfile = open(os.path.join(cur_env.path, file), 'r')
        self.content = []

    def generate_content(self):
        """Read the log and fill self.content with the four result cells.

        The timing is the difference in seconds between the last and the
        first timestamp found in the file. If no usable timestamp is found,
        the first three cells are empty and the fourth carries a warning.
        The log file is closed on every path.
        """
        # Close the handle whatever happens below, including early returns.
        with self.logfile:
            text = self.logfile.read()

        stamps = self._STAMP.findall(text)
        if not stamps:
            self.content = ['', '', '', self._FORMAT_WARNING]
            return
        start_time, end_time = stamps[0], stamps[-1]

        try:
            started = time.mktime(time.strptime(start_time, self._TIME_FORMAT))
            ended = time.mktime(time.strptime(end_time, self._TIME_FORMAT))
        except ValueError:
            # Digits in the right shape but not a real date/time.
            self.content = ['', '', '', self._FORMAT_WARNING]
            return
        timing = ended - started

        # Count lines; a final line without a trailing newline still counts.
        lines = text.count('\n')
        if not text.endswith('\n'):
            lines += 1

        desc = 'start time is: ' + start_time + '. end time is: ' + end_time

        # Flag every occurrence of the word "error", in any letter case.
        errors = len(self._ERROR.findall(text))
        if errors > 0:
            warning = 'Totally ' + str(errors) + ' error(s) detected in the log.'
        else:
            warning = 'NA'

        self.content = [str(timing), str(lines), desc, warning]

# Script entry point
def main():
    """Analyse every log file next to the script and write one CSV report."""
    writer = csv_writer()
    writer.create_resFolder()
    file_name = writer.gen_filename()
    file_list = writer.get_files()
    content = writer.gen_contents(file_list)
    writer.write_csv(file_name, content)


# Guarded so that importing this module does not scan the folder or write
# a report as a side effect.
if __name__ == '__main__':
    main()

三个类,cur_env主要放的系统当前路径之类的,logger类根据每个log文件,生成相关的csv文件的行,关键信息包括 'TIMING','LINE COUNT','DESC','WARNING';最后一个类是写csv文件的csv_writer,测试结果如下:

python分析log

 

总结: Python很久不用,结果很多地方我都要重新去百度或者查帮助,只有自己动手去写了才能记得更清楚。