# .doc 2 .docx 可用代码 (working code for converting .doc files to .docx)

  1 # # -*- coding:utf-8 -*-
  2 #读取docx中的文本代码示例
# `os` is used throughout this file but was never imported explicitly
# (presumably it only leaked in through the star import below).
import os
import shutil
import time

import docx
import eventlet  # provides the Timeout context manager used in doSaveAas()
from pyhanlp import *
from win32com import client as wc
# Shared Word COM automation object, created once when this module is loaded
# and used by doSaveAas() to open and re-save documents.
word = wc.Dispatch('Word.Application')
 10 '''重启计时器'''
 11 def restart(time_start,time_end,sub_deadline):
 12     #E:pycharmWorkPlaceGraduation_projectUtilfile_process.py
 13     time_sub = time_end - time_start
 14 
 15     i = 0
 16     print("NJNNNNNNNNN",time_sub,"NNNNNNNNNNNNNNN")
 17     if (time_sub > sub_deadline):
 18         str = "CHCP 65001" + "&&"
 19         str += "E:" + "&&"
 20         str += r"cd E:pycharmWorkPlaceGraduation_projectUtil" + "&&"
 21         str += "python file_process.py "
 22         # print(str)
 23         print("TIME______", i, "________", time_sub)
 24         i = i + 1
 25         cmd = os.system(str)
 26         # print(cmd)
 27 '''文件操作'''
 28 #将doc转成docx
 29 def doSaveAas(doc_path,docx_path):
 30     # time_start = time.time()
 31     print("MMMMMMMMMMMMMMMMMMMMMMMMMMMMM")
 32     eventlet.monkey_patch()#必须加这条代码
 33     with eventlet.Timeout(10, False):  # 设置超时时间为2秒
 34         # print("*$$$$$$$$$$$$$$$$")
 35         # time.sleep(2)
 36         # print("^^^^^^^^^^^^")
 37         doc = word.Documents.Open(doc_path)  # 目标路径下的文件
 38         # print("@@@@@@@@@@@@")
 39         doc.SaveAs(docx_path, 12, False, "", True, "", False, False, False, False)  # 转化后路径下的文件
 40         doc.Close()
 41         print("《《《《《《《《《《《《《《《《《《《《《《《《《《《《《")
 42         # time_end = time.time()
 43         # restart(time_start,time_end,20)
 44 
 45 #将相对路径转换乘绝对路径,同时调用转换文件进行转换,同时再顺便删除之前的文件
 46 def Dir_doc2docx(Dir_path):
 47     i=0
 48     for file_name in os.listdir(Dir_path):
 49 
 50         print("********************************************************************************************")
 51         try:
 52             print("文件名:"+file_name)
 53             file_path = os.path.join(Dir_path, file_name)
 54             print("文件后缀:"+os.path.splitext(file_name)[1] )
 55             if os.path.splitext(file_name)[1] == '.doc':
 56                 i = i + 1
 57                 abs_file_path=os.path.abspath(file_path)
 58                 print(i," 绝对路径:"+abs_file_path)
 59                 doSaveAas(abs_file_path,abs_file_path+'x')
 60                 os.remove(file_path)
 61 
 62         except:
 63             continue
 64 
 65 def Get_num_file_end(Dir_path,end):
 66     i=0
 67     for file_name in os.listdir(Dir_path):
 68         print("********************************************************************************************")
 69         try:
 70             if os.path.splitext(file_name)[1] == end:
 71                 i=i+1
 72         except:
 73             continue
 74     return i
 75 #获取文件值
 76 def Get_file_value(Dir_path,file_name):
 77     paragraph_id=[]
 78     paragraph_value=[]
 79     file_path = os.path.join(Dir_path, file_name)
 80     file = docx.Document(file_path)
 81     # 输出段落编号及段落内容
 82     for i in range(len(file.paragraphs)):
 83         paragraph_id.append(i)
 84         paragraph_value.append(file.paragraphs[i].text.strip().replace(u'u3000', u'').replace(u'xa0', u'').replace(' ', ''))
 85     return paragraph_id,paragraph_value
 86 #移动文件
 87 def remove_file(Dir_path,To_dirpath):
 88     i = 0
 89     for file_name in os.listdir(Dir_path):
 90         print("********************************************************************************************")
 91         try:
 92             print("文件名:" + file_name)
 93             file_path = os.path.join(Dir_path, file_name)
 94             print("文件后缀:" + os.path.splitext(file_name)[1])
 95             if os.path.splitext(file_name)[1] == '.docx':
 96                 i = i + 1
 97                 abs_file_path = os.path.abspath(file_path)
 98                 abs_to_file_path=os.path.abspath(os.path.join(To_dirpath, file_name))
 99                 shutil.move(abs_file_path,abs_to_file_path)
100                 print(i, " 绝对路径:" + abs_file_path)
101                 print(i, " 目标绝对路径:" + abs_to_file_path)
102 
103         except:
104             continue
if __name__ == "__main__":
    print("AAAAAAAAA")
    # NOTE(review): the directory literal had lost its backslashes (it read
    # "D:ATESTjie", a drive-relative name). It is restored as D:\ATEST\jie,
    # matching the commented call below where a TAB stood for "\t" -- confirm.
    Dir_doc2docx(r"D:\ATEST\jie")
    # Optional follow-up: move the converted files to a second directory.
    # remove_file(r"D:\ATEST\jie", r"D:\ATEST\tojie")
    print("LLLLLLL")
    # NOTE(review): left disabled as in the original, so the Word instance
    # created at import time is not shut down by this script.
    # word.Quit()