1 # # -*- coding:utf-8 -*-
2 #读取docx中的文本代码示例
3 import docx
4 from win32com import client as wc
5 from pyhanlp import *
6 import time
7 import eventlet#导入eventlet这个模块
8 import shutil
# Module-level COM handle for the 'Word.Application' automation object.
# It is created as a side effect of importing this module and is used by
# doSaveAas() to open and re-save documents.
# NOTE(review): no active word.Quit() call is visible in this file, so the
# application object is presumably left running after the script ends -- confirm.
word = wc.Dispatch('Word.Application')
10 '''重启计时器'''
def restart(time_start, time_end, sub_deadline):
    """Re-launch ``file_process.py`` through the shell when a step ran too long.

    Args:
        time_start: Start timestamp; must support ``time_end - time_start``.
        time_end: End timestamp.
        sub_deadline: Allowed elapsed time. The script is re-launched only when
            the elapsed time is strictly greater than this value.

    Returns:
        None. The exit status of the spawned shell command is discarded.
    """
    # Explicit import: no file-level ``import os`` is visible (it presumably
    # leaks in through ``from pyhanlp import *``), so do not rely on that.
    import os

    elapsed = time_end - time_start
    print("NJNNNNNNNNN", elapsed, "NNNNNNNNNNNNNNN")

    if elapsed > sub_deadline:
        # One cmd.exe line chained with "&&": set the console code page to
        # 65001, switch to drive E:, change directory, then run the script.
        # NOTE(review): the ``cd`` target contains no path separators as
        # written -- confirm the intended directory.
        command = "&&".join([
            "CHCP 65001",
            "E:",
            r"cd E:pycharmWorkPlaceGraduation_projectUtil",
            "python file_process.py ",
        ])
        # The literal 0 reproduces the original output, whose counter was
        # always zero at this point.
        print("TIME______", 0, "________", elapsed)
        os.system(command)
27 '''文件操作'''
28 #将doc转成docx
def doSaveAas(doc_path, docx_path):
    """Open ``doc_path`` in Word and save a copy to ``docx_path`` as .docx.

    Uses the module-level ``word`` COM application object. The work runs under
    a 10-second ``eventlet.Timeout`` created with ``False``, so hitting the
    timeout does not raise: the function returns normally and ``docx_path``
    may not exist afterwards.

    Args:
        doc_path: Path of the document to open.
        docx_path: Path the converted document is written to.

    Returns:
        None.
    """
    # Value passed as the FileFormat argument of Document.SaveAs; 12 is
    # wdFormatXMLDocument (.docx) in Word's WdSaveFormat enumeration.
    docx_format = 12

    print("MMMMMMMMMMMMMMMMMMMMMMMMMMMMM")
    eventlet.monkey_patch()  # the original author marks this call as required
    # NOTE(review): eventlet timeouts are cooperative; confirm this one can
    # actually interrupt a blocking COM call.
    with eventlet.Timeout(10, False):  # 10-second limit, silent on expiry
        doc = word.Documents.Open(doc_path)
        try:
            # Positional arguments after the path and format, per the Word
            # object model reference: LockComments, Password,
            # AddToRecentFiles, WritePassword, ReadOnlyRecommended,
            # EmbedTrueTypeFonts, SaveNativePictureFormat, SaveFormsData.
            doc.SaveAs(docx_path, docx_format, False, "", True, "",
                       False, False, False, False)
        finally:
            # Always release the document so a failed save does not leave it
            # open inside the shared Word instance.
            doc.Close()
    print("《《《《《《《《《《《《《《《《《《《《《《《《《《《《《")
44
45 #将相对路径转换乘绝对路径,同时调用转换文件进行转换,同时再顺便删除之前的文件
def Dir_doc2docx(Dir_path):
    """Convert every ``.doc`` file directly inside ``Dir_path`` to ``.docx``.

    Each matching file is handed to ``doSaveAas`` with its absolute path; the
    output path is that path with ``x`` appended. The original ``.doc`` is
    deleted only once the ``.docx`` is confirmed to exist, so a conversion
    that fails, or that returns without producing output, no longer destroys
    the source file.

    Processing is best-effort: an error on one entry is reported and the loop
    continues with the next entry.

    Args:
        Dir_path: Directory to scan (not recursive). Only the exact,
            case-sensitive suffix ``.doc`` is matched.

    Returns:
        None.
    """
    # Explicit import: no file-level ``import os`` is visible (it presumably
    # leaks in through ``from pyhanlp import *``), so do not rely on that.
    import os

    doc_count = 0
    for file_name in os.listdir(Dir_path):
        print("********************************************************************************************")
        try:
            print("文件名:" + file_name)
            suffix = os.path.splitext(file_name)[1]
            print("文件后缀:" + suffix)
            if suffix != '.doc':
                continue

            doc_count += 1
            file_path = os.path.join(Dir_path, file_name)
            abs_file_path = os.path.abspath(file_path)
            docx_path = abs_file_path + 'x'
            print(doc_count, " 绝对路径:" + abs_file_path)

            doSaveAas(abs_file_path, docx_path)

            if os.path.isfile(docx_path):
                os.remove(file_path)
            else:
                # doSaveAas can return without having written anything
                # (its timeout is silent); keep the source in that case.
                print("no .docx produced, keeping source:", ascii(file_name))
        except Exception as exc:
            # Best-effort per file, but report instead of hiding the failure.
            # ascii() keeps the report printable on any console encoding.
            print("skipped", ascii(file_name), "because of", ascii(exc))
            continue
64
def Get_num_file_end(Dir_path, end):
    """Count the entries of ``Dir_path`` whose extension equals ``end``.

    The comparison is exact and case-sensitive against the second element of
    ``os.path.splitext(name)``, so ``end`` must include the leading dot
    (for example ``'.docx'``); an empty string matches entries without an
    extension. A separator line is printed for every directory entry.

    Args:
        Dir_path: Directory to scan (not recursive).
        end: Extension to match, including the dot.

    Returns:
        The number of matching entries.
    """
    # Explicit import: no file-level ``import os`` is visible (it presumably
    # leaks in through ``from pyhanlp import *``), so do not rely on that.
    import os

    matches = 0
    for file_name in os.listdir(Dir_path):
        print("********************************************************************************************")
        # No try/except here: the old bare ``except`` guarded nothing useful
        # and would have swallowed KeyboardInterrupt.
        if os.path.splitext(file_name)[1] == end:
            matches += 1
    return matches
75 #获取文件值
def Get_file_value(Dir_path, file_name):
    """Read a .docx file and return its paragraph indices and cleaned texts.

    Each paragraph's text is stripped at both ends and then has every
    ideographic space (U+3000), no-break space (U+00A0) and ordinary space
    removed. The previous code searched for the literal strings ``u3000`` and
    ``xa0`` (the escapes had no backslash), so those characters were never
    removed from inside a paragraph.

    Args:
        Dir_path: Directory containing the document.
        file_name: Name of the .docx file inside ``Dir_path``.

    Returns:
        A tuple ``(paragraph_id, paragraph_value)`` of two equally long lists:
        the 0-based paragraph indices and the cleaned paragraph texts.
    """
    # Explicit import: no file-level ``import os`` is visible (it presumably
    # leaks in through ``from pyhanlp import *``), so do not rely on that.
    import os

    file_path = os.path.join(Dir_path, file_name)
    document = docx.Document(file_path)

    # One translation table deletes all three space characters in one pass.
    drop_spaces = str.maketrans('', '', '\u3000\xa0 ')

    # Read ``paragraphs`` once instead of indexing the attribute per iteration.
    paragraph_value = [
        paragraph.text.strip().translate(drop_spaces)
        for paragraph in document.paragraphs
    ]
    paragraph_id = list(range(len(paragraph_value)))
    return paragraph_id, paragraph_value
86 #移动文件
def remove_file(Dir_path, To_dirpath):
    """Move every ``.docx`` file directly inside ``Dir_path`` to ``To_dirpath``.

    Despite its name, this function moves files (``shutil.move``); it does not
    delete them. Matching is on the exact, case-sensitive suffix ``.docx``.
    Processing is best-effort: a failure on one entry is reported and the
    loop continues with the next entry.

    Args:
        Dir_path: Directory to scan (not recursive).
        To_dirpath: Directory the files are moved into; each file keeps its name.

    Returns:
        None.
    """
    # Explicit imports: no file-level ``import os`` is visible (it presumably
    # leaks in through ``from pyhanlp import *``), so do not rely on that.
    import os
    import shutil

    move_count = 0
    for file_name in os.listdir(Dir_path):
        print("********************************************************************************************")
        try:
            print("文件名:" + file_name)
            suffix = os.path.splitext(file_name)[1]
            print("文件后缀:" + suffix)
            if suffix != '.docx':
                continue

            move_count += 1
            abs_file_path = os.path.abspath(os.path.join(Dir_path, file_name))
            abs_to_file_path = os.path.abspath(os.path.join(To_dirpath, file_name))
            shutil.move(abs_file_path, abs_to_file_path)
            print(move_count, " 绝对路径:" + abs_file_path)
            print(move_count, " 目标绝对路径:" + abs_to_file_path)
        except Exception as exc:
            # Best-effort per file, but report instead of hiding the failure.
            # ascii() keeps the report printable on any console encoding.
            print("skipped", ascii(file_name), "because of", ascii(exc))
            continue
if __name__ == "__main__":
    # Script entry point: convert the .doc files in one hard-coded folder and
    # print a marker before and after the run.
    # Steps the author left disabled here: counting files with Get_num_doc,
    # moving the converted files with remove_file, and calling word.Quit().
    # NOTE(review): with word.Quit() disabled, the Word application object is
    # presumably left running after the script ends -- confirm this is intended.
    source_dir = "D:ATESTjie"

    print("AAAAAAAAA")
    Dir_doc2docx(source_dir)
    print("LLLLLLL")