# 找到某种特定形式的字符串

# 找出某种特定形式的字符串
# encoding:utf-8

import time
import re
from functools import reduce


# --------------------------class RePattern():---------------------------------
# 把某字符串转化成正则表达式,再用这个正则表达式去匹配其他字符串
class RePattern():
    """Turn a sample string into a "shape" regex and match other strings with it.

    Every character of the sample is classified as ``lower``, ``upper``,
    ``digit``, ``space`` or ``other``.  Consecutive characters of the same
    class form one block, and each block becomes ``<class>{<length>}`` in the
    generated regular expression.  For example ``'xXX34XXX'`` produces the
    block lengths ``[1, 2, 2, 3]`` with the block types
    ``['lower', 'upper', 'digit', 'upper']``.

    Attributes:
        pattern: Source text of the generated regular expression.
        pre: The compiled regular expression (result of ``re.compile``).
        patternLength: Number of characters in the sample string.
        block: Length of each run of same-class characters, in order.
        blockType: Class name of each run, parallel to ``block``.
        bCount: Kept only for backward compatibility; always ``1``.
        typedict: Maps a class name to the opening part of its regex fragment.

    Note:
        Classification uses ``str.islower`` / ``isupper`` / ``isdigit``, which
        are Unicode-aware, while the generated classes ``[a-z]``, ``[A-Z]``
        and ``[0-9]`` are ASCII-only.  A non-ASCII letter or digit in the
        sample therefore yields a block that will not match that same
        character.
    """

    def __init__(self, patternStr):
        """Build and compile the shape regex for ``patternStr``."""
        self.pattern = ''      # generated regex source
        self.pre = None        # compiled regex, set by __setPattern
        self.patternLength = len(patternStr)
        self.bCount = 1        # legacy attribute, no longer used internally
        self.block = []        # run lengths, e.g. 'xXX34XXX' -> [1, 2, 2, 3]
        self.blockType = []    # run classes, e.g. [lower, upper, digit, upper]
        # Raw strings: '\s' is a regex escape, not a Python string escape.
        # 'other' must exclude the four classes above.  The previous
        # '[\D\W\S]' was a union that matched every character, so an 'other'
        # position behaved as a wildcard.
        self.typedict = {
            'lower': r'[a-z]{',
            'upper': r'[A-Z]{',
            'digit': r'[0-9]{',
            'space': r'[\s]{',
            'other': r'[^a-zA-Z0-9\s]{',
        }
        self.__setPattern(patternStr)

    def __setPattern(self, patternStr):
        """Fill ``block`` / ``blockType`` and compile the resulting regex."""
        # Run-length encode the sequence of character classes.
        for ch in patternStr:
            chType = self.__toType(ch)
            if self.blockType and self.blockType[-1] == chType:
                self.block[-1] += 1
            else:
                self.blockType.append(chType)
                self.block.append(1)

        # One '<class>{<length>}' fragment per run.
        self.pattern = ''.join(
            self.typedict[btype] + str(blen) + '}'
            for btype, blen in zip(self.blockType, self.block)
        )
        self.pre = re.compile(self.pattern)

    def __toType(self, s):
        """Return the class name of the single character ``s``."""
        if s.islower():
            return 'lower'
        elif s.isupper():
            return 'upper'
        elif s.isdigit():
            return 'digit'
        elif s.isspace():
            return 'space'
        else:
            return 'other'

    def isPattern(self, compareStr):
        """Match ``compareStr`` against the shape regex, anchored at its start.

        Returns:
            The matched prefix of ``compareStr`` when it has the same shape
            as the sample string, otherwise ``None``.
        """
        tmp = self.pre.match(compareStr)
        if tmp:
            return tmp.group()
        return None

    def __str__(self):
        """Return a multi-line summary of the blocks and the generated regex."""
        return ' block:{0}\n blockType:{1}\n pattern :{2}'\
            .format(self.block, self.blockType, self.pattern)
# --------------------------class RePattern(): end----------


# 从file中找出特定形式字符串
# 以strr[start:end]的形式打印匹配字符串
def pickFromFile(file, strr, start=None, end=None):
    """Yield the substrings of a text file that have the same shape as ``strr``.

    A ``RePattern`` is built from ``strr`` and printed, then a window of
    ``len(strr)`` characters is slid over every line of the file.  Each
    window accepted by ``RePattern.isPattern`` is reported.

    Args:
        file: Path of the text file to scan.
        strr: Sample string whose character-class shape is searched for.
        start: Start index of the slice taken from each match.
        end: End index of the slice taken from each match.

    Yields:
        ``(match[start:end], count)`` tuples, where ``count`` is the
        zero-based running index of the match across the whole file.
    """
    count = 0
    # 'with' closes the file even when the generator is abandoned early
    # or an exception is raised while scanning.
    with open(file) as f:
        rp = RePattern(strr)
        print(rp)
        patternLength = rp.patternLength

        for line in f:
            # +1 so the final window, ending exactly at the end of the line,
            # is tested too; without it a match at the very end of a line
            # with no trailing newline is never seen.  Each line keeps its
            # trailing '\n', so that last window may include the newline.
            for n in range(len(line) - patternLength + 1):
                comp = rp.isPattern(line[n:n + patternLength])
                if comp:
                    yield (comp[start:end], count)
                    count += 1


#--------------------run-------------------------
# Timestamp taken before the scan starts.
time1 = time.time()        

# pickFromFile is a generator; every item it yields is a
# (matched_text, match_index) tuple, printed here one per line.
strs = pickFromFile("../data.txt", 'xxx %XX00', )
for s in strs:
    print(s)
# Sample matches noted by the original author: 'abc &BC34'   'sdf @VN03'
# (the actual output depends on the contents of ../data.txt)


time2 = time.time()
# Elapsed wall-clock seconds between the two timestamps.
print(time2-time1)