Python 三 日记(二)
2012-12-10 星期一
1.控制大小写和访问子字符串
1)控制大小写
# Sample text with deliberately mixed case.
org_str = 'heLLo PyTHon, Hello worLd'

# Convert the whole string to upper case.
big_str = org_str.upper()
print(big_str)

# Convert the whole string to lower case.
little_str = org_str.lower()
print(little_str)

# Upper-case the first character and lower-case the rest, spelled out by hand
# as s[:1].upper() + s[1:].lower() ...
capitalize_str = org_str[:1].upper() + org_str[1:].lower()
print(capitalize_str)

# ... and the built-in method, which gives the same result for this input.
capitalize_str = org_str.capitalize()
print(capitalize_str)

# Upper-case the first letter of every word and lower-case the other letters.
title_str = org_str.title()
print(title_str)

# Output:
HELLO PYTHON, HELLO WORLD
hello python, hello world
Hello python, hello world
Hello python, hello world
Hello Python, Hello World
2)访问子字符串
# Accessing substrings of a fixed-layout bytes record.
import struct

the_line = b'hello keyan python hi hello world!'

# Plain slicing: bytes 12..17 hold the word "python".
print(the_line[12:18])

# struct.unpack: take a 6-byte string, skip 6 bytes, take 7 bytes,
# skip 3 bytes, take 6 bytes; whatever follows is handled separately below.
base_format = '6s 6x 7s 3x 6s'

# Number of bytes in the record that base_format does not cover.
num_remain = len(the_line) - struct.calcsize(base_format)
print(num_remain)

# Complete the format with one more 's' field sized to the remainder,
# then unpack the whole record.
the_format = '%s %ds' % (base_format, num_remain)
print(the_format)
l, s1, s2, t = struct.unpack(the_format, the_line)
print(l, s1, s2, t)
print(l + s1 + s2 + t)

# To ignore the remainder instead, unpack only the prefix that
# base_format describes.
l, s1, s2 = struct.unpack(base_format, the_line[:struct.calcsize(base_format)])
print(l, s1, s2)
print(l + s1 + s2)

# Split the data into groups of 5 bytes (the last group may be shorter).
print('**************************************************************')
fivers = [the_line[k:k + 5] for k in range(0, len(the_line), 5)]
print(fivers)

# Split the first 5 bytes into single items. For a bytes object, list()
# yields the integer byte values rather than 1-byte strings.
chars = list(the_line[:5])
print(chars)

# Cut the data into columns at the given offsets.
print('**************************************************************')
cuts = [6, 12, 19, 22, 28]
# zip() pairs each start offset with the next cut: the first pair is
# (0, cuts[0]), the middle pairs are (cuts[k], cuts[k + 1]) and the last
# pair is (cuts[-1], None), i.e. "from the last cut to the end".
pieces = [the_line[i:j] for i, j in zip([0] + cuts, cuts + [None])]
print(pieces)

# Output:
b'python'
6
6s 6x 7s 3x 6s 6s
b'hello ' b'python ' b'hello ' b'world!'
b'hello python hello world!'
b'hello ' b'python ' b'hello '
b'hello python hello '
**************************************************************
[b'hello', b' keya', b'n pyt', b'hon h', b'i hel', b'lo wo', b'rld!']
[104, 101, 108, 108, 111]
**************************************************************
[b'hello ', b'keyan ', b'python ', b'hi ', b'hello ', b'world!']
封装成函数:
# Wrap the substring techniques in reusable functions.
# Relies on struct, base_format, the_line and cuts from the snippet above.
print('**************************************************************')


def fields(base_format, the_line, last_field=False):
    """Unpack the_line with struct according to base_format.

    Bytes beyond those covered by base_format are returned as one extra
    trailing field when last_field is true, and skipped otherwise.
    Returns the tuple produced by struct.unpack.
    """
    num_remain = len(the_line) - struct.calcsize(base_format)
    tail_code = 's' if last_field else 'x'
    the_format = '%s %d%s' % (base_format, num_remain, tail_code)
    return struct.unpack(the_format, the_line)


print(fields(base_format, the_line, False))
print(fields(base_format, the_line, True))

# Memoizing version of fields(), suited to being called inside a loop.
print('**************************************************************')


def fields_mem(base_format, the_line, last_field=False, _cache={}):
    """Same result as fields(), but caches the computed format string.

    The mutable default _cache is deliberate: it persists between calls and
    maps (base_format, len(the_line), last_field) to the full format string,
    so the format is built only once per distinct key.
    """
    key = base_format, len(the_line), last_field
    the_format = _cache.get(key)
    if the_format is None:
        num_remain = len(the_line) - struct.calcsize(base_format)
        tail_code = 's' if last_field else 'x'
        the_format = '%s %d%s' % (base_format, num_remain, tail_code)
        _cache[key] = the_format
    return struct.unpack(the_format, the_line)


# Exercise the memoizing version (the original demo called fields() again).
print(fields_mem(base_format, the_line, False))
print(fields_mem(base_format, the_line, True))


# Splitting into pieces of a fixed number of items.
def split_by(the_line, n, last_field=False):
    """Return the_line cut into consecutive pieces of length n.

    A trailing piece shorter than n is kept only when last_field is true.
    Empty input yields an empty list.
    """
    pieces = [the_line[k:k + n] for k in range(0, len(the_line), n)]
    # Guard on `pieces` so that empty input does not index an empty list.
    if not last_field and pieces and len(pieces[-1]) < n:
        pieces.pop()
    return pieces


print(split_by(the_line, 5, False))
print(split_by(the_line, 5, True))


# Splitting into columns at given offsets.
def split_at(the_line, cuts, last_field=False):
    """Return the_line cut at each offset in cuts.

    The piece after the last cut is included only when last_field is true.
    """
    cuts = list(cuts)  # accept any iterable of offsets, not only lists
    pieces = [the_line[i:j] for i, j in zip([0] + cuts, cuts + [None])]
    if not last_field:
        # The final slice is the remainder after the last cut; drop it.
        pieces.pop()
    return pieces


print(split_at(the_line, cuts, False))
print(split_at(the_line, cuts, True))

# The same two helpers implemented as generators.
print('**************************************************************')


def split_at_yield(the_line, cuts, last_field=False):
    """Yield the pieces of the_line delimited by the offsets in cuts.

    The piece after the last cut is yielded only when last_field is true.
    """
    last = 0
    for cut in cuts:
        yield the_line[last:cut]
        last = cut
    if last_field:
        yield the_line[last:]


# A generator must be materialised to display its items.
print(list(split_at_yield(the_line, cuts, False)))
print(list(split_at_yield(the_line, cuts, True)))


def split_by_yield(the_line, n, last_field=False):
    """Yield consecutive pieces of the_line of length n.

    Mirrors split_by(): every full-length piece is yielded, and a trailing
    piece shorter than n is yielded only when last_field is true.
    """
    for start in range(0, len(the_line), n):
        piece = the_line[start:start + n]
        # Only the final piece can be short; full pieces are always kept.
        if last_field or len(piece) == n:
            yield piece


print(list(split_by_yield(the_line, 5, False)))
print(list(split_by_yield(the_line, 5, True)))

# Output:
**************************************************************
(b'hello ', b'python ', b'hello ')
(b'hello ', b'python ', b'hello ', b'world!')
**************************************************************
(b'hello ', b'python ', b'hello ')
(b'hello ', b'python ', b'hello ', b'world!')
[b'hello', b' keya', b'n pyt', b'hon h', b'i hel', b'lo wo']
[b'hello', b' keya', b'n pyt', b'hon h', b'i hel', b'lo wo', b'rld!']
[b'hello ', b'keyan ', b'python ', b'hi ', b'hello ']
[b'hello ', b'keyan ', b'python ', b'hi ', b'hello ', b'world!']
**************************************************************
[b'hello ', b'keyan ', b'python ', b'hi ', b'hello ']
[b'hello ', b'keyan ', b'python ', b'hi ', b'hello ', b'world!']
[b'hello', b' keya', b'n pyt', b'hon h', b'i hel', b'lo wo']
[b'hello', b' keya', b'n pyt', b'hon h', b'i hel', b'lo wo', b'rld!']