Python 3 日记(二)

Python 3 日记(二)

2012-12-10 星期一

1.控制大小写和访问子字符串

1)控制大小写

org_str = 'heLLo PyTHon, Hello worLd'

# Every character upper-cased.
big_str = str.upper(org_str)
print(big_str)

# Every character lower-cased.
little_str = str.lower(org_str)
print(little_str)

# First character upper-cased, everything after it lower-cased,
# spelled out by hand with slices ...
capitalize_str = ''.join((org_str[:1].upper(), org_str[1:].lower()))
print(capitalize_str)

# ... and the same result for this sample via the built-in method.
capitalize_str = org_str.capitalize()
print(capitalize_str)

# First letter of each word upper-cased.
title_str = org_str.title()
print(title_str)
输出:

HELLO PYTHON, HELLO WORLD
hello python, hello world
Hello python, hello world
Hello python, hello world
Hello Python, Hello World
2)访问子字符串

# Accessing substrings of a bytes object
the_line = b'hello keyan python hi hello world!'

# Plain slicing
print(the_line[12:18])

# Fixed-width fields with struct.unpack
import struct

# Take 6 bytes, skip 6, take 7, skip 3, take 6.  Whatever follows these
# 28 bytes is dealt with separately below.
base_format = '6s 6x 7s 3x 6s'

# How many bytes of the line lie beyond what base_format consumes
num_remain = len(the_line) - struct.calcsize(base_format)
print(num_remain)

# Complete the format with one more 's' field covering the remainder,
# then unpack the whole line
the_format = '{} {}s'.format(base_format, num_remain)
print(the_format)
l, s1, s2, t = struct.unpack(the_format, the_line)
print(l, s1, s2, t)
print(b''.join((l, s1, s2, t)))

# Ignore the remainder instead: unpack only the leading part of the line
# that base_format describes
l, s1, s2 = struct.unpack_from(base_format, the_line)
print(l, s1, s2)
print(b''.join((l, s1, s2)))

# Consecutive chunks of 5 bytes each (the last one may be shorter)
print('**************************************************************')
fivers = [the_line[start:start + 5] for start in range(0, len(the_line), 5)]
print(fivers)

# Split the first 5 bytes into individual items; for a bytes object each
# item is an integer byte value
chars = list(the_line[:5])
print(chars)

# Cut the data into columns at the given offsets
print('**************************************************************')
cuts = [6, 12, 19, 22, 28]
# zip pairs every start offset with the matching end offset: the first
# pair is (0, cuts[0]), the middle pairs are (cuts[k], cuts[k+1]) and the
# last pair is (cuts[-1], None), i.e. "up to the end of the line".
pieces = [the_line[begin:end] for begin, end in zip([0] + cuts, cuts + [None])]
print(pieces)
输出:

b'python'
6
6s 6x 7s 3x 6s 6s
b'hello ' b'python ' b'hello ' b'world!'
b'hello python hello world!'
b'hello ' b'python ' b'hello '
b'hello python hello '
**************************************************************
[b'hello', b' keya', b'n pyt', b'hon h', b'i hel', b'lo wo', b'rld!']
[104, 101, 108, 108, 111]
**************************************************************
[b'hello ', b'keyan ', b'python ', b'hi ', b'hello ', b'world!']
封装成函数:

# The unpacking steps wrapped up as a function
print('**************************************************************')
def fields(base_format, the_line, last_field=False):
    """Unpack ``the_line`` using ``base_format`` plus one trailing field.

    The bytes of ``the_line`` that lie beyond what ``base_format``
    consumes are covered by one extra format item appended to it.

    Args:
        base_format: struct format string describing the leading fields.
        the_line: bytes-like object to unpack.
        last_field: when true the trailing bytes are returned as one more
            ``s`` field; when false they are skipped with ``x`` padding.

    Returns:
        The tuple produced by ``struct.unpack``.

    Raises:
        struct.error: if ``the_line`` is shorter than ``base_format``
            requires, or if ``struct.unpack`` rejects the format or data.
    """
    base_size = struct.calcsize(base_format)
    num_remain = len(the_line) - base_size
    if num_remain < 0:
        # A negative count would build an invalid format such as '6s -2x'
        # and fail with an unhelpful "bad char" message; say what is wrong.
        raise struct.error(
            'line is too short: %d bytes given, format %r needs %d'
            % (len(the_line), base_format, base_size))
    the_format = '%s %d%s' % (base_format, num_remain,
                              's' if last_field else 'x')
    return struct.unpack(the_format, the_line)

# Demo: first skip the trailing bytes, then keep them as an extra field.
print(fields(base_format, the_line, last_field=False))
print(fields(base_format, the_line, last_field=True))

# Memoizing version of fields(): the computed format string is cached,
# which is useful when the function is called inside a loop.
print('**************************************************************')
def fields_mem(base_format, the_line, last_field=False, _cache={}):
    """Unpack ``the_line`` like ``fields`` but cache the format string.

    Args:
        base_format: struct format string describing the leading fields.
        the_line: bytes-like object to unpack.
        last_field: when true the trailing bytes are returned as one more
            ``s`` field; when false they are skipped with ``x`` padding.
        _cache: memo table mapping ``(base_format, len(the_line),
            last_field)`` to the completed format string.  The mutable
            default is intentional: it is shared by all calls and is what
            makes the memoization work.

    Returns:
        The tuple produced by ``struct.unpack``.

    Raises:
        struct.error: if ``the_line`` is shorter than ``base_format``
            requires, or if ``struct.unpack`` rejects the format or data.
    """
    key = base_format, len(the_line), last_field
    the_format = _cache.get(key)
    if the_format is None:
        base_size = struct.calcsize(base_format)
        num_remain = len(the_line) - base_size
        if num_remain < 0:
            # Fail with a clear message (and cache nothing) rather than
            # building an invalid format such as '6s -2x'.
            raise struct.error(
                'line is too short: %d bytes given, format %r needs %d'
                % (len(the_line), base_format, base_size))
        _cache[key] = the_format = '%s %d%s' % (
            base_format, num_remain, 's' if last_field else 'x')
    return struct.unpack(the_format, the_line)

# Demo of the memoizing variant; the results match those of fields().
print(fields_mem(base_format, the_line, False))
print(fields_mem(base_format, the_line, True))

# Splitting into pieces of a fixed size, wrapped up as a function
def split_by(the_line, n, last_field=False):
    """Split ``the_line`` into consecutive pieces of ``n`` items each.

    Args:
        the_line: sliceable sequence (e.g. bytes or str) to split.
        n: size of each piece.
        last_field: when true a final piece shorter than ``n`` is kept;
            when false such a short final piece is dropped.

    Returns:
        A list of the pieces, in order.  An empty ``the_line`` gives ``[]``.
    """
    pieces = [the_line[k:k + n] for k in range(0, len(the_line), n)]
    # Check `pieces` first: for an empty line there is no last piece to
    # inspect, and indexing pieces[-1] would raise IndexError.
    if pieces and not last_field and len(pieces[-1]) < n:
        pieces.pop()
    return pieces

# Demo: drop the short trailing piece first, then keep it.
print(split_by(the_line, 5, last_field=False))
print(split_by(the_line, 5, last_field=True))

# Cutting the data at given offsets, wrapped up as a function
def split_at(the_line, cuts, last_field=False):
    """Split ``the_line`` at the offsets listed in ``cuts``.

    Args:
        the_line: sliceable sequence (e.g. bytes or str) to split.
        cuts: iterable of offsets at which to cut (a list, tuple, range,
            ...).  It is copied, never modified.
        last_field: when true the piece running from the last cut to the
            end of the line is included; when false it is dropped.

    Returns:
        A list of the pieces, in order.
    """
    # Materialize once so that any iterable works, not only a list
    # ([0] + cuts would raise TypeError for a tuple or a range).
    cuts = list(cuts)
    # Pair each start offset with its end offset; the final pair ends at
    # None, i.e. at the end of the line.
    pieces = [the_line[i:j] for i, j in zip([0] + cuts, cuts + [None])]
    if not last_field:
        pieces.pop()
    return pieces

# Demo: without and then with the piece that follows the last cut.
print(split_at(the_line, cuts, last_field=False))
print(split_at(the_line, cuts, last_field=True))


# The same splitting implemented as a generator
print('**************************************************************')
def split_at_yield(the_line, cuts, last_field = False):
    """Yield the pieces of ``the_line`` delimited by the offsets in ``cuts``.

    ``cuts`` is iterated exactly once.  The piece from the last cut to the
    end of the line is yielded only when ``last_field`` is true.
    """
    start = 0
    for stop in cuts:
        piece = the_line[start:stop]
        # The next piece begins where this one ended.
        start = stop
        yield piece
    if not last_field:
        return
    yield the_line[start:]
        
# A generator has to be consumed to show its pieces; printing it directly
# would only display the generator object itself.
print(list(split_at_yield(the_line, cuts, False)))
print(list(split_at_yield(the_line, cuts, True)))
        
def split_by_yield(the_line, n, last_field=False):
    """Lazily produce the pieces of ``the_line``, ``n`` items at a time.

    Yields the same pieces, in the same order, that ``split_by`` returns
    as a list: every full-size piece is always produced, and a final piece
    shorter than ``n`` is produced only when ``last_field`` is true.  An
    empty ``the_line`` produces nothing.

    Args:
        the_line: sliceable sequence (e.g. bytes or str) to split.
        n: size of each piece.
        last_field: whether to keep a short final piece.

    Returns:
        An iterator over the pieces.
    """
    # Built eagerly so that an invalid step (n == 0) is reported at call
    # time rather than on first iteration.
    starts = range(0, len(the_line), n)
    chunks = (the_line[k:k + n] for k in starts)
    # Only the final chunk can be shorter than n, so this filter drops at
    # most that one piece and never a full-size one.
    return (chunk for chunk in chunks if last_field or len(chunk) == n)

# Demo: materialize the lazily produced pieces so they can be printed.
print(list(split_by_yield(the_line, 5, last_field=False)))
print(list(split_by_yield(the_line, 5, last_field=True)))
输出:

**************************************************************
(b'hello ', b'python ', b'hello ')
(b'hello ', b'python ', b'hello ', b'world!')
**************************************************************
(b'hello ', b'python ', b'hello ')
(b'hello ', b'python ', b'hello ', b'world!')
[b'hello', b' keya', b'n pyt', b'hon h', b'i hel', b'lo wo']
[b'hello', b' keya', b'n pyt', b'hon h', b'i hel', b'lo wo', b'rld!']
[b'hello ', b'keyan ', b'python ', b'hi ', b'hello ']
[b'hello ', b'keyan ', b'python ', b'hi ', b'hello ', b'world!']
**************************************************************
[b'hello ', b'keyan ', b'python ', b'hi ', b'hello ']
[b'hello ', b'keyan ', b'python ', b'hi ', b'hello ', b'world!']
[b'hello', b' keya', b'n pyt', b'hon h', b'i hel', b'lo wo']
[b'hello', b' keya', b'n pyt', b'hon h', b'i hel', b'lo wo', b'rld!']