python 通用方法

总结一下在开发中使用的一些通用方法

获取从开始到某一子串最后出现之间的字符串

def find_last_index(str, sub_str):
    """Return the index of the last occurrence of ``sub_str`` in ``str``.

    :param str: the string to search (the parameter name shadows the
        builtin; it is kept so existing keyword callers keep working)
    :param sub_str: the substring to look for
    :return: the highest index at which ``sub_str`` starts, or -1 when it
        does not occur at all
    """
    # str.rfind performs the same right-most search in a single call,
    # replacing the manual "find the next match until none is left" loop.
    return str.rfind(sub_str)

创建目录

1
2
3

def create_dir(dir):
    """Create the directory ``dir`` together with any missing parents.

    Nothing happens when the path already exists.

    :param dir: path of the directory to create
    :raises OSError: when the directory cannot be created and the path
        still does not exist afterwards
    """
    import os  # local import so the snippet works on its own

    try:
        os.makedirs(dir)
    except OSError:
        # Creating first and inspecting afterwards avoids the race where
        # another process creates the path between a check and the call.
        if not os.path.exists(dir):
            raise

保留两位小数

1
2
3

f_num = 3.1415
print("%.02f" % f_num)  # prints 3.14 (formats the value as text with two decimals)
print(round(f_num, 2))  # prints 3.14 (round() gives back a number, not a string)

ip判断

第一种 正则
import re
def _check_ip(self, ip):
    """Return True when ``ip`` is a dotted-quad IPv4 address, else False.

    Each of the four parts must be in the range 0-255. Leading zeros are
    accepted (for example ``01.002.3.4``), as the pattern always allowed.

    :param self: unused; kept so the function can be pasted into a class
    :param ip: the string to validate
    :return: bool
    """
    # Raw strings keep the backslashes intact for the regex engine.
    # [0-9] (instead of \d) rejects non-ASCII digits on Python 3, and \Z
    # (instead of $) rejects a trailing newline after the address.
    octet = r'(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)'
    pattern = re.compile(r'^(' + octet + r'\.){3}' + octet + r'\Z')
    return pattern.match(ip) is not None
第二种 netaddr模块
import netaddr
netaddr.valid_ipv4(ip)  ##ipv4版本
netaddr.valid_ipv6(ip)  ##ipv6版本

copy和deepcopy

复制不可变数据类型(数值、字符串、元组)，不管copy还是deepcopy，和=“赋值”的情况一样，新对象的id值与原来的值相同

复制可变数据类型(列表和字典)
copy有两种情况：

复制的对象中无复杂子对象, 原来值的改变不会影响copy后的值。copy后得到的对象的id值与原来对象的id值不同
复制的对象中有复杂子对象(如有一个元素是一个列表)，改变原对象中复杂子对象的值，会影响copy后的值

deepcopy: 完全独立复制，包括内层列表和字典

from copy import copy, deepcopy

a = '123'  # immutable object
b = copy(a)
c = deepcopy(a)
id(a)  # e.g. 4399399016
id(b)  # e.g. 4399399016 (same value as id(a))
id(c)  # e.g. 4399399016 (same value as id(a))

# A list without nested mutable elements: copy and deepcopy both produce
# a new list object that is independent of the original.
a = [1, 2, 3, 'a']
b = copy(a)
c = deepcopy(a)
id(a)  # e.g. 4398207176
id(b)  # e.g. 4396980552
id(c)  # e.g. 4399070728

# A list that contains a nested list.
a = [1, 2, 3, ['a']]
b = copy(a)      # shallow copy: shares the inner list with a
c = deepcopy(a)  # deep copy: gets its own inner list
a[3][0] = 'b'    # change the element of the nested list inside a
print(a)  # prints [1, 2, 3, ['b']]  (the modified list a)
print(b)  # prints [1, 2, 3, ['b']]  (the shared inner list changed too)
print(c)  # prints [1, 2, 3, ['a']]  (unaffected)

秒转换成时分秒

def seconds_to_hour_minutes_seconds(seconds):
    """Split a duration given in seconds into hours, minutes and seconds.

    :param seconds: the duration, in seconds
    :return: a ``(hours, minutes, seconds)`` tuple
    """
    total_minutes = seconds // 60
    leftover_seconds = seconds % 60
    hours = total_minutes // 60
    leftover_minutes = total_minutes % 60
    return hours, leftover_minutes, leftover_seconds

导入字符串module

python module 的动态加载，我们有时希望从配置文件等地获取要被动态加载的 module，但是所读取的配置项通常为字符串类型，
无法用 import 加载，例如：

>>> import 'os'
 File "<stdin>", line 1
  import 'os'
   ^
 SyntaxError: invalid syntax
使用__import__
>>> __import__('os')
<module 'os' from '/usr/lib64/python2.7/os.pyc'>

注意：如果引入的模块名带有点号(.)，例如 __import__('os.path')，返回的是顶层包 os 而不是子模块 os.path；这种情况建议使用 importlib.import_module('os.path')。
参考：https://github.com/openstack/oslo.utils/blob/master/oslo_utils/importutils.py

获取当前文件夹名称

1
2
3

假设当前目录为/root/test,想要获取test这个名称
import os
os.path.basename('/root/test')

获取cpu数

import os
try:
    import multiprocessing
except ImportError:
    # Multiprocessing is not available on Google App Engine.
    multiprocessing = None
def cpu_count():
    """Return the number of processors on this machine.

    Tries ``multiprocessing.cpu_count()`` first and then
    ``os.sysconf("SC_NPROCESSORS_CONF")``. Returns 1 when the
    ``multiprocessing`` module is unavailable or when neither lookup
    succeeds.
    """
    if multiprocessing is None:
        return 1
    try:
        return multiprocessing.cpu_count()
    except NotImplementedError:
        pass
    try:
        return os.sysconf("SC_NPROCESSORS_CONF")
    except (AttributeError, ValueError):
        pass
    # The parenthesised single-argument form runs on Python 2 and 3 alike.
    print("Could not detect number of processors; assuming 1")
    return 1

生成验证码

def generate_code(size=4):
    """Generate a random alphanumeric verification code.

    :param size: number of characters in the code (4 by default)
    :return: a string of ``size`` characters drawn from 0-9, a-z and A-Z
    """
    import random  # local import so the snippet works on its own

    # Every symbol is listed exactly once so that each one is equally
    # likely to be picked (the digit 0 used to be listed twice).
    characters = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
    # NOTE(review): the random module is not meant for security-sensitive
    # values; consider random.SystemRandom if these codes guard anything.
    return ''.join(random.choice(characters) for _ in range(size))

统计字符串中每个字符出现的次数

# Approach 1: collections.Counter
from collections import Counter

s = "kjalfj;ldsjafl;hdsllfdhg;lahfbl;hl;ahlf;h"
Counter(s)
# -> Counter({'l': 9, ';': 6, 'h': 6, 'f': 5, 'a': 4, 'j': 3, 'd': 3, 's': 2, 'k': 1, 'g': 1, 'b': 1})

# Approach 2: a plain dict filled in by hand
s_dic = {}
for ch in s:
    if ch not in s_dic:
        s_dic[ch] = 1
    else:
        s_dic[ch] += 1

记录字符串中所有字符出现的次数

def counter(strs):
    """Count how many times each item occurs in ``strs``.

    :param strs: a string (or any iterable of hashable items)
    :return: a dict that maps each distinct item to its number of
        occurrences
    """
    tally = {}
    for item in strs:
        tally[item] = tally.get(item, 0) + 1
    return tally

本地时间转UTC时间

import datetime
import pytz
def native_time_to_utc(native_time, tz_name="Asia/Shanghai"):
    """Convert a naive local datetime into a timezone-aware UTC datetime.

    :param native_time: a naive ``datetime.datetime`` (no tzinfo) holding
        the wall-clock time of the zone named by ``tz_name``
    :param tz_name: name of the local time zone; the default
        ``"Asia/Shanghai"`` is the zone this function always used
    :return: the same instant as an aware datetime in UTC
    :raises ValueError: if ``native_time`` already carries a tzinfo
    :raises pytz.exceptions.AmbiguousTimeError: if the local time occurs
        twice (end of daylight saving time)
    :raises pytz.exceptions.NonExistentTimeError: if the local time is
        skipped (start of daylight saving time)
    """
    local = pytz.timezone(tz_name)
    # is_dst=None makes pytz raise instead of guessing when the
    # wall-clock time is ambiguous or does not exist.
    local_dt = local.localize(native_time, is_dst=None)
    return local_dt.astimezone(pytz.utc)
native = datetime.datetime.strptime ("2001-2-3 10:11:12", "%Y-%m-%d %H:%M:%S")
native_time_to_utc(native)

列表反转的几种方式

reverse方法

>>> a = ['a', 99, 55, 12, 8, 6, 4, 3, 1]
>>> a.reverse()
>>> print a
[1, 3, 4, 6, 8, 12, 55, 99, 'a']

reversed方法

1
2
3

>>> a = ['a', 99, 55, 12, 8, 6, 4, 3, 1]
>>> list(reversed(a)) ## 因为reversed方法返回的是迭代器，所以需要list转换
[1, 3, 4, 6, 8, 12, 55, 99, 'a']

切片

1
2
3

>>> a = ['a', 99, 55, 12, 8, 6, 4, 3, 1]
>>> a[::-1]  
[1, 3, 4, 6, 8, 12, 55, 99, 'a']

列表推导式

1
2
3

>>> a = ['a', 99, 55, 12, 8, 6, 4, 3, 1]
>>> [a[len(a)-i] for i in range(1, len(a)+1)] ## 或者 [a[-i] for i in range(1, len(a)+1)]
[1, 3, 4, 6, 8, 12, 55, 99, 'a']

字符串常用方法

join

将包含若干子串的列表分解，并将这些子串合并成一个完整的字符串，与split方法相反

1
2
3

# lists = ['Yeti', 'Bigfoot', 'Loch Ness Monster']
# '-'.join(lists)
'Yeti-Bigfoot-Loch Ness Monster'

startswith/endswith

字符串是否以某个子串开始/结束

1
2
3

# s = 'Yeti-Bigfoot-Loch Ness Monster'
# s.startswith('Yeti')
True

find/rfind

子串在字符串中第一次/最后一次出现的索引

# s = 'Yeti-Bigfoot-Loch Ness Monster'
# sub_s = 'Big'
# s.find(sub_s)
5

count

子串在字符串中出现的次数

1
2
3

# s = 'Yeti-Bigfoot-Loch Ness Monster'
# s.count('o')
4

isalnum

字符串中出现的都是字母或者数字吗

1
2
3

# s = 'Yeti-Bigfoot-Loch Ness Monster'
# s.isalnum()
False  # 因s中有-和空格

capitalize/title/upper/lower/swapcase

capitalize: 字符串第一个字母大写
title: 所有单词开头字母变成大写
upper: 所有字符都变成大写
lower: 所有字符都变成小写
swapcase: 将所有字符的大小写转换

# s = 'Yeti-Bigfoot-Loch Ness Monster'
# s.capitalize()
'Yeti-bigfoot-loch ness monster'
# s.title()
'Yeti-Bigfoot-Loch Ness Monster'
# s.swapcase()
'yETI-bIGFOOT-lOCH nESS mONSTER'

replace

子串替换

1
2
3

# s = 'Yeti-Bigfoot-Loch Ness Monster'
# s.replace('-', ' ')
'Yeti Bigfoot Loch Ness Monster'

列表的常用方法

append/extend/insert

append: 添加元素至尾部
extend: 合并两个列表
insert: 在指定索引位置插入元素

del/remove/pop

del: 删除指定索引位置的元素
remove: 删除具有指定值的元素
pop: 获取并删除指定位置的元素

# lists = ['Yeti', 'Bigfoot', 'Loch', 'Ness', 'Monster']
# del lists[-1]
['Yeti', 'Bigfoot', 'Loch', 'Ness']
# lists = ['Yeti', 'Bigfoot', 'Loch', 'Ness', 'Monster']
# lists.remove('Ness')
['Yeti', 'Bigfoot', 'Loch', 'Monster']
# lists = ['Yeti', 'Bigfoot', 'Loch', 'Ness', 'Monster']
# lists.pop(0)
Yeti

in/index/count/join/sort/len

in: 判断特定值是否存在
index: 查询特定值的索引位置
count: 特定值在列表中出现的次数
join: 字符串方法，把列表转为字符串
sort: 排序(会改变原列表元素位置，使用sorted(lists) 则不会改变原列表，而是返回一个新的排好序的列表)
len: 获取列表长度

# lists = ['Yeti', 'Bigfoot', 'Loch', 'Ness', 'Monster']
# 'Ness' in lists
True
# lists.index('Ness')
3
# lists.count('Ness')
1
# ', '.join(lists)
'Yeti, Bigfoot, Loch, Ness, Monster'
# lists.sort()
['Bigfoot', 'Loch', 'Monster', 'Ness', 'Yeti']
# len(lists)
5

列表的复制

如果通过=把列表赋值给多个变量，改变其中任何一处都会造成其他变量对应的值也被修改。
我们可以通过：

copy模块(注意深拷贝与浅拷贝的区别)
list()函数
列表分片[:]
三种方式将一个列表的值复制到另一个新的列表中，使其与原来的列表互不影响

# import copy
# a = [1, 2, 3]
# b = copy.copy(a)
# c = list(a)
# d = a[:]
# id(a)
4505092040
# id(b)
4505026856
# id(c)
4506505088
# id(d)
4504545040

我们会发现他们的id不一样，不是指向的同一个列表，所以针对a、b、c、d的修改不会互相影响

字典常用方法

dict

可以使用dict()将包含双值子序列的序列转换成字典

# lot = [ ('a', 'b'), ('c', 'd'), ('e', 'f') ] # 双值序列的列表
# dict(lot)
{'a': 'b', 'c': 'd', 'e': 'f'}
# tol = ( ['a', 'b'], ['c', 'd'], ['e', 'f'] ) # 双值序列的元组
# dict(tol)
{'a': 'b', 'c': 'd', 'e': 'f'}
# los = [ 'ab', 'cd', 'ef' ] # 双字符的字符串组成的列表/元组
# dict(los)
{'a': 'b', 'c': 'd', 'e': 'f'}

OrderedDict() 保持键的插入顺序

在 Python 3.7 之前，默认字典中键的顺序是不可预知的。如果想要按照键的插入顺序来遍历，可以使用OrderedDict()，它会记住键的插入顺序(注意：它并不会按键的大小排序)

# from collections import OrderedDict
# quotes = OrderedDict([('Moe', 'A wise guy, hub?'), ('Larrry', 'Ow!'), ('Curly', 'Nyuk nyuk!')])  # 用键值对列表初始化，才能保证插入顺序
# for k in quotes:
#     print k
Moe
Larrry
Curly

update

使用update()可以将一个字典的键值对复制到另一个字典中去, 如果键相同，则新的值会覆盖相应的值

del

使用del可以删除指定键的元素

# dic = {'a': 'b', 'c': 'd', 'e': 'f'}
# del dic['a']
# dic
{'c': 'd', 'e': 'f'}

clear

使用clear()会清空字典

1	# dic.clear()

in/has_key()

用于判断某一个键是否在一个字典中(has_key已经不推荐使用)

# dic = {'a': 'b', 'c': 'd', 'e': 'f'}
# 'a' in dic
True
# dic.has_key('a')
True

keys()/values()

使用keys()可以获得字典的所有键，使用values()可以获得字典的所有值

# dic = {'a': 'b', 'c': 'd', 'e': 'f'}
# dic.keys()
['a', 'c', 'e']
# dic.values()
['b', 'd', 'f']

items()/iteritems()

使用items() 获取所有的键值对. 每一个键值对以元组的形式返回
使用iteritems()返回的是迭代器, 当迭代很大的序列时此方法更好

1
2
3

# dic = {'a': 'b', 'c': 'd', 'e': 'f'}
# dic.items()
[('a', 'b'), ('c', 'd'), ('e', 'f')]

字典根据键排序

根据键从小到大排序

1
2
3

dic = {'name': 'bj', 'age': 30, 'city': '北京', 'tel': '13812345678'}
# Walk the keys in ascending order and pair each key with its value.
new_list = [(key, dic[key]) for key in sorted(dic)]
# Prints the items in key order: age, city, name, tel
# (assumes a Python version whose dicts keep insertion order).
print(dict(new_list))

集合

python中，集合就像舍弃了值，仅剩下键的字典一样。键与键之间不允许重复。集合与字典的键一样，是无序的。

创建集合

使用set()函数创建一个集合，或者使用大括号将一系列以逗号隔开的值包裹起来。
注意：空的大括号 {} 创建的是一个空字典，而不是空集合；要创建空集合必须使用set()。

# empty_set = set()
# empty_set
set([])
# even_numbers = {0, 2, 4, 6, 8}
# even_numbers
set([8, 0, 2, 4, 6])

转换为集合

使用set()可以将已有列表、字符串、元组或者字典的内容来创建集合，其中重复的值会被丢弃

# set('letters')
set(['s', 'r', 'e', 'l', 't'])  # 把 重复的t和e去掉了, 同时生成的是无序集合。
# set( ['Dasher', 'Dancer', 'Prancer', 'Mason-Dixon'] )
set(['Dancer', 'Prancer', 'Mason-Dixon', 'Dasher'])   # 把重复的元素删除了
# set( ('Ummagumma', 'Echoes', 'Atom Heart Mother') )
set(['Echoes', 'Ummagumma', 'Atom Heart Mother'])
# set( {'apple': 'red', 'orange': 'orange', 'cherry': 'red'} )
set(['orange', 'cherry', 'apple'])  # 只有键被使用

集合合并

可以使用& 符号或者intersection()函数来获取两个集合的交集

# a = {1, 2}
# b = {2, 3}
# a & b
set([2])

可以使用|或者union()函数来获取集合的并集

# a = {1, 2}
# b = {2, 3}
# a|b
set([1, 2, 3])

可以使用-或者difference()获取两个集合的差集(出现在第一个集合但不出现在第二个集合)

# a = {1, 2}
# b = {2, 3}
# a-b
set([1])

可以使用^或者symmetric_difference()获得两个集合的异或集

# a = {1, 2}
# b = {2, 3}
# a^b
set([1, 3])

可以使用<=或者issubset()判断一个集合是否是另外一个集合的子集(第一个集合的所有元素都出现在第二个集合中)
注意：>=或者issuperset() 与之相反

# a = {1, 2}
# b = {1, 2, 3}
# a <= b
True

循环注意事项

while

while 循环后面跟else:

while ***:
    pass
else:
    pass

如果while循环正常结束(没有使用break跳出)，程序将进入到可选的else段。正常情况我们找到满足条件的解后使用break跳出；
如果循环结束，没有找到匹配的解，则进入else部分的代码段：

# numbers = [1, 3, 5]
# position = 0
# while position < len(numbers):
      if numbers[position] == 2:
          print 'Found it'
          break
      position += 1
  else:  # 会执行该分支
      print 'No number found'
No number found

for

for 循环后面跟else:

for * in *:
    pass
else:
    pass

与while类似，用来判断for循环是否正常结束(没有调用break跳出)：如果正常结束，会执行else段；如果通过break跳出，则不会执行else段

# for i in range(5):
      if i == 6:
          print 'Found it'
          break
  else:
      print 'Not found'
Not found

推导式

列表推导式

1
2
3

# number_list = [number for number in range(1, 6)]
# number_list
[1, 2, 3, 4, 5]

字典推导式

1
2
3

# dic = {letter: 'letters'.count(letter) for letter in set('letters')}
# dic
{'s': 1, 'r': 1, 'e': 2, 'l': 1, 't': 2}

集合推导式

生成器推导式(没有元组推导式)

1
2
3

# number_thing = (number for number in range(1, 6))
# type(number_thing)
<type 'generator'>

圆括号之间的是生成器推导式，而不是元组推导式。
一个生成器只能运行一次。列表、集合、字符串和字典都存储在内存中，但是生成器仅在运行中产生值，不会被存下来，
所以不能重新使用或者备份一个生成器

生成器

生成器是用来创建python序列的一个对象，该对象通过连续调用next()方法生成一系列的结果。使用它可以迭代庞大的序列，且不需要在内存中创建和存储整个序列。
通常，生成器是为迭代器产生数据的。

def countdown(n):
    """Generator that yields n, n-1, ..., 1.

    The message 'Counting down!' is printed once, when the first value
    is requested. Nothing is yielded when ``n`` is not greater than 0.

    :param n: the number to start counting down from
    """
    # The parenthesised single-argument form runs on Python 2 and 3 alike.
    print('Counting down!')
    while n > 0:
        yield n  # hand the current value to the caller
        n -= 1
# c = countdown(5)
# c.next()
Counting down!
5
# c.next()
4
# c.next()
3
...

next()调用使生成器函数一直运行，到下一条yield语句为止。此时next()将返回传递给yield的值,而且函数将暂时中止执行。
再次调用next()时，函数将继续执行yield之后的语句。此过程持续到函数返回为止。
我们通常不会手动调用next(), 而是会使用for循环：

# def my_range(first=0, last=10, step=1):
      number = first
      while number < last:
          yield number    # 生成一个值 number
          number += step

协程

协程是通过将yield语句作为表达式(yield)的形式创建的。把yield语句作为表达式使用，出现在赋值运算符的右边:

def print_matches(matchtext):
    """Coroutine that prints every line sent to it that contains ``matchtext``.

    Prime it with one ``next()`` call (this prints the 'Looking for'
    banner), then feed it lines with ``send()``.

    :param matchtext: the text to look for in each received line
    """
    # %-formatting plus a single-argument print() gives the same output
    # on Python 2 and Python 3.
    print('Looking for %s' % (matchtext,))
    while True:
        line = (yield)  # receive one line of text from send()
        if matchtext in line:
            print(line)

以这种方式使用yield语句的函数称为协程
要使用这个函数，首先要调用它，然后执行next()运行到第一条(yield)语句处暂停，然后使用send()给它发送数据，例如：

# matcher = print_matches('python')
# matcher.next()  # 会从函数开头一直运行到第一条(yield)处暂停
Looking for python
# matcher.send('Hello world')  # 没有输出，因为 matchtext 并不在 line 里
# matcher.send('python is cool')
python is cool
# matcher.send('yow!')
# matcher.close()  # matcher函数调用结束

由于上述函数在使用之前，需要先执行next()，很容易出错。因此可以通过装饰器的方式来自动完成首次next()的操作：

def coroutine(func):
    """Decorator that primes a generator-based coroutine.

    The wrapped function creates the generator and advances it to its
    first ``yield``, so callers can use ``send()`` straight away.

    :param func: a generator function that receives values via ``(yield)``
    :return: a function that returns the already-started generator
    """
    import functools  # local import so the snippet works on its own

    @functools.wraps(func)  # keep func's name and docstring on the wrapper
    def start(*args, **kwargs):
        g = func(*args, **kwargs)
        # The builtin next() works on Python 2.6+ and Python 3, whereas
        # the g.next() method only exists on Python 2.
        next(g)
        return g
    return start
@coroutine
def receiver():
    """Coroutine that prints every value sent to it."""
    # The parenthesised single-argument form runs on Python 2 and 3 alike.
    print('Ready to receive')
    while True:
        n = (yield)  # wait for the next value passed to send()
        print('Got it %s' % n)
r = receiver()
r.send('Hello world')   # 这里使用send() 方法之前不用执行r.next()

使用Counter()计数

# from collections import Counter
# breakfast = ['spam', 'spam', 'eggs', 'spam']
# breakfast_counter = Counter(breakfast)
# breakfast_counter
Counter({'spam': 3, 'eggs': 1})

函数most_common()按出现次数降序返回所有元素；如果给定一个数字n，则只返回出现次数最多的前n个元素

# breakfast_counter.most_common()
[('spam', 3), ('eggs', 1)]
# breakfast_counter.most_common(1)
[('spam', 3)]
# breakfast_counter.most_common(2)
[('spam', 3), ('eggs', 1)]

双端队列(deque): 栈+队列

deque是一种双端队列，同时具有栈和队列的特征。它可以从序列的任何一端添加和删除项。从一个词的两端扫向中间，
判断是否为回文串。函数popleft()去掉最左边的项并返回该项，pop()去掉最右边的项并返回该项。从两边一直向中间扫描，
只要两端的字符匹配，一直弹出直到到达中间:

# from collections import deque
# def palindrome(word):
     dq = deque(word)
     while len(dq) > 1:
         if dq.popleft() != dq.pop():
             return False
     return True
   
# palindrome('a')
True
# palindrome('racecar')
True
# palindrome('')
True
# palindrome('radar')
True
# palindrome('halibut')
False

itertools迭代代码结构

chain()迭代单个对象

# import itertools
# for item in itertools.chain([1, 2], ['a', 'b']):
#     print item
1
2
a
b

cycle() 无限迭代

# import itertools
# for item in itertools.cycle([1, 2]):
#     print item
1
2
1
2
1
...

accumulate() 计算累计的值

# import itertools
# for item in itertools.accumulate([1, 2, 3, 4]):
#     print item
1
3
6
10

特殊方法

比较相关

数学相关

其他相关