面向对象补充,进程,数据共享,锁,进程池,模块(requests,bs4(beautifulsoup)),协程
分类:
IT文章
•
2023-11-06 20:18:48
一丶面向对象补充
"""
# Demo: support the obj[key] syntax by delegating to an internal dict.
class Foo(object):
    def __init__(self):
        # Backing store for everything assigned through obj[key] = value.
        self.info = {}

    def __setitem__(self, key, value):
        # Invoked by ``obj[key] = value``.
        self.info[key] = value

    def __getitem__(self, item):
        # Invoked by ``obj[item]``; a missing key yields None (dict.get).
        return self.info.get(item)


obj = Foo()
obj['x'] = 123
print(obj['x'])
"""
from flask import globals
class Foo(object):
    """Keep every attribute assigned on an instance in an ``info`` dict.

    ``obj.name = value`` is redirected into ``info`` by ``__setattr__``;
    names that normal lookup cannot find are read back out of ``info`` by
    ``__getattr__``.
    """

    def __init__(self):
        # Go through object.__setattr__ so our own __setattr__ (which needs
        # ``info`` to exist already) is bypassed; this is the underlying
        # mechanism for setting a value on an object.
        object.__setattr__(self, 'info', {})

    def __setattr__(self, key, value):
        """Store *value* under *key* in ``info`` instead of the instance."""
        self.info[key] = value

    def __getattr__(self, item):
        """Print *item* and return the value stored for it in ``info``.

        Raises:
            AttributeError: if *item* was never assigned. Raising
                AttributeError (not KeyError) keeps ``hasattr`` and
                ``getattr(obj, name, default)`` working.
        """
        print(item)
        try:
            # Fetch ``info`` without re-entering __getattr__, so a missing
            # ``info`` cannot cause infinite recursion.
            return object.__getattribute__(self, 'info')[item]
        except (AttributeError, KeyError):
            raise AttributeError(
                '%r object has no attribute %r' % (type(self).__name__, item)
            ) from None


obj = Foo()
obj.name = 'alex'
print(obj.name)
# Build the list of the integers 0..9999 in one step and print it.
v = list(range(10000))
print(v)
小补充...
二丶进程
进程间数据不共享
import multiprocessing

# Module-level list mutated by task(). The point of this example is that
# data is not shared between processes.
data_list = []


def task(arg):
    """Append *arg* to the module-level ``data_list`` and print the list."""
    data_list.append(arg)
    print(data_list)


def run():
    """Start ten processes, each calling ``task`` with its loop index."""
    for i in range(10):
        p = multiprocessing.Process(target=task, args=(i,))
        # Alternative for comparison:
        # p = threading.Thread(target=task, args=(i,))
        p.start()


if __name__ == '__main__':
    run()
...
常用功能:
- join
- daemon
- name
- multiprocessing.current_process()
- multiprocessing.current_process().ident/pid
类继承方式创建进程
import multiprocessing


class MyProcess(multiprocessing.Process):
    """Process subclass whose work is defined by overriding ``run``."""

    def run(self):
        """Print the process object this method is executing in."""
        print('当前进程', multiprocessing.current_process())


def run():
    """Create and start two ``MyProcess`` instances."""
    p1 = MyProcess()
    p1.start()
    p2 = MyProcess()
    p2.start()


if __name__ == '__main__':
    run()
...
进程间数据共享
Queue:
import multiprocessing

# Created at import time and handed to every worker through ``args``.
q = multiprocessing.Queue()


def task(arg, q):
    """Put *arg* on the queue *q*."""
    q.put(arg)


def run():
    """Start ten worker processes and print the ten values they enqueue."""
    workers = []
    for i in range(10):
        p = multiprocessing.Process(target=task, args=(i, q,))
        p.start()
        workers.append(p)
    # Read exactly one value per worker: an unbounded ``while True`` loop
    # blocks forever in q.get() once the ten items have been consumed.
    for _ in workers:
        v = q.get()
        print(v)
    # Join only after the queue has been drained, so no worker is left
    # waiting to flush its buffered item.
    for p in workers:
        p.join()


# Guarded so that merely importing this module does not launch the workers.
if __name__ == '__main__':
    run()
linux:
import multiprocessing


def task(arg, q):
    """Put *arg* on the queue *q*."""
    q.put(arg)


if __name__ == '__main__':
    q = multiprocessing.Queue()

    workers = []
    for i in range(10):
        p = multiprocessing.Process(target=task, args=(i, q,))
        p.start()
        workers.append(p)

    # Read exactly one value per worker: an unbounded ``while True`` loop
    # blocks forever in q.get() once the ten items have been consumed.
    for _ in workers:
        v = q.get()
        print(v)

    # Join only after the queue has been drained, so no worker is left
    # waiting to flush its buffered item.
    for p in workers:
        p.join()
windows:
Manager:(*)
import multiprocessing

# Manager-backed dict created at import time; task() reads it as a global.
# NOTE(review): because workers reach ``dic`` through the module global
# rather than an argument, this variant presumably depends on the fork
# start method (the article labels it as the Linux version) - confirm.
m = multiprocessing.Manager()
dic = m.dict()


def task(arg):
    """Store 100 under key *arg* in the module-level managed dict ``dic``."""
    dic[arg] = 100


def run():
    """Start ten workers, wait for them all, then print the stored values."""
    workers = []
    for i in range(10):
        p = multiprocessing.Process(target=task, args=(i,))
        p.start()
        workers.append(p)
    # Wait for every worker to exit instead of pausing on input('>>>'):
    # pressing Enter too early printed an incomplete result.
    for p in workers:
        p.join()
    print(dic.values())


if __name__ == '__main__':
    run()
Linux:
import multiprocessing
import time


def task(arg, dic, delay=2):
    """Sleep for *delay* seconds, then store 100 under key *arg* in *dic*.

    Args:
        arg: key to write.
        dic: mapping to write into (a Manager dict when run across processes).
        delay: seconds to sleep before writing; defaults to the original 2.
    """
    time.sleep(delay)
    dic[arg] = 100


if __name__ == '__main__':
    m = multiprocessing.Manager()
    dic = m.dict()

    process_list = []
    for i in range(10):
        p = multiprocessing.Process(target=task, args=(i, dic,))
        p.start()
        process_list.append(p)

    # join() blocks until each worker has exited. This replaces the earlier
    # ``while True`` loop that repeatedly polled is_alive() without pausing.
    for p in process_list:
        p.join()

    print(dic)
windows:
三丶进程锁
四丶进程池
from concurrent.futures import ThreadPoolExecutor,ProcessPoolExecutor (官方推荐方式)
import time
from concurrent.futures import ThreadPoolExecutor,ProcessPoolExecutor
def task(arg, delay=2):
    """Sleep for *delay* seconds (default 2), then print *arg*."""
    time.sleep(delay)
    print(arg)


if __name__ == '__main__':
    # The ``with`` block shuts the pool down once all ten submitted tasks
    # have finished, instead of leaving the executor open.
    with ProcessPoolExecutor(5) as pool:
        for i in range(10):
            pool.submit(task, i)
进程池
五丶初识爬虫
安装:
pip3 install requests
pip3 install beautifulsoup4
问题:
找不到命令?
方式一:
C:\Users\Administrator\AppData\Local\Programs\Python\Python36\Scripts\pip3 install requests
方式二:
C:\Users\Administrator\AppData\Local\Programs\Python\Python36\Scripts\pip3 install requests
实例:
import requests
from bs4 import BeautifulSoup
from concurrent.futures import ThreadPoolExecutor,ProcessPoolExecutor
# Simulate a browser sending a request.
# Internally this creates           sk = socket.socket()
# opens a socket connection to Chouti  sk.connect(...)
#                                   sk.sendall('...')
#                                   sk.recv(...)
def task(url, timeout=10):
    """Download *url* with a browser User-Agent and print the page text.

    Args:
        url: address to fetch.
        timeout: seconds passed to ``requests.get``; without a timeout a
            stalled server would block the calling pool thread forever.
    """
    print(url)
    r1 = requests.get(
        url=url,
        headers={
            'User-Agent':'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.92 Safari/537.36'
        },
        timeout=timeout,
    )
    # Inspect the text that was downloaded.
    soup = BeautifulSoup(r1.text, 'html.parser')
    print(soup.text)
    # Example of pulling each entry's title and link out of the page:
    # content_list = soup.find('div',attrs={'id':'content-list'})
    # for item in content_list.find_all('div',attrs={'class':'item'}):
    #     title = item.find('a').text.strip()
    #     target_url = item.find('a').get('href')
    #     print(title,target_url)
def run():
    """Fetch listing pages 1-49 concurrently on a pool of five threads."""
    # The ``with`` block shuts the pool down after every submitted download
    # has finished, instead of leaving the executor open.
    with ThreadPoolExecutor(5) as pool:
        for i in range(1, 50):
            pool.submit(task, 'https://dig.chouti.com/all/hot/recent/%s' % i)


if __name__ == '__main__':
    run()
实例
相关:
a. 以上示例进程和线程哪个好?
- 线程好
b. requests模块模拟浏览器发送请求
- 本质 requests.get(...):
- 创建socket客户端
- 连接 【阻塞】
- 发送请求
- 接收响应【阻塞】
- 断开连接
c. 线程和进程池