# Python multiprocessing: storing the return values of worker processes
import warnings
import time
from multiprocessing import Manager
from multiprocessing.dummy import freeze_support
import pandas as pd
from multiprocessing.pool import Pool
def get_pool(tu):
    """Count character occurrences in a fixed sample text and store the result.

    Builds a DataFrame with one row per character of the sample text
    (column ``voc_b_id`` holds the character, column ``count`` holds 1),
    groups it by character, counts the rows in each group and appends the
    resulting two-column DataFrame to ``tu``.

    Args:
        tu: Any object with an ``append`` method. The script below passes a
            ``Manager().list()`` proxy so that results produced in worker
            processes are visible to the parent.

    Returns:
        None. The result is delivered through ``tu``.
    """
    chars = list("djaiojeiwoiobnaeoijojxocvmoiojes" * 10000)
    frame = pd.DataFrame({"voc_b_id": chars, "count": [1] * len(chars)})

    # as_index=False keeps "voc_b_id" as a regular column in the result.
    grouped = frame.groupby("voc_b_id", as_index=False)
    per_char_counts = grouped["count"].count()

    tu.append(per_char_counts)
if __name__ == '__main__':
    # The freeze_support imported at the top of the file comes from
    # multiprocessing.dummy (the thread-based wrapper), where it is an empty
    # stub. Call the real hook from the multiprocessing package instead so
    # frozen Windows executables work.
    import multiprocessing

    multiprocessing.freeze_support()
    warnings.filterwarnings('ignore')

    # A Manager list is a proxy object: appends made by worker processes are
    # forwarded to the manager process and are visible here afterwards.
    manager = Manager()
    return_list = manager.list()

    start = time.time()
    with Pool(8) as pool:
        # Keep the AsyncResult handles; dropping them would silently discard
        # any exception raised inside a worker.
        pending = [
            pool.apply_async(get_pool, args=(return_list,))
            for _ in range(200)
        ]
        # close() + join() must happen inside the with-block, because leaving
        # the block terminates the pool.
        pool.close()
        pool.join()

    # All tasks are finished at this point, so get() returns immediately and
    # re-raises the worker's exception if a task failed.
    for task in pending:
        task.get()

    print(time.time() - start)

    # Each partial frame already holds per-character counts, so the partial
    # counts must be summed. Using count() here would only report how many
    # partial frames contain each character.
    merged = (
        pd.concat(list(return_list))
        .groupby("voc_b_id", as_index=False)["count"]
        .sum()
    )
    print(merged)