In [1]:
import pickle
from collections import Counter

from celery import group
from celery import chain
import pandas as pd

from celery_basics import add
from celery_basics import make_pi
from celery_basics import apply_counter
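
The three tasks are imported from a local celery_basics module that the notebook does not show. Below is a minimal sketch of what it might contain; the broker and backend URLs and the exact task bodies are assumptions, although the results printed further down are consistent with a Leibniz-series approximation of pi:

# celery_basics.py -- hypothetical reconstruction
import pickle
from collections import Counter

from celery import Celery

app = Celery("celery_basics", broker="amqp://localhost", backend="rpc://")

@app.task
def add(x, y):
    return x + y

@app.task
def make_pi(n):
    # Leibniz series: pi = 4 * (1 - 1/3 + 1/5 - 1/7 + ...)
    pi = 0.0
    for k in range(n):
        pi += 4.0 * (-1) ** k / (2 * k + 1)
    return pi

@app.task
def apply_counter(pickled_chunk):
    # unpickle a DataFrame chunk, count the words in its summary column,
    # and pickle the resulting Counter so any result serializer can carry it
    chunk = pickle.loads(pickled_chunk)
    counter = chunk.summary.apply(lambda x: Counter(str(x).split(" "))).values.sum()
    return pickle.dumps(counter)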

Sum 1 and 2 with the add task


In [2]:
res = add.apply_async(args=(1, 2))
results = res.get()
results


Out[2]:
3
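
apply_async(args=(1, 2)) is the explicit form; delay() is Celery's shorthand that forwards positional and keyword arguments directly:

res = add.delay(1, 2)  # equivalent to add.apply_async(args=(1, 2))
res.get()              # blocks until a worker has returned the result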

Calculate an approximation of pi with 10**5, 10**6, and 10**7 terms


In [3]:
%%timeit
job = group(make_pi.subtask((10**x, )) for x in range(5,8))
job_result = job.delay()
results = job_result.get()
print(results)


[3.1415826535897198, 3.1415916535897743, 3.1415925535897915]
[3.1415826535897198, 3.1415916535897743, 3.1415925535897915]
[3.1415826535897198, 3.1415916535897743, 3.1415925535897915]
[3.1415826535897198, 3.1415916535897743, 3.1415925535897915]
1 loop, best of 3: 6.45 s per loop
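
Before blocking on get(), the GroupResult returned by delay() can be polled with methods from Celery's ResultSet API; a small sketch:

job = group(make_pi.subtask((10**x,)) for x in range(5, 8))
job_result = job.delay()
print(job_result.completed_count())  # tasks finished so far
print(job_result.ready())            # True once every task in the group is done
results = job_result.get()           # results come back in task order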

In [ ]:
%%timeit
pis = []
for x in range(5,8):
    pis.append(make_pi(10**x))
print(pis)

Sum each of the numbers from 1 to 1000 added to itself


In [ ]:
%%timeit
jobs = chain(add.si(i, i) for i in range(1, 1001))
job_result = jobs.apply_async()
# the chain returns the last task's AsyncResult; walk the parent links
# to gather every task's value
results = []
node = job_result
while node is not None:
    results.append(node.get())
    node = node.parent
print(sum(results))
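
Why .si() rather than .s()? In a chain, each task's return value is normally prepended to the next task's arguments; .si() builds an immutable signature that ignores the parent result, so every add(i, i) above runs on its own arguments. A quick illustration:

chain(add.s(1, 2), add.s(4)).apply_async().get()    # (1 + 2), then (3 + 4) -> 7
chain(add.s(1, 2), add.si(4, 4)).apply_async().get()  # second task ignores the 3 -> 8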

In [6]:
%%timeit
results = sum([add(i, i) for i in range(1,1001)])
print(results)


1001000
1001000
1001000
...
10 loops, best of 3: 89.7 ms per loop

Count the words in the review summaries


In [ ]:
reviews = pd.read_csv("reviews.csv", encoding="utf-8", iterator=True, chunksize=1000, nrows=5000)
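
read_csv with iterator=True and chunksize=1000 returns a TextFileReader that yields 1,000-row DataFrames one at a time, and nrows=5000 caps the total, so the loop below sees five chunks instead of loading the whole file into memory.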
counter = Counter()

In [ ]:
for review in reviews:
    # map each summary to a word Counter, sum the chunk's Counters, accumulate
    counter += review.summary.apply(lambda x: Counter(str(x).split(" "))).values.sum()
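
Spelled out without the one-liner, the body of the loop above is equivalent to building one Counter per summary and adding them up:

chunk_counter = Counter()
for summary in review.summary:
    chunk_counter += Counter(str(summary).split(" "))
counter += chunk_counter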

In [ ]:
counter.most_common()

Count the words in the review summaries with Celery


In [ ]:
reviews = pd.read_csv("reviews.csv", encoding="utf-8", iterator=True, chunksize=1000, nrows=5000)

In [ ]:
jobs = []
for review in reviews:
    # ship each pickled chunk to a worker
    job = apply_counter.apply_async(args=(pickle.dumps(review),))
    jobs.append(job)
counter = Counter()
for job in jobs:
    # each worker returns a pickled Counter; merge them locally
    counter += pickle.loads(job.get())
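
Pickling the full chunk ships every column over the broker even though only summary is used. A lighter variant, hypothetical in that it assumes apply_counter is changed to unpickle a Series instead of a DataFrame, would send just that column:

jobs = []
for review in reviews:
    # hypothetical: apply_counter would need to accept a pickled Series here
    jobs.append(apply_counter.apply_async(args=(pickle.dumps(review.summary),)))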

In [ ]:
counter.most_common()
