In [1]:
import pickle
from collections import Counter

from celery import group
from celery import chain
import pandas as pd

from celery_basics import add
from celery_basics import make_pi
from celery_basics import apply_counter
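
The three tasks are imported from a local celery_basics module that the notebook does not show. Below is a minimal sketch of what it might contain; the broker and backend URLs and the exact task bodies are assumptions, although the results printed further down are consistent with a Leibniz-series approximation of pi:

# celery_basics.py -- hypothetical reconstruction
import pickle
from collections import Counter

from celery import Celery

app = Celery("celery_basics", broker="amqp://localhost", backend="rpc://")

@app.task
def add(x, y):
    return x + y

@app.task
def make_pi(n):
    # Leibniz series: pi = 4 * (1 - 1/3 + 1/5 - 1/7 + ...)
    pi = 0.0
    for k in range(n):
        pi += 4.0 * (-1) ** k / (2 * k + 1)
    return pi

@app.task
def apply_counter(pickled_chunk):
    # unpickle a DataFrame chunk, count the words in its summary column,
    # and pickle the resulting Counter so any result serializer can carry it
    chunk = pickle.loads(pickled_chunk)
    counter = chunk.summary.apply(lambda x: Counter(str(x).split(" "))).values.sum()
    return pickle.dumps(counter)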

Sum 1 and 2 with the add task


In [2]:
res = add.apply_async(args=(1, 2))
results = res.get()
results


Out[2]:
3
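
apply_async(args=(1, 2)) is the explicit form; delay() is Celery's shorthand that forwards positional and keyword arguments directly:

res = add.delay(1, 2)  # equivalent to add.apply_async(args=(1, 2))
res.get()              # blocks until a worker has returned the result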

Calculate an approximation of pi with 10**5, 10**6, and 10**7 terms


In [3]:
%%timeit
job = group(make_pi.subtask((10**x, )) for x in range(5,8))
job_result = job.delay()
results = job_result.get()
print(results)


[3.1415826535897198, 3.1415916535897743, 3.1415925535897915]
[3.1415826535897198, 3.1415916535897743, 3.1415925535897915]
[3.1415826535897198, 3.1415916535897743, 3.1415925535897915]
[3.1415826535897198, 3.1415916535897743, 3.1415925535897915]
1 loop, best of 3: 6.45 s per loop
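
Before blocking on get(), the GroupResult returned by delay() can be polled with methods from Celery's ResultSet API; a small sketch:

job = group(make_pi.subtask((10**x,)) for x in range(5, 8))
job_result = job.delay()
print(job_result.completed_count())  # tasks finished so far
print(job_result.ready())            # True once every task in the group is done
results = job_result.get()           # results come back in task order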

In [ ]:
%%timeit
pis = []
for x in range(5,8):
    pis.append(make_pi(10**x))
print(pis)

Sum each of the numbers from 1 to 1000 added to itself


In [ ]:
%%timeit
jobs = chain(add.si(i, i) for i in range(1, 1001))
job_result = jobs.apply_async()
# the chain returns the last task's AsyncResult; walk the parent links
# to gather every task's value
results = []
node = job_result
while node is not None:
    results.append(node.get())
    node = node.parent
print(sum(results))
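
Why .si() rather than .s()? In a chain, each task's return value is normally prepended to the next task's arguments; .si() builds an immutable signature that ignores the parent result, so every add(i, i) above runs on its own arguments. A quick illustration:

chain(add.s(1, 2), add.s(4)).apply_async().get()    # (1 + 2), then (3 + 4) -> 7
chain(add.s(1, 2), add.si(4, 4)).apply_async().get()  # second task ignores the 3 -> 8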

In [6]:
%%timeit
results = sum([add(i, i) for i in range(1,1001)])
print(results)


1001000
1001000
1001000
...
10 loops, best of 3: 89.7 ms per loop

Count the words in the review summaries


In [ ]:
reviews = pd.read_csv("reviews.csv", encoding="utf-8", iterator=True, chunksize=1000, nrows=5000)
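
read_csv with iterator=True and chunksize=1000 returns a TextFileReader that yields 1,000-row DataFrames one at a time, and nrows=5000 caps the total, so the loop below sees five chunks instead of loading the whole file into memory.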
counter = Counter()

In [ ]:
for review in reviews:
    # map each summary to a word Counter, sum the chunk's Counters, accumulate
    counter += review.summary.apply(lambda x: Counter(str(x).split(" "))).values.sum()
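
Spelled out without the one-liner, the body of the loop above is equivalent to building one Counter per summary and adding them up:

chunk_counter = Counter()
for summary in review.summary:
    chunk_counter += Counter(str(summary).split(" "))
counter += chunk_counter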

In [ ]:
counter.most_common()

Count the words in the review summaries with Celery


In [ ]:
reviews = pd.read_csv("reviews.csv", encoding="utf-8", iterator=True, chunksize=1000, nrows=5000)

In [ ]:
jobs = []
for review in reviews:
    # ship each pickled chunk to a worker
    job = apply_counter.apply_async(args=(pickle.dumps(review),))
    jobs.append(job)
counter = Counter()
for job in jobs:
    # each worker returns a pickled Counter; merge them locally
    counter += pickle.loads(job.get())
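
Pickling the full chunk ships every column over the broker even though only summary is used. A lighter variant, hypothetical in that it assumes apply_counter is changed to unpickle a Series instead of a DataFrame, would send just that column:

jobs = []
for review in reviews:
    # hypothetical: apply_counter would need to accept a pickled Series here
    jobs.append(apply_counter.apply_async(args=(pickle.dumps(review.summary),)))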

In [ ]:
counter.most_common()
