In [ ]:
import pickle
from collections import Counter
from celery import group
from celery import chain
import pandas as pd
from celery_basics import add
from celery_basics import make_pi
from celery_basics import apply_counter
In [ ]:
# Smoke test: dispatch the `add` task to a Celery worker and block on its result.
async_result = add.apply_async(args=(1, 2))
task_output = async_result.get()
task_output
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
# Running tally of word frequencies across all chunks.
counter = Counter()
# Stream the first 50k rows of the reviews file in 5k-row chunks so the
# whole CSV never has to fit in memory at once.
reviews = pd.read_csv("reviews.csv", encoding="utf-8", iterator=True, chunksize=5000, nrows=50000)
In [ ]:
# Sequential baseline: count words chunk by chunk in this process.
# Fix: the loop body was not indented in the export (a SyntaxError);
# re-indented so the accumulation actually runs once per chunk.
for review in reviews:
    # Split each summary on single spaces into a per-row Counter, then
    # merge all row Counters of the chunk into the running total.
    counter += review.summary.apply(lambda x: Counter(str(x).split(" "))).values.sum()
In [ ]:
counter.most_common()
In [ ]:
reviews = pd.read_csv("reviews.csv", encoding="utf-8", iterator=True, chunksize=5000, nrows=50000)
In [ ]:
# Distributed version: fan each DataFrame chunk out to a Celery worker.
# Fix: both loop bodies were not indented in the export (a SyntaxError);
# re-indented so dispatch and aggregation run per chunk / per job.
jobs = []
for review in reviews:
    # Pickle the chunk because Celery's default serializer cannot
    # transport a pandas DataFrame directly.
    job = apply_counter.apply_async(args=(pickle.dumps(review),))
    jobs.append(job)

# Gather the partial Counters from the workers and merge them.
counter = Counter()
for job in jobs:
    # NOTE(review): pickle.loads is acceptable here only because the
    # broker and workers are trusted; never unpickle untrusted data.
    counter += pickle.loads(job.get())
In [ ]:
counter.sum().most_common()
In [ ]:
In [ ]:
In [ ]: