Отзывы об интернет-провайдерах, спарсенные с сайта: http://www.moskvaonline.ru/rating
In [2]:
import numpy as np
import pandas as pd
import matplotlib
%matplotlib inline
In [10]:
# Load the scraped provider reviews from the local CSV export into a DataFrame.
reviews = pd.read_csv(
    'data/internet_reviews (1).csv',
)
In [11]:
# Preview the first five rows to check which columns were loaded.
reviews.head(n=5)
Out[11]:
In [12]:
# Dimensions of the frame as a (rows, columns) tuple.
reviews.shape
Out[12]:
In [13]:
# Keep only the first occurrence of each distinct comment text; the original
# row index is preserved. Then show the (rows, columns) tuple after the filter.
reviews = reviews.drop_duplicates(subset='comment', keep='first')
reviews.shape
Out[13]:
In [14]:
# Number of reviews per distinct rating value, most frequent first.
reviews['rating'].value_counts()
Out[14]:
In [15]:
# Histogram of comment lengths in characters, restricted to comments shorter
# than 3000 characters so a few very long reviews do not stretch the x-axis.
# The length is computed once with the vectorized `.str.len()`; a missing
# comment yields NaN, which fails the `< 3000` test and is left out instead of
# raising TypeError the way `apply(len)` would.
(
    reviews.comment
    .str.len()
    .loc[lambda n_chars: n_chars < 3000]
    .hist(bins=50)
)
Out[15]:
In [8]:
# Optional filter, currently disabled: keep only the reviews whose comment is
# shorter than 500 characters.
# reviews = reviews[reviews.comment.apply(len) < 500]
In [23]:
# Histogram of comment lengths in whitespace-separated words, restricted to
# comments with fewer than 200 words. The word count is computed once via
# `.str.split().str.len()` rather than splitting every comment twice; a missing
# comment yields NaN, which fails the `< 200` test and is left out instead of
# raising AttributeError the way `s.split()` would.
(
    reviews.comment
    .str.split()
    .str.len()
    .loc[lambda n_words: n_words < 200]
    .hist(bins=30)
)
Out[23]: