In [1]:
import pandas as pd
import boto3

In [3]:
# Low-level S3 client; the cells below use it to build list_objects_v2 paginators.
s3 = boto3.client("s3")

Gather all reviews


In [19]:
# Page through every object in the 'dogfaces' bucket whose key starts with the
# intermediate-reviews prefix.
paginator = s3.get_paginator('list_objects_v2')
page_iterator = paginator.paginate(
    Bucket='dogfaces',
    Prefix='reviews/inter_reviews/reviews-',
)

In [20]:
# Read every CSV listed by page_iterator and stack them row-wise into resdf.
# The frames are collected in a list and concatenated once: calling pd.concat
# inside the loop re-copies all previously loaded rows on every iteration.
frames = []
for page in page_iterator:
    # list_objects_v2 omits the 'Contents' key when a page holds no objects,
    # so fall back to an empty list instead of raising KeyError.
    for obj in page.get('Contents', []):
        df_path = "s3://dogfaces/" + obj['Key']
        frames.append(pd.read_csv(df_path))

# pd.concat raises ValueError on an empty list; keep an empty frame as the
# result when nothing matched the prefix.
resdf = pd.concat(frames, axis=0) if frames else pd.DataFrame()

In [24]:
# Sanity check: (rows, columns) of the concatenated frame.
resdf.shape


Out[24]:
(61202, 7)

In [22]:
# Preview the first rows to eyeball the columns and values.
resdf.head()


Out[22]:
rating review_content review_id review_time toy_id toy_name user_name
0 5 My Fur Baby loves his Rope pull toy. I brings ... 185682550 Sep 4, 2017 108574 mammoth-cottonblend-5-knot-dog-rope Gunnerfavorite
1 5 We love this toy. I got the largest size for m... 185591533 Sep 1, 2017 108574 mammoth-cottonblend-5-knot-dog-rope Lindsay
2 4 this was a good product but not for very stron... 185522967 Aug 29, 2017 108574 mammoth-cottonblend-5-knot-dog-rope bcodpas3
3 5 The dogs aren't big chewers so the rope is hol... 185487336 Aug 28, 2017 108574 mammoth-cottonblend-5-knot-dog-rope SickPup
4 5 My Aussies love this rope. It's long enough th... 185484167 Aug 28, 2017 108574 mammoth-cottonblend-5-knot-dog-rope AussieMom

In [23]:
# List the column labels of the concatenated frame.
resdf.columns


Out[23]:
Index([u'rating', u'review_content', u'review_id', u'review_time', u'toy_id',
       u'toy_name', u'user_name'],
      dtype='object')

In [25]:
# Render the combined frame as CSV text (row index dropped) and upload it to
# the 'dogfaces' bucket as a single object. Note that review_df holds the CSV
# string that is sent as the request body, not a DataFrame.
s3_res = boto3.resource('s3')
review_df = resdf.to_csv(index=False)
s3_res.Bucket('dogfaces').put_object(
    Key='reviews/reviews.csv',
    Body=review_df,
)


Out[25]:
s3.Object(bucket_name='dogfaces', key='reviews/reviews.csv')

Gather all picture logs


In [26]:
# Page through every object in the 'dogfaces' bucket whose key starts with the
# intermediate picture-log prefix.
paginator = s3.get_paginator('list_objects_v2')
page_iterator = paginator.paginate(
    Bucket='dogfaces',
    Prefix='reviews/inter_reviews/pictures-',
)

In [28]:
# Read every CSV listed by page_iterator and stack them row-wise into resdf.
# NOTE: this rebinds resdf, so the frame it held before this cell is replaced.
# The frames are collected in a list and concatenated once: calling pd.concat
# inside the loop re-copies all previously loaded rows on every iteration.
frames = []
for page in page_iterator:
    # list_objects_v2 omits the 'Contents' key when a page holds no objects,
    # so fall back to an empty list instead of raising KeyError.
    for obj in page.get('Contents', []):
        df_path = "s3://dogfaces/" + obj['Key']
        frames.append(pd.read_csv(df_path))

# pd.concat raises ValueError on an empty list; keep an empty frame as the
# result when nothing matched the prefix.
resdf = pd.concat(frames, axis=0) if frames else pd.DataFrame()

In [29]:
# Sanity check: (rows, columns) of the concatenated frame.
resdf.shape


Out[29]:
(2619, 7)

In [31]:
# Render the combined frame as CSV text (row index dropped) and upload it to
# the 'dogfaces' bucket as a single object. Note that pic_df holds the CSV
# string that is sent as the request body, not a DataFrame.
s3_res = boto3.resource('s3')
pic_df = resdf.to_csv(index=False)
s3_res.Bucket('dogfaces').put_object(
    Key='reviews/pictures_log.csv',
    Body=pic_df,
)


Out[31]:
s3.Object(bucket_name='dogfaces', key='reviews/pictures_log.csv')

In [ ]: