In [1]:
import pandas as pd
import boto3
In [3]:
# Low-level S3 client; the listing cells obtain their paginators from it.
s3 = boto3.client(service_name="s3")
In [19]:
# list_objects_v2 returns results in pages, so go through a paginator to
# enumerate every key under the intermediate "reviews-" prefix.
paginator = s3.get_paginator("list_objects_v2")
page_iterator = paginator.paginate(
    Bucket="dogfaces",
    Prefix="reviews/inter_reviews/reviews-",
)
In [20]:
# Combine every CSV listed by `page_iterator` into a single frame, `resdf`.
# Frames are collected in a list and concatenated once: calling pd.concat
# inside the loop re-copies all previously accumulated rows on each iteration.
review_frames = []
for page in page_iterator:
    # A page with no matching objects has no 'Contents' key, so default to an
    # empty list instead of raising KeyError.
    for obj in page.get('Contents', []):
        df_path = "s3://dogfaces/" + obj['Key']
        review_frames.append(pd.read_csv(df_path))

# pd.concat raises ValueError on an empty list; fall back to an empty frame so
# later cells still run when the prefix matched nothing.
resdf = pd.concat(review_frames, axis=0) if review_frames else pd.DataFrame()
In [24]:
# Sanity check: (row count, column count) of the current `resdf`.
resdf.shape
Out[24]:
In [22]:
# Preview the first five rows of the current `resdf`.
resdf.head(n=5)
Out[22]:
In [23]:
# Column labels of the current `resdf`.
resdf.columns
Out[23]:
In [25]:
# Serialise `resdf` to CSV text (index column omitted) and upload it as a
# single object in the 'dogfaces' bucket.
s3_res = boto3.resource('s3')
review_df = resdf.to_csv(index=False)  # CSV text, despite the `_df` suffix
reviews_bucket = s3_res.Bucket('dogfaces')
reviews_bucket.put_object(Key='reviews/reviews.csv', Body=review_df)
Out[25]:
In [26]:
# Same paged listing as for the reviews, now over the intermediate
# "pictures-" prefix.
paginator = s3.get_paginator("list_objects_v2")
page_iterator = paginator.paginate(
    Bucket="dogfaces",
    Prefix="reviews/inter_reviews/pictures-",
)
In [28]:
# Combine every CSV listed by `page_iterator` into a single frame, `resdf`
# (this overwrites any earlier `resdf`).
# Frames are collected in a list and concatenated once: calling pd.concat
# inside the loop re-copies all previously accumulated rows on each iteration.
picture_frames = []
for page in page_iterator:
    # A page with no matching objects has no 'Contents' key, so default to an
    # empty list instead of raising KeyError.
    for obj in page.get('Contents', []):
        df_path = "s3://dogfaces/" + obj['Key']
        picture_frames.append(pd.read_csv(df_path))

# pd.concat raises ValueError on an empty list; fall back to an empty frame so
# later cells still run when the prefix matched nothing.
resdf = pd.concat(picture_frames, axis=0) if picture_frames else pd.DataFrame()
In [29]:
# Sanity check: (row count, column count) of the current `resdf`.
resdf.shape
Out[29]:
In [31]:
# Serialise `resdf` to CSV text (index column omitted) and upload it as a
# single object in the 'dogfaces' bucket.
s3_res = boto3.resource('s3')
pic_df = resdf.to_csv(index=False)  # CSV text, despite the `_df` suffix
pictures_bucket = s3_res.Bucket('dogfaces')
pictures_bucket.put_object(Key='reviews/pictures_log.csv', Body=pic_df)
Out[31]:
In [ ]: