In [1]:
# Install the PyDrive wrapper & import libraries.
# This only needs to be done once in a notebook.
# (-U upgrades an already-installed copy; -q keeps pip's output quiet.)
!pip install -U -q PyDrive
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials
# Authenticate and create the PyDrive client.
# This only needs to be done once in a notebook.
auth.authenticate_user()
gauth = GoogleAuth()
# Reuse the application-default credentials for PyDrive.
gauth.credentials = GoogleCredentials.get_application_default()
# `drive` is the client object the Drive cells in this notebook call into.
drive = GoogleDrive(gauth)
In [2]:
# Build a Drive file entry titled 'File2.txt', give it some text content,
# and push it to Drive.
file_metadata = {'title': 'File2.txt'}
uploaded = drive.CreateFile(file_metadata)
uploaded.SetContentString('Hello World')
uploaded.Upload()

# Report the ID stored on the file object after the upload.
uploaded_id = uploaded.get('id')
print('Uploaded file with ID {}'.format(uploaded_id))
In [12]:
# List .csv files in the Drive root.
#
# The query matches files whose title contains '.csv' and whose parent is
# the root folder.
#
# Search query reference:
# https://developers.google.com/drive/v2/web/search-parameters
listed = drive.ListFile({'q': "title contains '.csv' and 'root' in parents"}).GetList()
# Print one line per match so a file ID can be copied for download.
for file in listed:
    print('title {}, id {}'.format(file['title'], file['id']))
In [0]:
# Fetch a Drive file by its ID and print its text content.
#
# Example of what a file ID looks like: laggVyWshwcyP6kEI-y_W3P8D26sz
# Link for the ID used below:
# https://drive.google.com/open?id=1kE2SbrXuVLZcE0wnTCANZPsuMvej3YGz
file_id = '1kE2SbrXuVLZcE0wnTCANZPsuMvej3YGz'
downloaded = drive.CreateFile({'id': file_id})
content_text = downloaded.GetContentString()
print('Downloaded content "{}"'.format(content_text))
In [0]:
# Write the Drive file's content to a local CSV in the working directory.
downloaded.GetContentFile('mobile_cleaned_local.csv')
In [7]:
# Shell out to list the working directory and check the file is there.
!ls
In [0]:
import pandas as pd
In [0]:
# Load the local CSV into a DataFrame.
df = pd.read_csv('mobile_cleaned_local.csv')
In [11]:
# First rows of the frame (pandas shows 5 by default).
df.head()
Out[11]:
In [13]:
# Last rows of the frame.
df.tail()
Out[13]:
In [14]:
# Check what kind of object read_csv produced.
type(df)
Out[14]:
In [0]:
# List the attribute and method names available on the object.
dir(df)
In [16]:
# Number of rows.
len(df)
Out[16]:
In [17]:
# (rows, columns) tuple.
df.shape
Out[17]:
In [18]:
# Single row selected by index label 5.
df.loc[5]
Out[18]:
In [0]:
# Row slice 23:29 (end-exclusive, so six rows at most).
df_short = df[23:29]
In [23]:
# Confirm the slice's (rows, columns).
df_short.shape
Out[23]:
In [24]:
# Preview the sliced rows.
df_short.head()
Out[24]:
In [0]:
# Narrow the frame to the handful of columns used in the rest of the analysis.
thin_columns = [
    'stand_by_time',
    'expandable_memory',
    'price',
    'battery_capacity',
    'is_liked',
]
df_thin = df[thin_columns]
In [26]:
# (rows, columns) of the column subset.
df_thin.shape
Out[26]:
In [27]:
# Preview the column subset.
df_thin.head()
Out[27]:
In [0]:
# Keep only the rows whose is_liked value equals 1.
liked_mask = df_thin['is_liked'] == 1
df_liked = df_thin[liked_mask]
In [29]:
# (rows, columns) of the is_liked == 1 subset.
df_liked.shape
Out[29]:
In [34]:
# Summary statistics for the price column alone.
df_thin['price'].describe()
Out[34]:
In [35]:
# Summary statistics for the columns of df_thin.
df_thin.describe()
Out[35]:
In [36]:
# Mean price over rows where is_liked == 1.
df_thin[df_thin['is_liked'] == 1]['price'].mean()
Out[36]:
In [37]:
# Mean price over rows where is_liked == 0, for comparison with the cell above.
df_thin[df_thin['is_liked'] == 0]['price'].mean()
Out[37]:
In [0]:
# Group the rows of df_thin by their is_liked value; `g` is the GroupBy object.
g = df_thin.groupby(['is_liked'])
In [39]:
# Walk the groups one at a time, printing each group's key on its own line
# followed by the rows that belong to it.
# NOTE(review): because the grouping key was given as a one-element list,
# `key` may be a 1-tuple rather than a scalar depending on the pandas version.
for key, df_key in g:
    print(key, df_key, sep='\n')
In [43]:
# Summary statistics computed separately for each is_liked group.
df_thin.groupby(['is_liked']).describe()
Out[43]:
In [0]:
import matplotlib.pyplot as plt
import seaborn as sns
# Apply seaborn's default plot theme.
sns.set()
In [45]:
# Pairwise plots of the df_thin columns, with histograms on the diagonal.
# NOTE: pairplot returns a seaborn PairGrid rather than a matplotlib Axes,
# despite the variable being named `ax`.
ax = sns.pairplot(df_thin, diag_kind='hist')
In [46]:
# Same grid, coloured by the is_liked value.
ax = sns.pairplot(df_thin, diag_kind='hist', hue='is_liked')
In [0]:
import random
In [0]:
def factorial(x):
    """Return x! computed recursively.

    The recursion stops only when the argument compares equal to 0, so the
    function terminates for non-negative whole numbers. A negative or
    non-integral argument never reaches the base case and ends in a
    RecursionError.
    """
    return 1 if x == 0 else x * factorial(x - 1)
In [49]:
# Quick check on a small integer input (5! is 120).
factorial(5)
Out[49]:
In [0]:
def code_to_debug():
    """Call factorial on ten values drawn from random.random().

    random.random() yields floats in [0, 1), while factorial only stops
    recursing at exactly 0, so this function is expected to fail; it exists
    as something to debug.
    """
    # To step through interactively, uncomment the next line:
    # import pdb; pdb.set_trace()
    for _ in range(10):
        sample = random.random()
        factorial(sample)
In [61]:
# Switch IPython tracebacks to verbose mode so they include local variable values.
%xmode Verbose
In [62]:
# Run the demo function and read the resulting traceback.
code_to_debug()
In [0]:
def factorial_debugged(x):
    """Return x! for a non-negative integer x, or -1 for unsupported input.

    Unsupported input (a non-integer, or a negative integer) is reported with
    a printed message and signalled by a return value of -1 rather than by an
    exception.

    Args:
        x: The value to take the factorial of.

    Returns:
        x! when x is an int >= 0, otherwise -1.
    """
    if not isinstance(x, int):
        print('This method only supports integers')
        return -1
    # A negative integer would never reach the x == 0 base case.
    if x < 0:
        print('This method only supports non-negative integers')
        return -1
    if x == 0:
        return 1
    # Recurse into this guarded function, not the unguarded factorial.
    return x * factorial_debugged(x - 1)
In [0]:
def code_to_debug():
    """Pause in pdb, then call factorial_debugged on ten random floats.

    NOTE: this reuses the name code_to_debug, so it replaces any earlier
    definition of that name in the kernel.
    """
    # Deliberate breakpoint: execution stops here and waits for pdb commands.
    import pdb
    pdb.set_trace()
    for _ in range(10):
        sample = random.random()
        factorial_debugged(sample)
In [58]:
# Run the demo; the pdb.set_trace() call inside code_to_debug pauses
# execution in the debugger and waits for input.
code_to_debug()
In [0]: