Google Drive <-> Google Colaboratory



In [1]:

    
# Install the PyDrive wrapper & import libraries.
# This only needs to be done once in a notebook.
!pip install -U -q PyDrive
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

# Authenticate and create the PyDrive client.
# This only needs to be done once in a notebook.
# Sign the current user in through google.colab's auth helper.
auth.authenticate_user()
gauth = GoogleAuth()
# Hand PyDrive the application-default credentials instead of running
# PyDrive's own auth flow.
gauth.credentials = GoogleCredentials.get_application_default()
# `drive` is the client object the following cells use to create, list and
# download Drive files.
drive = GoogleDrive(gauth)









    



    100% |████████████████████████████████| 993kB 6.2MB/s 
  Building wheel for PyDrive (setup.py) ... done
Uploaded file with ID 1xSpkqokICQVUd1bhPnGq_zVlZYJDgmQb



In [2]:

    
# Create & upload a text file.
# The file is created with the title 'File2.txt' and the literal text
# 'Hello World' as its content.
uploaded = drive.CreateFile({'title': 'File2.txt'})
uploaded.SetContentString('Hello World')
uploaded.Upload()
# Print the 'id' field of the uploaded file object so it can be referenced later.
print('Uploaded file with ID {}'.format(uploaded.get('id')))









    



Uploaded file with ID 1U0LJtwyr29QpJ6lVeDdwm0JGn1ASRPCJ



In [12]:

    
# List .csv files in the root.
#
# The query matches files whose title contains '.csv' and whose parent is the
# Drive root folder.
#
# Search query reference:
# https://developers.google.com/drive/v2/web/search-parameters
listed = drive.ListFile({'q': "title contains '.csv' and 'root' in parents"}).GetList()
# Print the title and ID of every match; the ID is what the download cell needs.
for file in listed:
  print('title {}, id {}'.format(file['title'], file['id']))









    



title mobile_cleaned.csv, id 1kE2SbrXuVLZcE0wnTCANZPsuMvej3YGz



In [0]:

    
# Download a file based on its file ID.
#
# A file ID looks like: laggVyWshwcyP6kEI-y_W3P8D26sz
# The ID below is hard-coded; it is the one printed for mobile_cleaned.csv by
# the listing cell.
file_id = '1kE2SbrXuVLZcE0wnTCANZPsuMvej3YGz'  # https://drive.google.com/open?id=1kE2SbrXuVLZcE0wnTCANZPsuMvej3YGz
downloaded = drive.CreateFile({'id': file_id})
# NOTE: this prints the whole file content as one string, which for a CSV
# means the entire dataset ends up in the cell output.
print('Downloaded content "{}"'.format(downloaded.GetContentString()))



In [0]:

    
# Save the Drive file's content to a local file named 'mobile_cleaned_local.csv'
# in the working directory, so pandas can read it from disk.
downloaded.GetContentFile('mobile_cleaned_local.csv')



In [7]:

    
!ls









    



adc.json  mobile_cleaned_local.csv  sample_data



In [0]:

    
import pandas as pd

Pandas



In [0]:

    
# Load the locally saved CSV into a DataFrame; `df` is the full dataset that
# every later cell slices from.
df = pd.read_csv('mobile_cleaned_local.csv')



In [11]:

    
df.head()









    Out[11]:







  
    
      
      sim_type
      aperture
      gpu_rank
      weight
      stand_by_time
      processor_frequency
      thickness
      flash_type
      front_camera_resolution
      auto_focus
      ...
      micro_sd_slot
      screen_pixel_density
      water_proof_rate
      phone_width
      expandable_memory
      version
      usb_type
      battery_capacity
      processor_rank
      is_liked
    
  
  
    
      0
      0
      12
      55
      155.0
      250
      1.3
      10.5
      5
      2.00
      0
      ...
      4
      2
      3
      67.8
      64.0
      5
      3
      2000
      125
      1
    
    
      1
      0
      1
      55
      132.0
      300
      1.3
      10.6
      5
      0.30
      1
      ...
      4
      4
      6
      64.0
      32.0
      5
      3
      2000
      165
      1
    
    
      2
      0
      9
      55
      142.0
      329
      1.5
      8.5
      5
      2.00
      3
      ...
      4
      9
      6
      72.0
      32.0
      6
      3
      2500
      164
      0
    
    
      3
      0
      8
      55
      152.0
      385
      1.3
      8.0
      5
      2.00
      3
      ...
      4
      1
      3
      75.1
      32.0
      6
      3
      3000
      165
      1
    
    
      4
      1
      1
      55
      234.0
      385
      1.3
      7.9
      5
      1.92
      3
      ...
      4
      1
      6
      91.0
      32.0
      6
      3
      3000
      165
      0
    
  

5 rows × 40 columns



In [13]:

    
df.tail()









    Out[13]:







  
    
      
      sim_type
      aperture
      gpu_rank
      weight
      stand_by_time
      processor_frequency
      thickness
      flash_type
      front_camera_resolution
      auto_focus
      ...
      micro_sd_slot
      screen_pixel_density
      water_proof_rate
      phone_width
      expandable_memory
      version
      usb_type
      battery_capacity
      processor_rank
      is_liked
    
  
  
    
      104
      3
      10
      14
      192.0
      540
      1.8
      9.4
      2
      2.0
      2
      ...
      4
      29
      6
      77.2
      256.0
      6
      3
      3410
      8
      1
    
    
      105
      0
      5
      3
      157.0
      400
      2.3
      7.7
      5
      5.0
      3
      ...
      6
      27
      1
      72.6
      200.0
      9
      3
      3600
      101
      1
    
    
      106
      3
      10
      6
      192.0
      384
      1.8
      7.3
      2
      5.0
      3
      ...
      1
      16
      6
      77.9
      0.0
      14
      13
      2750
      1
      1
    
    
      107
      3
      10
      12
      129.0
      250
      1.4
      6.9
      2
      1.2
      3
      ...
      1
      13
      6
      67.0
      0.0
      12
      13
      1810
      7
      1
    
    
      108
      2
      8
      3
      158.0
      400
      2.2
      7.4
      6
      8.0
      3
      ...
      5
      16
      6
      74.7
      0.0
      10
      12
      3000
      2
      0
    
  

5 rows × 40 columns



In [14]:

    
type(df)









    Out[14]:





pandas.core.frame.DataFrame



In [0]:

    
dir(df)



In [16]:

    
len(df)









    Out[16]:





109



In [17]:

    
df.shape









    Out[17]:





(109, 40)



In [18]:

    
df.loc[5]









    Out[18]:





sim_type                                0.0
aperture                               14.0
gpu_rank                               55.0
weight                                179.0
stand_by_time                         280.0
processor_frequency                     1.3
thickness                               7.9
flash_type                              5.0
front_camera_resolution                 5.0
auto_focus                              3.0
screen_size                             5.5
frames_per_second                      30.0
FM                                      3.0
no_of_reviews_in_gsmarena_in_week       6.0
os                                      0.0
phone_height                          150.0
screen_protection                       5.0
sim_size                                3.0
price                                5999.0
talk_time                              22.0
video_resolution                      720.0
display_resolution                      0.0
removable_battery                       0.0
display_type                            2.0
primary_camera_resolution               8.0
battery_type                            1.0
ram_memory                              1.0
internal_memory                         7.0
brand_rank                              4.0
no_of_cores                             6.0
micro_sd_slot                           4.0
screen_pixel_density                    7.0
water_proof_rate                        3.0
phone_width                            71.0
expandable_memory                      32.0
version                                 6.0
usb_type                                3.0
battery_capacity                     2900.0
processor_rank                        165.0
is_liked                                0.0
Name: 5, dtype: float64



In [0]:

    
# Row slice: keeps rows 23 through 28 (the end of the slice is exclusive),
# i.e. six rows with all columns.
df_short = df[23:29]



In [23]:

    
df_short.shape









    Out[23]:





(6, 40)



In [24]:

    
df_short.head()









    Out[24]:







  
    
      
      sim_type
      aperture
      gpu_rank
      weight
      stand_by_time
      processor_frequency
      thickness
      flash_type
      front_camera_resolution
      auto_focus
      ...
      micro_sd_slot
      screen_pixel_density
      water_proof_rate
      phone_width
      expandable_memory
      version
      usb_type
      battery_capacity
      processor_rank
      is_liked
    
  
  
    
      23
      3
      1
      43
      97.0
      345
      1.2
      5.1
      5
      5.0
      3
      ...
      2
      10
      2
      68.1
      0.0
      5
      3
      2000
      94
      1
    
    
      24
      0
      10
      29
      150.0
      322
      1.5
      8.2
      5
      5.0
      3
      ...
      6
      23
      6
      71.0
      32.0
      6
      4
      2750
      21
      1
    
    
      25
      0
      8
      43
      202.0
      914
      1.2
      10.6
      2
      5.0
      3
      ...
      6
      7
      6
      77.5
      64.0
      5
      4
      5000
      94
      1
    
    
      26
      0
      8
      43
      170.0
      456
      1.2
      10.8
      2
      5.0
      3
      ...
      4
      7
      6
      77.2
      128.0
      5
      3
      3000
      94
      1
    
    
      27
      0
      8
      38
      155.0
      350
      1.3
      9.3
      5
      5.0
      3
      ...
      6
      7
      6
      77.0
      64.0
      6
      4
      3000
      19
      1
    
  

5 rows × 40 columns



In [0]:

    
# Column subset: keep all rows but only these five columns, in this order.
df_thin = df[['stand_by_time', 'expandable_memory', 'price', 'battery_capacity', 'is_liked']]



In [26]:

    
df_thin.shape









    Out[26]:





(109, 5)



In [27]:

    
df_thin.head()









    Out[27]:







  
    
      
      stand_by_time
      expandable_memory
      price
      battery_capacity
      is_liked
    
  
  
    
      0
      250
      64.0
      3870
      2000
      1
    
    
      1
      300
      32.0
      4059
      2000
      1
    
    
      2
      329
      32.0
      4777
      2500
      0
    
    
      3
      385
      32.0
      5799
      3000
      1
    
    
      4
      385
      32.0
      5990
      3000
      0



In [0]:

    
# Boolean-mask filter: keep only the rows of df_thin where is_liked equals 1.
df_liked = df_thin[df_thin['is_liked'] == 1]



In [29]:

    
df_liked.shape









    Out[29]:





(92, 5)



In [34]:

    
df_thin['price'].describe()









    Out[34]:





count      109.000000
mean     19373.211009
std      14039.197220
min       3870.000000
25%       8999.000000
50%      14614.000000
75%      24999.000000
max      64500.000000
Name: price, dtype: float64



In [35]:

    
df_thin.describe()









    Out[35]:







  
    
      
      stand_by_time
      expandable_memory
      price
      battery_capacity
      is_liked
    
  
  
    
      count
      109.00000
      109.000000
      109.000000
      109.000000
      109.000000
    
    
      mean
      404.66055
      104.513761
      19373.211009
      2841.779817
      0.844037
    
    
      std
      176.44206
      275.799767
      14039.197220
      655.003963
      0.364496
    
    
      min
      160.00000
      0.000000
      3870.000000
      1560.000000
      0.000000
    
    
      25%
      264.00000
      0.000000
      8999.000000
      2470.000000
      1.000000
    
    
      50%
      360.00000
      32.000000
      14614.000000
      2900.000000
      1.000000
    
    
      75%
      500.00000
      128.000000
      24999.000000
      3100.000000
      1.000000
    
    
      max
      1093.00000
      2048.000000
      64500.000000
      5000.000000
      1.000000



In [36]:

    
# Mean price of the phones flagged is_liked == 1; the row mask and the column
# are selected in a single .loc lookup.
df_thin.loc[df_thin['is_liked'] == 1, 'price'].mean()









    Out[36]:





19393.239130434784



In [37]:

    
# Mean price of the phones flagged is_liked == 0; the row mask and the column
# are selected in a single .loc lookup.
df_thin.loc[df_thin['is_liked'] == 0, 'price'].mean()









    Out[37]:





19264.823529411766



In [0]:

    
# Group the rows of df_thin by the is_liked flag.
# The column is passed as a plain name rather than a one-element list: with a
# list, newer pandas versions yield 1-tuples such as (0,) as group keys when
# the groupby object is iterated, whereas a scalar name yields the plain
# values 0 and 1 on every version.
g = df_thin.groupby('is_liked')



In [39]:

    
# Iterating a groupby object yields (group key, sub-DataFrame) pairs; print
# each key followed by the rows that belong to that group.
for key, df_key in g:
  print(key)
  print(df_key)









    



0
     stand_by_time  expandable_memory  price  battery_capacity  is_liked
2              329               32.0   4777              2500         0
4              385               32.0   5990              3000         0
5              280               32.0   5999              2900         0
11             300              128.0   6990              2600         0
22             354              128.0   7999              2400         0
38             490               32.0   9999              2100         0
53             345               64.0  14300              2950         0
60             840                0.0  15689              4100         0
74             390              128.0  21999              2800         0
77             620              128.0  22999              3100         0
78             618                0.0  24499              3600         0
80             598                0.0  24999              3000         0
83             504              200.0  25500              2600         0
90             500                0.0  34999              3760         0
91             240                0.0  34999              1624         0
95             580              256.0  37766              2840         0
108            400                0.0  27999              3000         0
1
     stand_by_time  expandable_memory  price  battery_capacity  is_liked
0              250               64.0   3870              2000         1
1              300               32.0   4059              2000         1
3              385               32.0   5799              3000         1
6              230              128.0   5999              1700         1
7              182               32.0   6599              2000         1
8              182               32.0   6599              2000         1
9              435               32.0   6649              3000         1
10             514               32.0   6749              4000         1
12             280               32.0   6999              2500         1
13             198               32.0   6999              2200         1
14             200               32.0   6999              2500         1
15             680              256.0   6999              2500         1
16             576              128.0   7340              2200         1
17             264               32.0   7499              2300         1
18             180              128.0   7590              2000         1
19             160               32.0   7790              2230         1
20             450               32.0   7899              4000         1
21             264               32.0   7914              2900         1
23             345                0.0   8490              2000         1
24             322               32.0   8499              2750         1
25             914               64.0   8999              5000         1
26             456              128.0   8999              3000         1
27             350               64.0   8999              3000         1
28             617               32.0   9399              2100         1
29             775               32.0   9499              3100         1
30             218              128.0   9700              2420         1
31             270               32.0   9715              2900         1
32             250               32.0   9999              2470         1
33             264                0.0   9999              4050         1
34             265                0.0   9999              4050         1
..             ...                ...    ...               ...       ...
70             170              128.0  19890              2600         1
71             360                0.0  19999              3100         1
72             250                0.0  20397              1560         1
73             687              128.0  21300              2600         1
75             635              200.0  21999              2930         1
76             360                0.0  22999              3300         1
79             180              128.0  24900              2900         1
81             410              128.0  24999              3000         1
82             590              128.0  25500              2930         1
84             250              128.0  27580              2850         1
85             200              128.0  29900              3220         1
86             420               64.0  29990              3200         1
87             410             2048.0  30947              3000         1
88             440                0.0  31999              3450         1
89             354                0.0  33900              2550         1
92             360                0.0  35900              2700         1
93             250                0.0  36499              1810         1
94             250                0.0  36999              1810         1
96             410               32.0  38000              3000         1
97             340              256.0  39890              2900         1
98             362               64.0  40900              2600         1
99             250                0.0  48329              1810         1
100            242              200.0  48900              3000         1
101            240                0.0  49499              1715         1
102            330                0.0  50895              3450         1
103            600              256.0  52699              3430         1
104            540              256.0  54900              3410         1
105            400              200.0  56900              3600         1
106            384                0.0  59000              2750         1
107            250                0.0  64500              1810         1

[92 rows x 5 columns]



In [43]:

    
df_thin.groupby(['is_liked']).describe()









    Out[43]:







  
    
      
      battery_capacity
      expandable_memory
      ...
      price
      stand_by_time
    
    
      
      count
      mean
      std
      min
      25%
      50%
      75%
      max
      count
      mean
      ...
      75%
      max
      count
      mean
      std
      min
      25%
      50%
      75%
      max
    
    
      is_liked
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      0
      17.0
      2874.941176
      591.777035
      1624.0
      2600.0
      2900.0
      3000.0
      4100.0
      17.0
      68.235294
      ...
      25500.00
      37766.0
      17.0
      457.235294
      157.094927
      240.0
      345.00
      400.0
      580.0
      840.0
    
    
      1
      92.0
      2835.652174
      668.850998
      1560.0
      2457.5
      2900.0
      3100.0
      5000.0
      92.0
      111.217391
      ...
      24924.75
      64500.0
      92.0
      394.945652
      178.876246
      160.0
      258.25
      354.0
      482.0
      1093.0
    
  

2 rows × 32 columns



In [0]:

    
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()



In [45]:

    
# Pairwise plots of every column in df_thin, with histograms on the diagonal.
# NOTE(review): seaborn documents pairplot as returning a PairGrid rather than
# a single Axes, so the name `ax` may be misleading - confirm before reusing it.
ax = sns.pairplot(df_thin, diag_kind='hist')



In [46]:

    
# Same pairwise plots, but coloured by the is_liked column via `hue`.
# NOTE(review): seaborn documents pairplot as returning a PairGrid rather than
# a single Axes, so the name `ax` may be misleading - confirm before reusing it.
ax = sns.pairplot(df_thin, diag_kind='hist', hue='is_liked')

Debugging



In [0]:

    
import random



In [0]:

    
def factorial(x):
  """Return the factorial of x, computed recursively.

  The recursion stops only when the argument compares equal to 0, so it
  terminates for non-negative whole numbers. Any other input (a negative
  number, or a float that never lands exactly on 0) keeps recursing until
  Python raises RecursionError.
  """
  return 1 if x == 0 else x * factorial(x - 1)



In [49]:

    
factorial(5)









    Out[49]:





120



In [0]:

    
def code_to_debug():
  """Call factorial() on ten values drawn from random.random().

  This is the failing example for the debugging walkthrough: the values passed
  in are random floats, factorial() only stops at exactly 0, and the call
  therefore ends in a RecursionError (see the traceback shown below the cell
  that runs this function). Return values are discarded.
  """
  # Uncomment the next line to drop into the pdb debugger before the loop runs.
  # import pdb; pdb.set_trace()
  
  for i in range(10):
    # Kept in a local so the verbose traceback shows the offending value.
    x = random.random()
    factorial(x)



In [61]:

    
%xmode Verbose









    



Exception reporting mode: Verbose



In [62]:

    
code_to_debug()









    



---------------------------------------------------------------------------
RecursionError                            Traceback (most recent call last)
<ipython-input-62-35361d661c6e> in <module>()
----> 1 code_to_debug()
        global code_to_debug = <function code_to_debug at 0x7fdc1ead88c8>

<ipython-input-59-84611e850098> in code_to_debug()
      4   for i in range(10):
      5     x = random.random()
----> 6     factorial(x)
        global factorial = <function factorial at 0x7fdc1ead89d8>
        x = 0.9542626647624946

<ipython-input-47-6a1d5582b0f3> in factorial(x=0.9542626647624946)
      2   if (x == 0):
      3     return 1
----> 4   return x * factorial(x - 1)
        x = 0.9542626647624946
        global factorial = <function factorial at 0x7fdc1ead89d8>

... last 1 frames repeated, from the frame below ...

<ipython-input-47-6a1d5582b0f3> in factorial(x=-0.04573733523750545)
      2   if (x == 0):
      3     return 1
----> 4   return x * factorial(x - 1)
        x = -0.04573733523750545
        global factorial = <function factorial at 0x7fdc1ead89d8>

RecursionError: maximum recursion depth exceeded in comparison



In [0]:

    
def factorial_debugged(x):
  """Return x! for a non-negative integer x, rejecting unsupported input.

  Unsupported input is reported by printing a message and returning the
  sentinel -1 instead of raising:
    * anything that is not an int (e.g. a float) prints
      'This method only supports integers';
    * a negative int prints
      'This method only supports non-negative integers'.

  The product is accumulated in a loop, so the function does not depend on the
  unguarded factorial() helper and cannot hit the recursion limit for large x.
  """
  if (not isinstance(x, int)):
    print('This method only supports integers')
    return -1
  if (x < 0):
    # A negative value would never reach the x == 0 base case.
    print('This method only supports non-negative integers')
    return -1
  result = 1
  # range(2, x + 1) is empty for x in (0, 1), leaving the result at 1.
  for factor in range(2, x + 1):
    result *= factor
  return result



In [0]:

    
def code_to_debug():
  """Call factorial_debugged() on ten values drawn from random.random().

  Redefines the earlier code_to_debug() for the pdb part of the walkthrough.
  The function stops in the interactive pdb prompt before the loop starts, so
  it blocks until the user continues. Return values are discarded; each call
  with a float makes factorial_debugged() print its "only supports integers"
  message.
  """
  # Deliberate breakpoint for the demonstration: execution pauses here.
  import pdb; pdb.set_trace()
  
  for i in range(10):
    x = random.random()
    factorial_debugged(x)



In [58]:

    
code_to_debug()









    



> <ipython-input-57-3364bd0836cb>(4)code_to_debug()
-> for i in range(10):
(Pdb) ?

Documented commands (type help <topic>):
========================================
EOF    c          d        h         list      q        rv       undisplay
a      cl         debug    help      ll        quit     s        unt      
alias  clear      disable  ignore    longlist  r        source   until    
args   commands   display  interact  n         restart  step     up       
b      condition  down     j         next      return   tbreak   w        
break  cont       enable   jump      p         retval   u        whatis   
bt     continue   exit     l         pp        run      unalias  where    

Miscellaneous help topics:
==========================
exec  pdb

(Pdb) help c
c(ont(inue))
        Continue execution, only stop when a breakpoint is encountered.
(Pdb) c
This method only supports integers
This method only supports integers
This method only supports integers
This method only supports integers
This method only supports integers
This method only supports integers
This method only supports integers
This method only supports integers
This method only supports integers
This method only supports integers



In [0]:

	sim_type	aperture	gpu_rank	weight	stand_by_time	processor_frequency	thickness	flash_type	front_camera_resolution	auto_focus	...	micro_sd_slot	screen_pixel_density	water_proof_rate	phone_width	expandable_memory	version	usb_type	battery_capacity	processor_rank	is_liked
0	0	12	55	155.0	250	1.3	10.5	5	2.00	0	...	4	2	3	67.8	64.0	5	3	2000	125	1
1	0	1	55	132.0	300	1.3	10.6	5	0.30	1	...	4	4	6	64.0	32.0	5	3	2000	165	1
2	0	9	55	142.0	329	1.5	8.5	5	2.00	3	...	4	9	6	72.0	32.0	6	3	2500	164	0
3	0	8	55	152.0	385	1.3	8.0	5	2.00	3	...	4	1	3	75.1	32.0	6	3	3000	165	1
4	1	1	55	234.0	385	1.3	7.9	5	1.92	3	...	4	1	6	91.0	32.0	6	3	3000	165	0

	sim_type	aperture	gpu_rank	weight	stand_by_time	processor_frequency	thickness	flash_type	front_camera_resolution	auto_focus	...	micro_sd_slot	screen_pixel_density	water_proof_rate	phone_width	expandable_memory	version	usb_type	battery_capacity	processor_rank	is_liked
104	3	10	14	192.0	540	1.8	9.4	2	2.0	2	...	4	29	6	77.2	256.0	6	3	3410	8	1
105	0	5	3	157.0	400	2.3	7.7	5	5.0	3	...	6	27	1	72.6	200.0	9	3	3600	101	1
106	3	10	6	192.0	384	1.8	7.3	2	5.0	3	...	1	16	6	77.9	0.0	14	13	2750	1	1
107	3	10	12	129.0	250	1.4	6.9	2	1.2	3	...	1	13	6	67.0	0.0	12	13	1810	7	1
108	2	8	3	158.0	400	2.2	7.4	6	8.0	3	...	5	16	6	74.7	0.0	10	12	3000	2	0

	sim_type	aperture	gpu_rank	weight	stand_by_time	processor_frequency	thickness	flash_type	front_camera_resolution	auto_focus	...	micro_sd_slot	screen_pixel_density	water_proof_rate	phone_width	expandable_memory	version	usb_type	battery_capacity	processor_rank	is_liked
23	3	1	43	97.0	345	1.2	5.1	5	5.0	3	...	2	10	2	68.1	0.0	5	3	2000	94	1
24	0	10	29	150.0	322	1.5	8.2	5	5.0	3	...	6	23	6	71.0	32.0	6	4	2750	21	1
25	0	8	43	202.0	914	1.2	10.6	2	5.0	3	...	6	7	6	77.5	64.0	5	4	5000	94	1
26	0	8	43	170.0	456	1.2	10.8	2	5.0	3	...	4	7	6	77.2	128.0	5	3	3000	94	1
27	0	8	38	155.0	350	1.3	9.3	5	5.0	3	...	6	7	6	77.0	64.0	6	4	3000	19	1

	stand_by_time	expandable_memory	price	battery_capacity	is_liked
0	250	64.0	3870	2000	1
1	300	32.0	4059	2000	1
2	329	32.0	4777	2500	0
3	385	32.0	5799	3000	1
4	385	32.0	5990	3000	0

	stand_by_time	expandable_memory	price	battery_capacity	is_liked
count	109.00000	109.000000	109.000000	109.000000	109.000000
mean	404.66055	104.513761	19373.211009	2841.779817	0.844037
std	176.44206	275.799767	14039.197220	655.003963	0.364496
min	160.00000	0.000000	3870.000000	1560.000000	0.000000
25%	264.00000	0.000000	8999.000000	2470.000000	1.000000
50%	360.00000	32.000000	14614.000000	2900.000000	1.000000
75%	500.00000	128.000000	24999.000000	3100.000000	1.000000
max	1093.00000	2048.000000	64500.000000	5000.000000	1.000000