notebook.community

Edit and run



In [88]:

    
cd "/home/bakuda/pandas-book/"









    



/home/bakuda/pandas-book



In [89]:

    
path = 'ch02/usagov_bitly_data2012-03-16-1331923249.txt'



In [90]:

    
open(path).readline()









    Out[90]:





'{ "a": "Mozilla\\/5.0 (Windows NT 6.1; WOW64) AppleWebKit\\/535.11 (KHTML, like Gecko) Chrome\\/17.0.963.78 Safari\\/535.11", "c": "US", "nk": 1, "tz": "America\\/New_York", "gr": "MA", "g": "A6qOVH", "h": "wfLQtf", "l": "orofrog", "al": "en-US,en;q=0.8", "hh": "1.usa.gov", "r": "http:\\/\\/www.facebook.com\\/l\\/7AQEFzjSi\\/1.usa.gov\\/wfLQtf", "u": "http:\\/\\/www.ncbi.nlm.nih.gov\\/pubmed\\/22415991", "t": 1331923247, "hc": 1331822918, "cy": "Danvers", "ll": [ 42.576698, -70.954903 ] }\n'



In [91]:

    
import json
records = [json.loads(line) for line in open(path)]



In [92]:

    
records[:1]









    Out[92]:





[{u'a': u'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.78 Safari/535.11',
  u'al': u'en-US,en;q=0.8',
  u'c': u'US',
  u'cy': u'Danvers',
  u'g': u'A6qOVH',
  u'gr': u'MA',
  u'h': u'wfLQtf',
  u'hc': 1331822918,
  u'hh': u'1.usa.gov',
  u'l': u'orofrog',
  u'll': [42.576698, -70.954903],
  u'nk': 1,
  u'r': u'http://www.facebook.com/l/7AQEFzjSi/1.usa.gov/wfLQtf',
  u't': 1331923247,
  u'tz': u'America/New_York',
  u'u': u'http://www.ncbi.nlm.nih.gov/pubmed/22415991'}]



In [14]:

    
records[0]['tz']









    Out[14]:





u'America/New_York'



In [16]:

    
tzs = [rec['tz'] for rec in records if 'tz' in rec]



In [18]:

    
tzs[:5]









    Out[18]:





[u'America/New_York',
 u'America/Denver',
 u'America/New_York',
 u'America/Sao_Paulo',
 u'America/New_York']



In [19]:

    
type(tzs)









    Out[19]:





list



In [20]:

    
len(tzs)









    Out[20]:





3440



In [21]:

    
len(set(tzs))









    Out[21]:





97



In [22]:

    
%rmagic









    



ERROR: Line magic function `%rmagic` not found.



In [23]:

    
tzs[:10]









    Out[23]:





[u'America/New_York',
 u'America/Denver',
 u'America/New_York',
 u'America/Sao_Paulo',
 u'America/New_York',
 u'America/New_York',
 u'Europe/Warsaw',
 u'',
 u'',
 u'']



In [25]:

    
from collections import Counter



In [26]:

    
counts = Counter(tzs)



In [28]:

    
counts.most_common(10)









    Out[28]:





[(u'America/New_York', 1251),
 (u'', 521),
 (u'America/Chicago', 400),
 (u'America/Los_Angeles', 382),
 (u'America/Denver', 191),
 (u'Europe/London', 74),
 (u'Asia/Tokyo', 37),
 (u'Pacific/Honolulu', 36),
 (u'Europe/Madrid', 35),
 (u'America/Sao_Paulo', 33)]



In [93]:

    
import pandas as pd



In [94]:

    
df = pd.DataFrame(records)



In [95]:

    
df.shape









    Out[95]:





(3560, 18)



In [96]:

    
df['tz'][:10]









    Out[96]:





0     America/New_York
1       America/Denver
2     America/New_York
3    America/Sao_Paulo
4     America/New_York
5     America/New_York
6        Europe/Warsaw
7                     
8                     
9                     
Name: tz, dtype: object



In [35]:

    
tz_counts = df['tz'].value_counts()



In [36]:

    
tz_counts[:10]









    Out[36]:





America/New_York       1251
                        521
America/Chicago         400
America/Los_Angeles     382
America/Denver          191
Europe/London            74
Asia/Tokyo               37
Pacific/Honolulu         36
Europe/Madrid            35
America/Sao_Paulo        33
dtype: int64



In [37]:

    
clean_tz = df['tz'].fillna('Missing')



In [38]:

    
clean_tz[clean_tz=='']='Unknown'



In [39]:

    
tz_counts = clean_tz.value_counts()



In [40]:

    
tz_counts[:10]









    Out[40]:





America/New_York       1251
Unknown                 521
America/Chicago         400
America/Los_Angeles     382
America/Denver          191
Missing                 120
Europe/London            74
Asia/Tokyo               37
Pacific/Honolulu         36
Europe/Madrid            35
dtype: int64



In [41]:

    
tz_counts[:10].plot(kind='bar', rot=0)









    Out[41]:





<matplotlib.axes.AxesSubplot at 0xb273d6c>



In [42]:

    
tz_counts[:10].plot(kind='barh', rot=0)









    Out[42]:





<matplotlib.axes.AxesSubplot at 0xb228cac>



In [43]:

    
df['a'][:5]









    Out[43]:





0    Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKi...
1                               GoogleMaps/RochesterNY
2    Mozilla/4.0 (compatible; MSIE 8.0; Windows NT ...
3    Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8)...
4    Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKi...
Name: a, dtype: object



In [44]:

    
s = df['a'][0]



In [45]:

    
s









    Out[45]:





u'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.78 Safari/535.11'



In [47]:

    
s.split()[0]









    Out[47]:





u'Mozilla/5.0'



In [53]:

    
results = pd.Series([x.split()[0] for x in df['a'].dropna()])



In [52]:

    
#df.tz
# same as below
df['tz'][:5]









    Out[52]:





0     America/New_York
1       America/Denver
2     America/New_York
3    America/Sao_Paulo
4     America/New_York
Name: tz, dtype: object



In [54]:

    
results[:5]









    Out[54]:





0               Mozilla/5.0
1    GoogleMaps/RochesterNY
2               Mozilla/4.0
3               Mozilla/5.0
4               Mozilla/5.0
dtype: object



In [55]:

    
%load_ext rmagic



In [57]:

    
%%R
install.packages("nycflights13", repos='http://cran.us.r‐project.org')









    



/usr/local/lib/python2.7/dist-packages/ipython-1.1.0-py2.7.egg/IPython/extensions/rmagic.py:194: UserWarning: Installing package into ‘/home/bakuda/R/i686-pc-linux-gnu-library/3.2’
(as ‘lib’ is unspecified)

  value = ri.baseenv['eval'](ri.parse(line))



In [58]:

    
%%R
library(nycflights13)
write.csv(flights, "flights.csv")



In [59]:

    
flights = pd.read_csv("flights.csv", index_col=0)



In [60]:

    
flights.shape









    Out[60]:





(336776, 16)



In [62]:

    
%%R
head(flights)



In [63]:

    
%%R
search()



In [100]:

    
#flights.head()



In [66]:

    
df.loc[:2,:]









    Out[66]:






  
    
      
      _heartbeat_
      a
      al
      c
      cy
      g
      gr
      h
      hc
      hh
      kw
      l
      ll
      nk
      r
      t
      tz
      u
    
  
  
    
      0
      NaN
       Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKi...
       en-US,en;q=0.8
       US
          Danvers
       A6qOVH
       MA
       wfLQtf
       1331822918
       1.usa.gov
       NaN
       orofrog
        [42.576698, -70.954903]
       1
       http://www.facebook.com/l/7AQEFzjSi/1.usa.gov/...
       1331923247
       America/New_York
             http://www.ncbi.nlm.nih.gov/pubmed/22415991
    
    
      1
      NaN
                                  GoogleMaps/RochesterNY
                  NaN
       US
            Provo
       mwszkS
       UT
       mwszkS
       1308262393
            j.mp
       NaN
         bitly
       [40.218102, -111.613297]
       0
                                http://www.AwareMap.com/
       1331923249
         America/Denver
             http://www.monroecounty.gov/etc/911/rss.php
    
    
      2
      NaN
       Mozilla/4.0 (compatible; MSIE 8.0; Windows NT ...
                en-US
       US
       Washington
       xxr3Qb
       DC
       xxr3Qb
       1331919941
       1.usa.gov
       NaN
         bitly
          [38.9007, -77.043098]
       1
                                    http://t.co/03elZC4Q
       1331923250
       America/New_York
       http://boxer.senate.gov/en/press/releases/0316...
    
  

3 rows × 18 columns



In [67]:

    
type(records)









    Out[67]:





list



In [68]:

    
type(records[0])









    Out[68]:





dict



In [69]:

    
df.columns









    Out[69]:





Index([u'_heartbeat_', u'a', u'al', u'c', u'cy', u'g', u'gr', u'h', u'hc', u'hh', u'kw', u'l', u'll', u'nk', u'r', u't', u'tz', u'u'], dtype='object')



In [74]:

    
df.shape









    Out[74]:





(3560, 18)



In [81]:

    
df.to_csv('/home/bakuda/pandas-book/ch02/bitly-data-for-R.csv', encoding='utf8', header=True, sep=',')



In [82]:

    
df = pd.DataFrame(records)



In [99]:

    
#df.loc[10:18]



In [103]:

    
results.value_counts()[:10].plot(kind='barh')









    Out[103]:





<matplotlib.axes.AxesSubplot at 0xe0531ec>



In [104]:

    
cdf = df[df['a'].notnull()]



In [105]:

    
cdf.shape









    Out[105]:





(3440, 18)



In [106]:

    
cdf.groupby('a')









    Out[106]:





<pandas.core.groupby.DataFrameGroupBy object at 0xd43caec>



In [111]:

    
results.value_counts()[:20].plot(kind='barh')









    Out[111]:





<matplotlib.axes.AxesSubplot at 0x1dbed58c>



In [112]:

    
os_types = np.where(cdf['a'].str.contains('Windows'), 'Windows', 'Not Windows')



In [114]:

    
os_types[:5]









    Out[114]:





array(['Windows', 'Not Windows', 'Windows', 'Not Windows', 'Windows'], 
      dtype='|S11')



In [116]:

    
by_tz_os = cdf.groupby(['tz',os_types])



In [118]:

    
by_tz_os.size()









    Out[118]:





tz                                         
                                Not Windows    245
                                Windows        276
Africa/Cairo                    Windows          3
Africa/Casablanca               Windows          1
Africa/Ceuta                    Windows          2
Africa/Johannesburg             Windows          1
Africa/Lusaka                   Windows          1
America/Anchorage               Not Windows      4
                                Windows          1
America/Argentina/Buenos_Aires  Not Windows      1
America/Argentina/Cordoba       Windows          1
America/Argentina/Mendoza       Windows          1
America/Bogota                  Not Windows      1
                                Windows          2
America/Caracas                 Windows          1
...
Europe/Skopje     Windows         1
Europe/Sofia      Windows         1
Europe/Stockholm  Not Windows     2
                  Windows        12
Europe/Uzhgorod   Windows         1
Europe/Vienna     Not Windows     3
                  Windows         3
Europe/Vilnius    Windows         2
Europe/Volgograd  Windows         1
Europe/Warsaw     Not Windows     1
                  Windows        15
Europe/Zurich     Not Windows     4
Pacific/Auckland  Not Windows     3
                  Windows         8
Pacific/Honolulu  Windows        36
Length: 149, dtype: int64



In [119]:

    
agg_counts = by_tz_os.size().unstack().fillna(0)



In [120]:

    
agg_counts[:10]









    Out[120]:






  
    
      
      Not Windows
      Windows
    
    
      tz
      
      
    
  
  
    
      
       245
       276
    
    
      Africa/Cairo
         0
         3
    
    
      Africa/Casablanca
         0
         1
    
    
      Africa/Ceuta
         0
         2
    
    
      Africa/Johannesburg
         0
         1
    
    
      Africa/Lusaka
         0
         1
    
    
      America/Anchorage
         4
         1
    
    
      America/Argentina/Buenos_Aires
         1
         0
    
    
      America/Argentina/Cordoba
         0
         1
    
    
      America/Argentina/Mendoza
         0
         1
    
  

10 rows × 2 columns



In [121]:

    
# Unstack is like R::tidyr:spread()
by_tz_os.size().unstack()









    Out[121]:






  
    
      
      Not Windows
      Windows
    
    
      tz
      
      
    
  
  
    
      
       245
       276
    
    
      Africa/Cairo
       NaN
         3
    
    
      Africa/Casablanca
       NaN
         1
    
    
      Africa/Ceuta
       NaN
         2
    
    
      Africa/Johannesburg
       NaN
         1
    
    
      Africa/Lusaka
       NaN
         1
    
    
      America/Anchorage
         4
         1
    
    
      America/Argentina/Buenos_Aires
         1
       NaN
    
    
      America/Argentina/Cordoba
       NaN
         1
    
    
      America/Argentina/Mendoza
       NaN
         1
    
    
      America/Bogota
         1
         2
    
    
      America/Caracas
       NaN
         1
    
    
      America/Chicago
       115
       285
    
    
      America/Chihuahua
         1
         1
    
    
      America/Costa_Rica
       NaN
         1
    
    
      America/Denver
       132
        59
    
    
      America/Edmonton
         2
         4
    
    
      America/Guayaquil
         2
       NaN
    
    
      America/Halifax
         1
         3
    
    
      America/Indianapolis
         8
        12
    
    
      America/La_Paz
       NaN
         1
    
    
      America/Lima
       NaN
         1
    
    
      America/Los_Angeles
       130
       252
    
    
      America/Managua
       NaN
         3
    
    
      America/Mazatlan
         1
       NaN
    
    
      America/Mexico_City
         7
         8
    
    
      America/Monterrey
         1
       NaN
    
    
      America/Montevideo
       NaN
         1
    
    
      America/Montreal
         3
         6
    
    
      America/New_York
       339
       912
    
    
      America/Phoenix
         3
        17
    
    
      America/Puerto_Rico
         9
         1
    
    
      America/Rainy_River
        10
        15
    
    
      America/Recife
       NaN
         2
    
    
      America/Santo_Domingo
         1
       NaN
    
    
      America/Sao_Paulo
        13
        20
    
    
      America/St_Kitts
         1
       NaN
    
    
      America/Tegucigalpa
       NaN
         1
    
    
      America/Vancouver
         6
         6
    
    
      America/Winnipeg
         3
         1
    
    
      Asia/Amman
       NaN
         2
    
    
      Asia/Bangkok
       NaN
         6
    
    
      Asia/Beirut
         1
         3
    
    
      Asia/Calcutta
         2
         7
    
    
      Asia/Dubai
         2
         2
    
    
      Asia/Harbin
       NaN
         3
    
    
      Asia/Hong_Kong
         4
         6
    
    
      Asia/Istanbul
       NaN
         9
    
    
      Asia/Jakarta
         2
         1
    
    
      Asia/Jerusalem
         2
         1
    
    
      Asia/Karachi
       NaN
         3
    
    
      Asia/Kuala_Lumpur
         1
         2
    
    
      Asia/Kuching
       NaN
         1
    
    
      Asia/Manila
       NaN
         1
    
    
      Asia/Nicosia
       NaN
         1
    
    
      Asia/Novosibirsk
       NaN
         1
    
    
      Asia/Pontianak
         1
       NaN
    
    
      Asia/Riyadh
       NaN
         1
    
    
      Asia/Seoul
         4
         1
    
    
      Asia/Tokyo
         2
        35
    
    
      
      ...
      ...
    
  

97 rows × 2 columns



In [123]:

    
cdf.groupby(['tz',os_types]).size().unstack().fillna(0)[:5]









    Out[123]:






  
    
      
      Not Windows
      Windows
    
    
      tz
      
      
    
  
  
    
      
       245
       276
    
    
      Africa/Cairo
         0
         3
    
    
      Africa/Casablanca
         0
         1
    
    
      Africa/Ceuta
         0
         2
    
    
      Africa/Johannesburg
         0
         1
    
  

5 rows × 2 columns



In [124]:

    
indexer = agg_counts.sum(1).argsort()



In [125]:

    
indexer[:5]









    Out[125]:





tz
                       24
Africa/Cairo           20
Africa/Casablanca      21
Africa/Ceuta           92
Africa/Johannesburg    87
dtype: int64



In [126]:

    
count_subset = agg_counts.take(indexer)[-10:]



In [127]:

    
count_subset









    Out[127]:






  
    
      
      Not Windows
      Windows
    
    
      tz
      
      
    
  
  
    
      America/Sao_Paulo
        13
        20
    
    
      Europe/Madrid
        16
        19
    
    
      Pacific/Honolulu
         0
        36
    
    
      Asia/Tokyo
         2
        35
    
    
      Europe/London
        43
        31
    
    
      America/Denver
       132
        59
    
    
      America/Los_Angeles
       130
       252
    
    
      America/Chicago
       115
       285
    
    
      
       245
       276
    
    
      America/New_York
       339
       912
    
  

10 rows × 2 columns



In [130]:

    
count_subset.plot(kind='barh', stacked=True)









    Out[130]:





<matplotlib.axes.AxesSubplot at 0x1cd3e0cc>



In [146]:

    
cd "/home/bakuda/pandas-book/ch02/movielens/"









    



/home/bakuda/pandas-book/ch02/movielens



In [148]:

    
import pandas as pd
unames = ['user_id', 'gender', 'age', 'occupation', 'zip']
users = pd.read_table('./users.dat', sep='::', header=None,
names=unames)
rnames = ['user_id', 'movie_id', 'rating', 'timestamp']
ratings = pd.read_table('./ratings.dat', sep='::', header=None,
names=rnames)
mnames = ['movie_id', 'title', 'genres']
movies = pd.read_table('./movies.dat', sep='::', header=None,
names=mnames)



In [149]:

    
users[:5]









    Out[149]:






  
    
      
      user_id
      gender
      age
      occupation
      zip
    
  
  
    
      0
       1
       F
        1
       10
       48067
    
    
      1
       2
       M
       56
       16
       70072
    
    
      2
       3
       M
       25
       15
       55117
    
    
      3
       4
       M
       45
        7
       02460
    
    
      4
       5
       M
       25
       20
       55455
    
  

5 rows × 5 columns



In [153]:

    
ratings[:5]









    Out[153]:






  
    
      
      user_id
      movie_id
      rating
      timestamp
    
  
  
    
      0
       1
       1193
       5
       978300760
    
    
      1
       1
        661
       3
       978302109
    
    
      2
       1
        914
       3
       978301968
    
    
      3
       1
       3408
       4
       978300275
    
    
      4
       1
       2355
       5
       978824291
    
  

5 rows × 4 columns



In [152]:

    
movies[:5]









    Out[152]:






  
    
      
      movie_id
      title
      genres
    
  
  
    
      0
       1
                         Toy Story (1995)
        Animation|Children's|Comedy
    
    
      1
       2
                           Jumanji (1995)
       Adventure|Children's|Fantasy
    
    
      2
       3
                  Grumpier Old Men (1995)
                     Comedy|Romance
    
    
      3
       4
                 Waiting to Exhale (1995)
                       Comedy|Drama
    
    
      4
       5
       Father of the Bride Part II (1995)
                             Comedy
    
  

5 rows × 3 columns



In [154]:

    
data = pd.merge(pd.merge(ratings, users), movies)



In [155]:

    
data.shape









    Out[155]:





(1000209, 10)



In [156]:

    
data[:10]









    Out[156]:






  
    
      
      user_id
      movie_id
      rating
      timestamp
      gender
      age
      occupation
      zip
      title
      genres
    
  
  
    
      0
        1
       1193
       5
       978300760
       F
        1
       10
       48067
       One Flew Over the Cuckoo's Nest (1975)
       Drama
    
    
      1
        2
       1193
       5
       978298413
       M
       56
       16
       70072
       One Flew Over the Cuckoo's Nest (1975)
       Drama
    
    
      2
       12
       1193
       4
       978220179
       M
       25
       12
       32793
       One Flew Over the Cuckoo's Nest (1975)
       Drama
    
    
      3
       15
       1193
       4
       978199279
       M
       25
        7
       22903
       One Flew Over the Cuckoo's Nest (1975)
       Drama
    
    
      4
       17
       1193
       5
       978158471
       M
       50
        1
       95350
       One Flew Over the Cuckoo's Nest (1975)
       Drama
    
    
      5
       18
       1193
       4
       978156168
       F
       18
        3
       95825
       One Flew Over the Cuckoo's Nest (1975)
       Drama
    
    
      6
       19
       1193
       5
       982730936
       M
        1
       10
       48073
       One Flew Over the Cuckoo's Nest (1975)
       Drama
    
    
      7
       24
       1193
       5
       978136709
       F
       25
        7
       10023
       One Flew Over the Cuckoo's Nest (1975)
       Drama
    
    
      8
       28
       1193
       3
       978125194
       F
       25
        1
       14607
       One Flew Over the Cuckoo's Nest (1975)
       Drama
    
    
      9
       33
       1193
       5
       978557765
       M
       45
        3
       55421
       One Flew Over the Cuckoo's Nest (1975)
       Drama
    
  

10 rows × 10 columns



In [158]:

    
data[:1]









    Out[158]:






  
    
      
      user_id
      movie_id
      rating
      timestamp
      gender
      age
      occupation
      zip
      title
      genres
    
  
  
    
      0
       1
       1193
       5
       978300760
       F
       1
       10
       48067
       One Flew Over the Cuckoo's Nest (1975)
       Drama
    
  

1 rows × 10 columns



In [159]:

    
data.ix[0]









    Out[159]:





user_id                                            1
movie_id                                        1193
rating                                             5
timestamp                                  978300760
gender                                             F
age                                                1
occupation                                        10
zip                                            48067
title         One Flew Over the Cuckoo's Nest (1975)
genres                                         Drama
Name: 0, dtype: object



In [ ]:

	_heartbeat_	a	al	c	cy	g	gr	h	hc	hh	kw	l	ll	nk	r	t	tz	u
0	NaN	Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKi...	en-US,en;q=0.8	US	Danvers	A6qOVH	MA	wfLQtf	1331822918	1.usa.gov	NaN	orofrog	[42.576698, -70.954903]	1	http://www.facebook.com/l/7AQEFzjSi/1.usa.gov/...	1331923247	America/New_York	http://www.ncbi.nlm.nih.gov/pubmed/22415991
1	NaN	GoogleMaps/RochesterNY	NaN	US	Provo	mwszkS	UT	mwszkS	1308262393	j.mp	NaN	bitly	[40.218102, -111.613297]	0	http://www.AwareMap.com/	1331923249	America/Denver	http://www.monroecounty.gov/etc/911/rss.php
2	NaN	Mozilla/4.0 (compatible; MSIE 8.0; Windows NT ...	en-US	US	Washington	xxr3Qb	DC	xxr3Qb	1331919941	1.usa.gov	NaN	bitly	[38.9007, -77.043098]	1	http://t.co/03elZC4Q	1331923250	America/New_York	http://boxer.senate.gov/en/press/releases/0316...

	Not Windows	Windows
tz
	245	276
Africa/Cairo	0	3
Africa/Casablanca	0	1
Africa/Ceuta	0	2
Africa/Johannesburg	0	1
Africa/Lusaka	0	1
America/Anchorage	4	1
America/Argentina/Buenos_Aires	1	0
America/Argentina/Cordoba	0	1
America/Argentina/Mendoza	0	1

	Not Windows	Windows
tz
America/Sao_Paulo	13	20
Europe/Madrid	16	19
Pacific/Honolulu	0	36
Asia/Tokyo	2	35
Europe/London	43	31
America/Denver	132	59
America/Los_Angeles	130	252
America/Chicago	115	285
	245	276
America/New_York	339	912

	user_id	movie_id	rating	timestamp
0	1	1193	5	978300760
1	1	661	3	978302109
2	1	914	3	978301968
3	1	3408	4	978300275
4	1	2355	5	978824291

	movie_id	title	genres
0	1	Toy Story (1995)	Animation\|Children's\|Comedy
1	2	Jumanji (1995)	Adventure\|Children's\|Fantasy
2	3	Grumpier Old Men (1995)	Comedy\|Romance
3	4	Waiting to Exhale (1995)	Comedy\|Drama
4	5	Father of the Bride Part II (1995)	Comedy

	user_id	movie_id	rating	timestamp	gender	age	occupation	zip	title	genres
0	1	1193	5	978300760	F	1	10	48067	One Flew Over the Cuckoo's Nest (1975)	Drama
1	2	1193	5	978298413	M	56	16	70072	One Flew Over the Cuckoo's Nest (1975)	Drama
2	12	1193	4	978220179	M	25	12	32793	One Flew Over the Cuckoo's Nest (1975)	Drama
3	15	1193	4	978199279	M	25	7	22903	One Flew Over the Cuckoo's Nest (1975)	Drama
4	17	1193	5	978158471	M	50	1	95350	One Flew Over the Cuckoo's Nest (1975)	Drama
5	18	1193	4	978156168	F	18	3	95825	One Flew Over the Cuckoo's Nest (1975)	Drama
6	19	1193	5	982730936	M	1	10	48073	One Flew Over the Cuckoo's Nest (1975)	Drama
7	24	1193	5	978136709	F	25	7	10023	One Flew Over the Cuckoo's Nest (1975)	Drama
8	28	1193	3	978125194	F	25	1	14607	One Flew Over the Cuckoo's Nest (1975)	Drama
9	33	1193	5	978557765	M	45	3	55421	One Flew Over the Cuckoo's Nest (1975)	Drama