Loading data with odo


In [1]:
import pandas as pd
# Read the sample CSV into a DataFrame and preview its first rows.
sample_csv_path = 'data/github_archive_sample.csv'
df = pd.read_csv(sample_csv_path)
df.head()
# The pandas-only route into a database would continue with df.to_sql(...).


Out[1]:
created_at type_ user repo commits
0 2015-01-01 00:00:00+00:00 PushEvent davidjhulse davidjhulse/davesbingrewardsbot 1
1 2015-01-01 00:00:00+00:00 PushEvent jmoon018 jmoon018/rshell-unit-tester 1
2 2015-01-01 00:00:01+00:00 CreateEvent christoferpeterson christoferpeterson/Vadek NaN
3 2015-01-01 00:00:01+00:00 PushEvent JakeWharton square/okhttp 8
4 2015-01-01 00:00:03+00:00 PushEvent git4ruby git4ruby/movie_review1 1

In [2]:
from odo import odo
# One odo call copies the CSV into table `github_archive` of a SQLite file;
# the target URI has the form <database uri>::<table name>.
csv_source = 'data/github_archive_sample.csv'
sqlite_target = 'sqlite:///data/db.sqlite::github_archive'
odo(csv_source, sqlite_target)


Out[2]:
Table('github_archive', MetaData(bind=Engine(sqlite:///data/db.sqlite)), Column('created_at', DATETIME(), table=<github_archive>), Column('type_', TEXT(), table=<github_archive>), Column('user', TEXT(), table=<github_archive>), Column('repo', TEXT(), table=<github_archive>), Column('commits', FLOAT(), table=<github_archive>), schema=None)

In [3]:
# Convert the gzipped CSV into a JSON-lines resource using the same
# odo(source, target) call shape.
gz_csv_source = 'data/github_archive_sample.csv.gz'
jsonlines_target = 'jsonlines://data/github_archive_sample.json.gz'
odo(gz_csv_source, jsonlines_target)


Out[3]:
<odo.backends.json.JSONLines at 0x11b2bed0>

In [4]:
from odo import resource, discover
# Wrap the gzipped CSV in an odo resource and print the datashape that
# discover() reports for it.
csv = resource('data/github_archive_sample.csv.gz')
# A parenthesised single-argument print is valid both as a Python 2 print
# statement and as a Python 3 function call, and emits the same text.
print(discover(csv))


var * {
  created_at: ?datetime,
  type_: ?string,
  user: ?string,
  repo: ?string,
  commits: ?float64
  }


In [5]:
# Inspect the datashape of the Artist table in the Chinook SQLite database.
artist_table = resource('sqlite:///data/Chinook_Sqlite.sqlite::Artist')
discover(artist_table)


Out[5]:
dshape("var * {ArtistId: int32, Name: ?string[120]}")

In [6]:
# Export the Artist table from the Chinook SQLite database to a CSV file.
artist_source = 'sqlite:///data/Chinook_Sqlite.sqlite::Artist'
artist_csv_target = 'data/chinook_artist.csv'
odo(artist_source, artist_csv_target)


Out[6]:
<odo.backends.csv.CSV at 0x11afedb0>