In [20]:
import pandas as pd
import psycopg2
import paramiko
import os
import numpy as np
import math

In [23]:
data_dir='../data/' 
"""Path to local data directory"""

username='cy290e'
hostname='169.229.154.119'
db_name='craigslist'
password='' #password to database.  IMPORTANT: do not save passwords in the notebook
"""Postgres connection parameters"""

# establish postgres connection
conn = psycopg2.connect("dbname={d} user={u} host={h} password={pw}".format(d=db_name, u=username, h=hostname, pw=password))
cur = conn.cursor()


---------------------------------------------------------------------------
OperationalError                          Traceback (most recent call last)
<ipython-input-23-92d7cd62e944> in <module>()
      9 
     10 # establish postgres connection
---> 11 conn = psycopg2.connect("dbname={d} user={u} host={h} password={pw}".format(d=db_name, u=username, h=hostname, pw=password))
     12 cur = conn.cursor()

C:\Program Files\Anaconda3\lib\site-packages\psycopg2\__init__.py in connect(dsn, connection_factory, cursor_factory, **kwargs)
    128 
    129     dsn = _ext.make_dsn(dsn, **kwargs)
--> 130     conn = _connect(dsn, connection_factory=connection_factory, **kwasync)
    131     if cursor_factory is not None:
    132         conn.cursor_factory = cursor_factory

OperationalError: fe_sendauth: no password supplied

In [16]:



---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-16-725ac5850a60> in <module>()
      3 
      4 with SSHTunnelForwarder(
----> 5     (REMOTE_SERVER_IP, 443),
      6     ssh_username="",
      7     ssh_pkey= local_key_dir,

NameError: name 'REMOTE_SERVER_IP' is not defined

In [6]:



---------------------------------------------------------------------------
ImportError                               Traceback (most recent call last)
<ipython-input-6-6471456e2c1e> in <module>()
      1 import sys
----> 2 import chilkat
      3 
      4 sftp = chilkat.CkSFtp()
      5 

ImportError: No module named 'chilkat'

In [9]:
data = pd.read_csv(r'..\data\cl_census_CA.csv')
data.head()


Out[9]:
listing_id date rent bedrooms sqft rent_sqft fips_block state mpo_id cars_tot ... race_of_head_4 race_of_head_5 race_of_head_6 race_of_head_7 race_of_head_8 race_of_head_9 recent_mover_0 recent_mover_1 tenure_1 tenure_2
0 5873877617 2016-11-13 925.0 2.0 874.0 1.058352 60470010021060 CA 6197202.0 4603.0 ... NaN 8.0 527.0 1.0 198.0 42.0 1516.0 628.0 1310.0 834.0
1 5873876292 2016-11-13 735.0 0.0 650.0 1.130769 60070012001017 CA 6198000.0 646.0 ... NaN NaN 6.0 1.0 3.0 16.0 371.0 44.0 98.0 317.0
2 5873889346 2016-11-13 1675.0 3.0 1000.0 1.675000 60790125023017 CA 6199200.0 1902.0 ... NaN NaN 10.0 NaN 30.0 21.0 712.0 257.0 477.0 492.0
3 5873893871 2016-11-13 1818.0 2.0 1084.0 1.677122 60830020061208 CA 6196600.0 1058.0 ... NaN 2.0 21.0 1.0 8.0 8.0 427.0 41.0 420.0 48.0
4 5849643168 2016-11-13 1050.0 2.0 1100.0 0.954545 60890113004008 CA 6198100.0 786.0 ... NaN 3.0 NaN 2.0 26.0 23.0 287.0 205.0 202.0 290.0

5 rows × 29 columns


In [8]:
data.columns.values


Out[8]:
array(['listing_id', 'date', 'rent', 'bedrooms', 'sqft', 'rent_sqft',
       'fips_block', 'state', 'mpo_id', 'cars_tot', 'children_tot',
       'persons_tot', 'workers_tot', 'age_of_head_med', 'income_med',
       'hhs_tot', 'race_of_head_1', 'race_of_head_2', 'race_of_head_3',
       'race_of_head_4', 'race_of_head_5', 'race_of_head_6',
       'race_of_head_7', 'race_of_head_8', 'race_of_head_9',
       'recent_mover_0', 'recent_mover_1', 'tenure_1', 'tenure_2'], dtype=object)