In [2]:
import pandas as pd
surveys_df = pd.read_csv("surveys.csv",keep_default_na=False, na_values=[""])

In [4]:
species_df = pd.read_csv("species.csv",keep_default_na=False, na_values=[""])

In [5]:
# read in first 10 lines of surveys table
survey_sub = surveys_df.head(10)
# grab the last 10 rows 
survey_sub_last10 = surveys_df.tail(10)
#reset the index values to the second dataframe appends properly
survey_sub_last10=survey_sub_last10.reset_index(drop=True)
# drop=True option avoids adding new index column with old index values

In [6]:
# stack the DataFrames on top of each other
vertical_stack = pd.concat([survey_sub, survey_sub_last10], axis=0)

# place the DataFrames side by side
horizontal_stack = pd.concat([survey_sub, survey_sub_last10], axis=1)

In [7]:
# Write DataFrame to CSV
vertical_stack.to_csv('out.csv', index=False)

In [8]:
# for kicks read our output back into python and make sure all looks good
new_output = pd.read_csv('out.csv', keep_default_na=False, na_values=[""])

In [11]:
# read in first 10 lines of surveys table
survey_sub = surveys_df.head(10)

# import a small subset of the species data designed for this part of the lesson.
# It is stored in the data folder.
species_sub = pd.read_csv('speciesSubset.csv', keep_default_na=False, na_values=[""])

In [12]:
species_sub.columns


Out[12]:
Index(['species_id', 'genus', 'species', 'taxa'], dtype='object')

In [13]:
survey_sub.columns


Out[13]:
Index(['record_id', 'month', 'day', 'year', 'plot_id', 'species_id', 'sex',
       'hindfoot_length', 'weight'],
      dtype='object')

In [14]:
merged_inner = pd.merge(left=survey_sub,right=species_sub, left_on='species_id', right_on='species_id')

In [15]:
merged_inner.shape


Out[15]:
(8, 12)

In [16]:
merged_inner


Out[16]:
record_id month day year plot_id species_id sex hindfoot_length weight genus species taxa
0 1 7 16 1977 2 NL M 32.0 NaN Neotoma albigula Rodent
1 2 7 16 1977 3 NL M 33.0 NaN Neotoma albigula Rodent
2 3 7 16 1977 2 DM F 37.0 NaN Dipodomys merriami Rodent
3 4 7 16 1977 7 DM M 36.0 NaN Dipodomys merriami Rodent
4 5 7 16 1977 3 DM M 35.0 NaN Dipodomys merriami Rodent
5 8 7 16 1977 1 DM M 37.0 NaN Dipodomys merriami Rodent
6 9 7 16 1977 1 DM F 34.0 NaN Dipodomys merriami Rodent
7 7 7 16 1977 2 PE F NaN NaN Peromyscus eremicus Rodent

In [17]:
merged_left = pd.merge(left=survey_sub,right=species_sub, how='left', left_on='species_id', right_on='species_id')
merged_left


Out[17]:
record_id month day year plot_id species_id sex hindfoot_length weight genus species taxa
0 1 7 16 1977 2 NL M 32.0 NaN Neotoma albigula Rodent
1 2 7 16 1977 3 NL M 33.0 NaN Neotoma albigula Rodent
2 3 7 16 1977 2 DM F 37.0 NaN Dipodomys merriami Rodent
3 4 7 16 1977 7 DM M 36.0 NaN Dipodomys merriami Rodent
4 5 7 16 1977 3 DM M 35.0 NaN Dipodomys merriami Rodent
5 6 7 16 1977 1 PF M 14.0 NaN NaN NaN NaN
6 7 7 16 1977 2 PE F NaN NaN Peromyscus eremicus Rodent
7 8 7 16 1977 1 DM M 37.0 NaN Dipodomys merriami Rodent
8 9 7 16 1977 1 DM F 34.0 NaN Dipodomys merriami Rodent
9 10 7 16 1977 6 PF F 20.0 NaN NaN NaN NaN

In [18]:
merged_left[ pd.isnull(merged_left.genus) ]


Out[18]:
record_id month day year plot_id species_id sex hindfoot_length weight genus species taxa
5 6 7 16 1977 1 PF M 14.0 NaN NaN NaN NaN
9 10 7 16 1977 6 PF F 20.0 NaN NaN NaN NaN

In [ ]: