In [2]:
import pandas as pd
# Load the survey records. keep_default_na=False plus na_values=[""] means
# only empty fields are parsed as NaN; literal strings such as "NA" stay text.
surveys_df = pd.read_csv(
    "surveys.csv",
    na_values=[""],
    keep_default_na=False,
)
In [4]:
# Load the species lookup table; only empty fields are treated as missing.
species_df = pd.read_csv(
    "species.csv",
    na_values=[""],
    keep_default_na=False,
)
In [5]:
# First 10 rows of the surveys table
survey_sub = surveys_df.head(10)
# Last 10 rows, renumbered from 0 so that this second DataFrame combines
# cleanly with the first. drop=True discards the old index values instead of
# inserting them as a new column.
survey_sub_last10 = surveys_df.tail(10).reset_index(drop=True)
In [6]:
# Stack the DataFrames on top of each other. survey_sub_last10 had its index
# reset to start at 0, so its labels collide with those of survey_sub;
# ignore_index=True renumbers the stacked rows 0..n-1 instead of leaving
# duplicated index labels in the result.
vertical_stack = pd.concat(
    [survey_sub, survey_sub_last10],
    axis=0,
    ignore_index=True,
)
# Place the DataFrames side by side; rows are matched on their index labels.
# NOTE: both inputs are slices of surveys_df, so each column name appears
# twice in the result.
horizontal_stack = pd.concat([survey_sub, survey_sub_last10], axis=1)
In [7]:
# Save the vertically stacked table as CSV, leaving the index out of the file
vertical_stack.to_csv(path_or_buf="out.csv", index=False)
In [8]:
# Read the file we just wrote back into Python so the round trip can be
# inspected; only empty fields are treated as missing values.
new_output = pd.read_csv(
    "out.csv",
    na_values=[""],
    keep_default_na=False,
)
In [11]:
# Work with the first 10 rows of the surveys table
survey_sub = surveys_df.head(n=10)
# Load the small subset of the species data designed for this part of the
# lesson.
# NOTE(review): the original note says the file is stored in the data folder,
# but the path below is relative to the working directory -- confirm location.
species_sub = pd.read_csv(
    "speciesSubset.csv",
    na_values=[""],
    keep_default_na=False,
)
In [12]:
# Show the column labels of the species subset
species_sub.columns
Out[12]:
In [13]:
# Show the column labels of the survey subset
survey_sub.columns
Out[13]:
In [14]:
# Inner join on species_id: keeps only the survey rows whose species_id also
# occurs in species_sub.
merged_inner = survey_sub.merge(species_sub, how='inner', on='species_id')
In [15]:
# (rows, columns) of the inner-join result
merged_inner.shape
Out[15]:
In [16]:
# Display the inner-join result
merged_inner
Out[16]:
In [17]:
# Left join on species_id: every row of survey_sub is kept; the columns coming
# from species_sub are NaN where no matching species_id exists.
merged_left = survey_sub.merge(species_sub, how='left', on='species_id')
merged_left
Out[17]:
In [18]:
# Rows of the left join whose genus is missing -- presumably the survey rows
# that found no match in species_sub.
merged_left[merged_left['genus'].isna()]
Out[18]:
In [ ]: