notebook.community

Edit and run



In [2]:

    
import pandas as pd
surveys_df = pd.read_csv("surveys.csv",keep_default_na=False, na_values=[""])



In [4]:

    
species_df = pd.read_csv("species.csv",keep_default_na=False, na_values=[""])



In [5]:

    
# read in first 10 lines of surveys table
survey_sub = surveys_df.head(10)
# grab the last 10 rows 
survey_sub_last10 = surveys_df.tail(10)
#reset the index values to the second dataframe appends properly
survey_sub_last10=survey_sub_last10.reset_index(drop=True)
# drop=True option avoids adding new index column with old index values



In [6]:

    
# stack the DataFrames on top of each other
vertical_stack = pd.concat([survey_sub, survey_sub_last10], axis=0)

# place the DataFrames side by side
horizontal_stack = pd.concat([survey_sub, survey_sub_last10], axis=1)



In [7]:

    
# Write DataFrame to CSV
vertical_stack.to_csv('out.csv', index=False)



In [8]:

    
# for kicks read our output back into python and make sure all looks good
new_output = pd.read_csv('out.csv', keep_default_na=False, na_values=[""])



In [11]:

    
# read in first 10 lines of surveys table
survey_sub = surveys_df.head(10)

# import a small subset of the species data designed for this part of the lesson.
# It is stored in the data folder.
species_sub = pd.read_csv('speciesSubset.csv', keep_default_na=False, na_values=[""])



In [12]:

    
species_sub.columns









    Out[12]:





Index(['species_id', 'genus', 'species', 'taxa'], dtype='object')



In [13]:

    
survey_sub.columns









    Out[13]:





Index(['record_id', 'month', 'day', 'year', 'plot_id', 'species_id', 'sex',
       'hindfoot_length', 'weight'],
      dtype='object')



In [14]:

    
merged_inner = pd.merge(left=survey_sub,right=species_sub, left_on='species_id', right_on='species_id')



In [15]:

    
merged_inner.shape









    Out[15]:





(8, 12)



In [16]:

    
merged_inner









    Out[16]:







  
    
      
      record_id
      month
      day
      year
      plot_id
      species_id
      sex
      hindfoot_length
      weight
      genus
      species
      taxa
    
  
  
    
      0
      1
      7
      16
      1977
      2
      NL
      M
      32.0
      NaN
      Neotoma
      albigula
      Rodent
    
    
      1
      2
      7
      16
      1977
      3
      NL
      M
      33.0
      NaN
      Neotoma
      albigula
      Rodent
    
    
      2
      3
      7
      16
      1977
      2
      DM
      F
      37.0
      NaN
      Dipodomys
      merriami
      Rodent
    
    
      3
      4
      7
      16
      1977
      7
      DM
      M
      36.0
      NaN
      Dipodomys
      merriami
      Rodent
    
    
      4
      5
      7
      16
      1977
      3
      DM
      M
      35.0
      NaN
      Dipodomys
      merriami
      Rodent
    
    
      5
      8
      7
      16
      1977
      1
      DM
      M
      37.0
      NaN
      Dipodomys
      merriami
      Rodent
    
    
      6
      9
      7
      16
      1977
      1
      DM
      F
      34.0
      NaN
      Dipodomys
      merriami
      Rodent
    
    
      7
      7
      7
      16
      1977
      2
      PE
      F
      NaN
      NaN
      Peromyscus
      eremicus
      Rodent



In [17]:

    
merged_left = pd.merge(left=survey_sub,right=species_sub, how='left', left_on='species_id', right_on='species_id')
merged_left









    Out[17]:







  
    
      
      record_id
      month
      day
      year
      plot_id
      species_id
      sex
      hindfoot_length
      weight
      genus
      species
      taxa
    
  
  
    
      0
      1
      7
      16
      1977
      2
      NL
      M
      32.0
      NaN
      Neotoma
      albigula
      Rodent
    
    
      1
      2
      7
      16
      1977
      3
      NL
      M
      33.0
      NaN
      Neotoma
      albigula
      Rodent
    
    
      2
      3
      7
      16
      1977
      2
      DM
      F
      37.0
      NaN
      Dipodomys
      merriami
      Rodent
    
    
      3
      4
      7
      16
      1977
      7
      DM
      M
      36.0
      NaN
      Dipodomys
      merriami
      Rodent
    
    
      4
      5
      7
      16
      1977
      3
      DM
      M
      35.0
      NaN
      Dipodomys
      merriami
      Rodent
    
    
      5
      6
      7
      16
      1977
      1
      PF
      M
      14.0
      NaN
      NaN
      NaN
      NaN
    
    
      6
      7
      7
      16
      1977
      2
      PE
      F
      NaN
      NaN
      Peromyscus
      eremicus
      Rodent
    
    
      7
      8
      7
      16
      1977
      1
      DM
      M
      37.0
      NaN
      Dipodomys
      merriami
      Rodent
    
    
      8
      9
      7
      16
      1977
      1
      DM
      F
      34.0
      NaN
      Dipodomys
      merriami
      Rodent
    
    
      9
      10
      7
      16
      1977
      6
      PF
      F
      20.0
      NaN
      NaN
      NaN
      NaN



In [18]:

    
merged_left[ pd.isnull(merged_left.genus) ]









    Out[18]:







  
    
      
      record_id
      month
      day
      year
      plot_id
      species_id
      sex
      hindfoot_length
      weight
      genus
      species
      taxa
    
  
  
    
      5
      6
      7
      16
      1977
      1
      PF
      M
      14.0
      NaN
      NaN
      NaN
      NaN
    
    
      9
      10
      7
      16
      1977
      6
      PF
      F
      20.0
      NaN
      NaN
      NaN
      NaN



In [ ]:

	record_id	month	day	year	plot_id	species_id	sex	hindfoot_length	weight	genus	species	taxa
0	1	7	16	1977	2	NL	M	32.0	NaN	Neotoma	albigula	Rodent
1	2	7	16	1977	3	NL	M	33.0	NaN	Neotoma	albigula	Rodent
2	3	7	16	1977	2	DM	F	37.0	NaN	Dipodomys	merriami	Rodent
3	4	7	16	1977	7	DM	M	36.0	NaN	Dipodomys	merriami	Rodent
4	5	7	16	1977	3	DM	M	35.0	NaN	Dipodomys	merriami	Rodent
5	8	7	16	1977	1	DM	M	37.0	NaN	Dipodomys	merriami	Rodent
6	9	7	16	1977	1	DM	F	34.0	NaN	Dipodomys	merriami	Rodent
7	7	7	16	1977	2	PE	F	NaN	NaN	Peromyscus	eremicus	Rodent