In [1]:
# Import tree-level data from PNW_FIADB2011 forest inventory plots.
# Summarize the data into 2" diameter classes.
# Export a CSV with the total trees per acre by species and 2" diameter class
# for each "condition" (~one inventory plot).
In [2]:
# import the packages
import pandas as pd
import numpy as np
import csv
import time
import json
In [3]:
# read in the treelists for the PNW_FIADB2011 dataset and IDBv2.0 dataset
PNWFIADB2011 = pd.read_csv(r"G:\projects\ForestPlanner_2015\Data\Processed\PNW_FIADB2011_TREELIST_2015-11-06.csv")
IDB2pt0 = pd.read_csv(r"G:\projects\ForestPlanner_2015\Data\Processed\IDB2pt0_TREELIST_2015-11-04.csv")
In [5]:
print(str(PNWFIADB2011.COND_ID.nunique()) + " unique PNWFIADB 2011 conditions")
print(str(IDB2pt0.COND_ID.nunique()) + " unique IDB 2.0 conditions")
In [6]:
# combine these datasets into one master treelist
treelist = PNWFIADB2011.append(IDB2pt0)
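In [ ]:
# Optional sanity check (not part of the original run): the combined treelist
# should have exactly as many rows as the two source treelists together.
print(len(treelist) == len(PNWFIADB2011) + len(IDB2pt0))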
In [7]:
treelist.head()
Out[7]:
In [8]:
# read in the FVS species name crosswalk table
FVS_crosswalk = pd.read_csv(r"G:\projects\ForestPlanner_2015\Data\work\FVS_SpeciesCrosswalk_2015-11-06.csv")
In [9]:
# drop entries in the crosswalk without species_symbols
FVS_crosswalk.dropna(subset=["SPECIES_SYMBOL"], inplace = True)
# set species_symbol as index for FVS_crosswalk
FVS_crosswalk.set_index("SPECIES_SYMBOL", inplace = True)
In [10]:
FVS_crosswalk.columns.values
Out[10]:
In [11]:
# Drop columns from FVS_crosswalk we don't want or need
dropcols = ['FIA_SPCD', 'FVS_ALPHA_CODE', 'SCIENTIFIC NAME_FSVeg', 'AK', 'CI', 'CR', 'EM', 'IE', 'KT', 'NI', 'NC', 'TT', 'UT', 'WS']
FVS_crosswalk.drop(labels = dropcols, axis = 1, inplace = True)
FVS_crosswalk.head()
Out[11]:
In [12]:
fvs_symbols_missing = 0
for symbol in pd.unique(treelist.SPECIES_SYMBOL):
    if symbol not in FVS_crosswalk.index:
        fvs_symbols_missing += 1
        print(symbol + " not in FVS_crosswalk")
if fvs_symbols_missing == 0:
    print("All species symbols from treelist found in FVS_crosswalk")
else:
    print("\nSymbols missing from FVS_crosswalk: " + str(fvs_symbols_missing))
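In [ ]:
# Equivalent vectorized check (sketch, not part of the original run): a set
# difference finds any treelist symbols missing from the crosswalk in one pass.
missing = set(pd.unique(treelist.SPECIES_SYMBOL)) - set(FVS_crosswalk.index)
print(missing if missing else "All species symbols from treelist found in FVS_crosswalk")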
In [13]:
# Count the number of unique values in each FVS_crosswalk column for the species symbols found in the treelist;
# this shows how much mapping SPECIES_SYMBOL to any of these columns will consolidate the number of options.
# First do it on the index of FVS_crosswalk (species symbols)...
print("Species symbols" + ": " + str(len(pd.unique(FVS_crosswalk.loc[FVS_crosswalk.index.isin(pd.unique(treelist.SPECIES_SYMBOL))].index))))
# ...then on all the other columns
for col in FVS_crosswalk.columns.values:
    print(col + ": " + str(len(pd.unique(FVS_crosswalk[col].loc[FVS_crosswalk.index.isin(pd.unique(treelist.SPECIES_SYMBOL))]))))
In [14]:
# Overwrite the common names in the treelist using the values from FVS_crosswalk
print(str(len(pd.unique(treelist.COMMON_NAME))) + " common_names before")
for symbol in pd.unique(treelist.SPECIES_SYMBOL):
    treelist.loc[treelist.SPECIES_SYMBOL == symbol, "COMMON_NAME"] = FVS_crosswalk.COMMON_NAME.loc[symbol]
print(str(len(pd.unique(treelist.COMMON_NAME))) + " common_names after")
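In [ ]:
# Equivalent vectorized form (sketch, not part of the original run): mapping the
# symbols through the crosswalk column should reproduce the loop's result, assuming
# each SPECIES_SYMBOL appears only once in the FVS_crosswalk index.
mapped = treelist["SPECIES_SYMBOL"].map(FVS_crosswalk["COMMON_NAME"])
print((mapped == treelist["COMMON_NAME"]).all())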
In [15]:
# Create a nested dictionary for all entries; the top-level key is the common name.
# Set up all common names as keys, each with an empty nested dictionary as its value.
species_crosswalk = {common_name: {} for common_name in pd.unique(FVS_crosswalk['COMMON_NAME'])}
# for each common name (key) in the dictionary, set up the nested dictionary...
for key in species_crosswalk:
    # each subkey is an FVS_crosswalk column name, with an empty value for now
    species_crosswalk[key] = {column: '' for column in FVS_crosswalk.columns.values}
# for each common name (key) in species_crosswalk, fill in the values...
for key in species_crosswalk:
    # for each subkey (the FVS_crosswalk column names)...
    for subkey in species_crosswalk[key]:
        # set the value from the first matching row of the FVS_crosswalk table
        species_crosswalk[key][subkey] = FVS_crosswalk[subkey].loc[FVS_crosswalk.COMMON_NAME == key].values[0]
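In [ ]:
# Near-equivalent construction (sketch, not part of the original run): pandas can
# build a similar nested dict in one expression; unlike the loops above, COMMON_NAME
# itself no longer appears as a subkey.
alt_crosswalk = FVS_crosswalk.drop_duplicates(subset="COMMON_NAME").set_index("COMMON_NAME").to_dict(orient="index")
print(len(alt_crosswalk) == len(species_crosswalk))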
In [16]:
# dump the species_crosswalk to a JSON file
with open(r'G:\projects\ForestPlanner_2015\Data\Processed\FVS_species_crosswalk_' + time.strftime("%Y-%m-%d") + '.json', 'w') as dumpsite:
    json.dump(species_crosswalk, dumpsite)
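In [ ]:
# Optional round-trip check (sketch, not part of the original run): reload the
# dumped JSON and confirm no top-level species entries were lost.
with open(r'G:\projects\ForestPlanner_2015\Data\Processed\FVS_species_crosswalk_' + time.strftime("%Y-%m-%d") + '.json') as dumpsite:
    reloaded = json.load(dumpsite)
print(len(reloaded) == len(species_crosswalk))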
In [17]:
print(str(treelist.COND_ID.nunique()) + " unique conditions (~inventory plots)")
print(str(len(treelist)) + " trees")
In [18]:
# Set COND_ID as index
treelist.set_index("COND_ID", inplace = True)
In [19]:
print(str(len(pd.unique(treelist.COMMON_NAME))) + " COMMON_NAMEs")
print(str(len(pd.unique(treelist.SPECIES_SYMBOL))) + " SPECIES_SYMBOLs")
print(str(len(pd.unique(treelist.SPCD))) + " SPCDs")
print(str(len(pd.unique(treelist.GENUS))) + " GENUSes")
print(str(len(pd.unique(treelist.SPP_GRP))) + " SPP_GRPs")
In [20]:
# How many null values are there for each column?
treelist.isnull().sum()
Out[20]:
In [21]:
sorted(pd.unique(treelist.COMMON_NAME))
Out[21]:
In [22]:
# remove trees without a DIA or TPA_UNADJ value
treelist.dropna(axis = 0, how = 'any', subset = ["TPA_UNADJ", "DIA"], inplace = True)
In [23]:
# How many null values are there for each column now?
treelist.isnull().sum()
Out[23]:
In [24]:
# Identify the 2" diameter class for each tree:
# given a tree's DBH, return the lower (minDBH) and upper (maxDBH) bounds of its 2" diameter class
def minDBH(dbh):
    return int(np.floor(dbh / 2) * 2)

def maxDBH(dbh):
    # if a tree has an even DBH (e.g., 2.0), don't let maxDBH equal minDBH...
    if np.ceil(dbh / 2) * 2 == np.floor(dbh / 2) * 2:
        return int(np.ceil(dbh / 2) * 2 + 2)
    else:
        return int(np.ceil(dbh / 2) * 2)
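In [ ]:
# Quick sanity check on the class boundaries (illustrative DBH values, not from the
# inventory): each diameter falls in a half-open 2" class [minDBH, maxDBH).
for dbh in [1.9, 2.0, 3.5, 11.0]:
    print(str(dbh) + '" -> [' + str(minDBH(dbh)) + ', ' + str(maxDBH(dbh)) + ')')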
In [25]:
# calculate the min and max DBHs for all the trees
treelist["minDBH"] = treelist.DIA.apply(minDBH)
treelist["maxDBH"] = treelist.DIA.apply(maxDBH)
In [26]:
# Add a basal area field
treelist["BA_ft2_ac"] = (treelist.DIA ** 2) * 0.005454 * treelist.TPA_UNADJ
In [27]:
# create columns for each variant;
# for each tree in the treelist, populate them with the species code FVS uses for that variant
variants = ['BM', 'CA', 'EC', 'PN', 'SO', 'WC']
for variant in variants:
    treelist[variant] = treelist['COMMON_NAME'].apply(lambda common_name: species_crosswalk[common_name][variant])
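In [ ]:
# Spot-check (sketch, not part of the original run): show the per-variant FVS species
# codes assigned to one arbitrary common name from the treelist.
example_name = pd.unique(treelist.COMMON_NAME)[0]
print(example_name + ": " + str([species_crosswalk[example_name][v] for v in variants]))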
In [28]:
# create the treelive treelist by keeping only live trees (STATUSCD == 1; dead trees have STATUSCD == 2)
treelive = treelist.loc[treelist.STATUSCD == 1]
In [29]:
treelive.head()
Out[29]:
In [30]:
# Calculate total live TPA of each species within each 2" diameter class.
# Create a blank dataframe that will hold the treelive summaries for all variants.
treelive_summary = pd.DataFrame()
# create a treelive summary for each variant
for variant in variants:
    # group by COND_ID, PLOT, COMMON_NAME, variant (the column holding the species code FVS uses for the tree in that variant), and DBH class;
    # return the sum, mean, and count of each measure within these groups
    grouped = treelive.groupby([treelive.index, "PLOT", "COMMON_NAME", variant, "minDBH"])[["TPA_UNADJ", "BA_ft2_ac", "HT", "DIA", "TOTAGE"]].agg([np.sum, np.mean, 'count']).reset_index()
    grouped.rename(columns={'COND_ID': 'cond_id'}, inplace=True)
    grouped.set_index("cond_id", inplace=True)
    # flatten the column names (groupby returned multi-level column names)
    grouped.columns = ['_'.join(col).strip() if col[1] != '' else col[0] for col in grouped.columns.values]
    # add a column to this dataframe that stores which variant it represents
    grouped["variant"] = variant
    # create a 'varname' column that concatenates common_name and diameter class
    grouped["varname"] = grouped.COMMON_NAME + "_" + grouped.minDBH.map(str)
    # rename the columns to what the IDB_summary schema in Forest Planner expects
    grouped.rename(columns={'PLOT': 'plot_id', variant: 'FVS_Spp_Code', 'COMMON_NAME': 'fia_forest_type_name',
                            'minDBH': 'calc_dbh_class', 'TPA_UNADJ_sum': 'sumoftpa', 'TPA_UNADJ_mean': 'avgoftpa',
                            'TPA_UNADJ_count': 'calc_tree_count', 'BA_ft2_ac_sum': 'sumofba_ft2_ac',
                            'BA_ft2_ac_mean': 'avgofba_ft2_ac', 'HT_mean': 'avgofht_ft', 'DIA_mean': 'avgofdbh_in',
                            'TOTAGE_mean': 'avgofage_bh'}, inplace=True)
    # cast columns to the data types the TREELIVE_SUMMARY db schema expects (for those not already appropriately typed)
    for column in ['calc_dbh_class', 'sumoftpa', 'avgoftpa']:
        grouped[column] = grouped[column].map(float)
    grouped.reset_index(inplace=True)
    # append the treelive summary for this variant to the master dataframe
    treelive_summary = treelive_summary.append(grouped, ignore_index=True)
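In [ ]:
# Sanity check (sketch, not part of the original run): every variant should
# contribute the same number of summary rows.
print(treelive_summary.groupby('variant').size())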
In [31]:
treelive_summary.head()
Out[31]:
In [32]:
treelive_summary.dtypes
Out[32]:
In [33]:
# Add columns to treelive_summary recording the total BA of each COND_ID, the count of species-x-size classes
# in that COND_ID, and the % of the COND_ID's total basal area found in each species-x-size class.
# Create a dataframe holding the total basal area and the count of species-x-size classes for each COND_ID
# (any single variant works here, since the grouping repeats across variants)
cond_summary = pd.DataFrame(treelive_summary.loc[treelive_summary.variant == 'BM'].groupby(['cond_id'])['sumofba_ft2_ac'].agg(['sum', 'count']))
# (the groupby above already left cond_id as the index of cond_summary)
# look up the total BA for each COND_ID in the cond_summary dataframe and apply it to every species-x-size class in treelive_summary
treelive_summary['total_ba_ft2_ac'] = treelive_summary.reset_index()['cond_id'].apply(lambda id: cond_summary.at[id,'sum'])
# look up the number of species-x-size classes in the cond_summary dataframe and apply it to every species-x-size class in treelive_summary
treelive_summary['count_speciessizeclasses'] = treelive_summary.reset_index()['cond_id'].apply(lambda id: cond_summary.at[id,'count']).map(int)
# calculate the % of total basal area found in each species-x-size class
treelive_summary['pct_of_totalba'] = treelive_summary['sumofba_ft2_ac']/treelive_summary['total_ba_ft2_ac']
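In [ ]:
# Sanity check (sketch, not part of the original run): within any single variant,
# pct_of_totalba should sum to ~1.0 for every condition.
bm = treelive_summary.loc[treelive_summary.variant == 'BM']
print(bm.groupby('cond_id')['pct_of_totalba'].sum().describe())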
In [34]:
treelive_summary.dtypes
Out[34]:
In [40]:
treelive_summary[['cond_id', 'calc_dbh_class', 'fia_forest_type_name', 'avgofdbh_in', 'avgofht_ft']].loc[(treelive_summary.cond_id == 521) & (treelive_summary.fia_forest_type_name == "Pacific silver fir")]
Out[40]:
In [35]:
# write the treelive_summary to a CSV
cols_to_write = ['cond_id', 'plot_id', 'variant', 'varname', 'fia_forest_type_name', 'FVS_Spp_Code', 'calc_dbh_class', 'calc_tree_count', 'sumoftpa', 'avgoftpa', 'sumofba_ft2_ac', 'avgofba_ft2_ac', 'avgofht_ft', 'avgofdbh_in', 'avgofage_bh', 'total_ba_ft2_ac', 'count_speciessizeclasses', 'pct_of_totalba']
treelive_summary.to_csv(r"G:\projects\ForestPlanner_2015\Data\Processed\TREELIVE_SUMMARY_" + time.strftime("%Y-%m-%d") + ".csv", columns = cols_to_write, header = True, index = True, index_label = "class_id", quoting=csv.QUOTE_NONNUMERIC)
print("printed " + str(len(treelive_summary) // len(variants)) + " species-x-dbh classes")
print("from " + str(len(pd.unique(treelive_summary.cond_id))) + " conditions to:")
print(r"G:\projects\ForestPlanner_2015\Data\Processed\TREELIVE_SUMMARY_" + time.strftime("%Y-%m-%d") + ".csv")