In [2]:
# Imports and plot styling, then load the country table and the Arkive data
import pandas as pd
import py_entitymatching as em
import math
import matplotlib.pyplot as plt
import numpy as np
from matplotlib import style
import re
style.use('ggplot')

# Load the two input tables.
#   CI: tab-separated country table. Rows are split on '\r' only, so a '\n'
#       following each '\r' would stay glued to the first field of the next
#       row -- presumably why the '#ISO' values are stripped of '\n' later on.
#   ar: Arkive records, read as ISO-8859-1 with 'id' registered as the key.
CI = em.read_csv_metadata('countryInfo.txt', sep='\t', lineterminator='\r')
ar = em.read_csv_metadata("../finalArkives.csv", encoding="ISO-8859-1", key="id")

# Keep only the ISO code and the country name, renaming '#ISO' to 'id'.
# rename() returns a new frame here, so nothing is modified in place on a
# slice of CI (which is what triggers pandas' SettingWithCopyWarning).
CI1 = CI[['#ISO', 'Country']].rename(columns={'#ISO': 'id'})
CI1.head()
Out[2]:
In [ ]:
# Clean the 'id' codes and make them the index of the lookup table.
#
# Each id is converted to text and any '\n' inside it is removed, then the
# cleaned column becomes the index so a code can be looked up with CI1.loc[...].
# The guard makes the cell safe to run again: once 'id' has been moved into the
# index there is no 'id' column left to clean.
if 'id' in CI1.columns:
    names = CI1['id'].astype(str).str.replace('\n', '')
    # assign() builds a new frame instead of writing into a slice of CI.
    CI1 = CI1.assign(id=names).set_index('id')
CI1.head()
In [ ]:
# Inspect the country lookup table (nothing is read or modified in this cell).
# Disabled manual patch that would add a 'UK' -> 'United Kingdom' row:
#CI1.loc[-1] = ['UK', 'United Kingdom']
# Glance at the first few rows.
CI1.head()
In [ ]:
# List the Arkive column names; the column read further down is ' countries' (with a leading space).
ar.columns
In [ ]:
# Derive a 'country' column from the raw ' countries' column of ar.
#
# For every record the cell value is turned into text, each single-quoted token
# is extracted, and the resulting list is written back out as a string with all
# commas removed. For example, a cell whose text is "['US', 'CA']" becomes
# "['US' 'CA']". Cells without any quoted token become "[]".
countries = [
    str(re.findall(r"'(.*?)'", str(raw_cell), re.DOTALL)).replace(',', '')
    for raw_cell in ar[' countries']  # leading space is part of the column name
]
ar['country'] = countries
ar.columns
In [ ]:
# Dropping the raw ' countries' column is left disabled, so it stays in ar.
#ar.drop([' countries'], axis = 1, inplace=True)
# Show the column names of ar.
ar.columns
In [ ]:
# Preview the first rows of ar.
ar.head()
In [ ]:
# Translate the country codes in ar['country'] into country names.
#
# Input : ar['country'] -- text containing single-quoted codes per record.
#         CI1           -- lookup table indexed by code, with a 'Country' column.
# Output: ar['countries'] -- for each record, the string form of the list of
#         country names, in the order the codes appear.
#         error           -- how many codes had no entry in CI1.
#
# The name is read straight from the 'Country' column. Parsing the printed form
# of CI1.loc[code] instead would depend on pandas' display layout and would
# break for any name containing the words 'Name' or 'Country'.
error = 0
country_by_code = CI1['Country'].to_dict()  # built once, outside the loop

finalC = []
for list_country in ar['country']:
    codes = re.findall(r"'(.*?)'", str(list_country), re.DOTALL)
    cout = []
    for n in codes:
        if n in country_by_code:
            cout.append(str(country_by_code[n]).strip())
        else:
            # Unknown code: keep it verbatim instead of aborting the whole
            # loop with a KeyError, and count it so it can be reviewed.
            error += 1
            cout.append(n)
    finalC.append(str(cout))
ar['countries'] = finalC
In [ ]:
# Optional clean-up / reload / export steps, all left disabled:
#ar.head()
#ar.drop(['country'], axis = 1, inplace=True)
#ar = em.read_csv_metadata("countryKeys.csv", encoding="ISO-8859-1", key="id")
#ar.to_csv('finalArkives.csv')
# Preview the first rows of ar.
ar.head()
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]: