The data comes from the Seed Vault website: http://www.nordgen.org/sgsv/
In [1]:
import pandas as pd
import numpy as np
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
In [2]:
# Load the seed-deposit event log straight from the Seed Vault website.
# Despite the ".xls" extension the download is plain tab-separated text,
# so it is parsed as delimited text (sep="\t") rather than with read_excel().
# header=0: the first row of the file supplies the column names.
deposits = pd.read_csv(
    'http://www.nordgen.org/sgsv/download.php?file=/scope/sgsv/files/sgsv_by_deposit_date.xls',
    sep="\t",
    header=0,
)
In [3]:
# Peek at the first three rows to sanity-check the parse.
deposits.head(n=3)
Out[3]:
In [4]:
# Build a per-depositor totals table: one row per depositor_name, with every
# numeric column summed across that depositor's deposit events.
#
# numeric_only=True is passed explicitly: since pandas 2.0 the default is
# False, so a bare .sum() would try to "sum" text/date columns as well
# (concatenating strings or raising TypeError) instead of skipping them.
# as_index=False keeps depositor_name as a regular column for later sorting
# and plotting.
totals = deposits.groupby("depositor_name", as_index=False).sum(numeric_only=True)
In [5]:
# Rank depositors by total seed count, largest first, and keep the ten biggest.
most = (
    totals
    .sort_values("seeds", ascending=False)
    .head(n=10)
)
# Bare last expression so the notebook renders the resulting table.
most
Out[5]:
In [6]:
# Horizontal bar chart of the top depositors. Rows are re-sorted ascending
# because barh draws the first row at the bottom, which puts the largest
# depositor at the top of the chart. The trailing ';' suppresses the Axes repr.
(
    most
    .sort_values("seeds")
    .plot.barh(x="depositor_name", y="seeds", title="Top Seed Depositors", legend=False)
);