In [1]:
import pandas as pd
import matplotlib.pyplot as plt
In [2]:
# Load the gzip-compressed CSV into the notebook-wide frame `df`.
# The file name has no extension, so the compression is stated explicitly.
source_path = "83f8a712c4cf05ce7492cfade8170169cca301a5"
df = pd.read_csv(source_path, compression="gzip")
In [3]:
# Preview the first five rows (5 is the pandas default for head()).
df.head(5)
Out[3]:
In [4]:
# Number of rows in the loaded frame.
df.shape[0]
Out[4]:
In [5]:
# Normalise the "Fulltext Searchable" column:
#   1. trim surrounding whitespace,
#   2. fold the variant spellings "0" and "no" into the canonical "No".
flag_col = "Fulltext Searchable"
df[flag_col] = df[flag_col].str.strip()
negative_variants = ["0", "no"]
df.loc[df[flag_col].isin(negative_variants), flag_col] = "No"
In [6]:
# Row count per distinct "Fulltext Searchable" value after normalisation.
searchable_counts = df.groupby("Fulltext Searchable").size()
searchable_counts
Out[6]:
In [7]:
# Pie chart of the number of rows per "Fulltext Searchable" value.
searchable_counts = df.groupby("Fulltext Searchable").size()
ax = searchable_counts.plot(
    kind="pie",
    # N is derived from the frame instead of being hard-coded (was 4328) so the
    # title cannot drift out of sync with the data when the input file changes.
    # NOTE(review): groupby drops rows whose key is missing by default, so the
    # slices can sum to less than N if the column contains NaN -- confirm that
    # N is meant to be the total row count.
    title="Fulltext Searchable (Collection, N={})".format(len(df)),
    figsize=(10, 6),
)
# Blank out the y-axis label; an explicit empty string avoids relying on how
# matplotlib converts None. The trailing ';' suppresses the cell's text repr.
ax.set_ylabel("");
In [8]:
# The 20 rows with the highest "Record Count", showing only name and count.
(
    df
    .sort_values(by="Record Count", ascending=False)
    .loc[:, ["Collection Name", "Record Count"]]
    .head(20)
)
Out[8]:
In [9]:
# Trim whitespace from "Fulltext Searchable" again. The same strip is applied
# in the earlier cleaning cell and str.strip is idempotent, so this changes
# nothing when the notebook is run top to bottom.
stripped_flags = df["Fulltext Searchable"].str.strip()
df["Fulltext Searchable"] = stripped_flags
In [10]:
# Re-check the per-value row counts of "Fulltext Searchable".
counts_by_flag = df.groupby(by="Fulltext Searchable").size()
counts_by_flag
Out[10]:
In [11]:
# The 30 rows with the highest "Record Count" among those flagged "Yes".
is_searchable = df["Fulltext Searchable"] == "Yes"
shown_columns = ["Collection Name", "Provider Name", "Fulltext Searchable", "Record Count"]
(
    df[is_searchable]
    .sort_values(by="Record Count", ascending=False)
    .loc[:, shown_columns]
    .head(30)
)
Out[11]:
In [12]:
# The 30 rows with the highest "Record Count" among those flagged "No".
not_searchable = df["Fulltext Searchable"] == "No"
shown_columns = ["Collection Name", "Provider Name", "Fulltext Searchable", "Record Count"]
(
    df[not_searchable]
    .sort_values(by="Record Count", ascending=False)
    .loc[:, shown_columns]
    .head(30)
)
Out[12]:
In [ ]: