In [71]:
import pandas as pd
import numpy as np
In [235]:
# Load the three per-disease tables from ../data/cdc, keeping only the
# FIPS code and the reported rate from each file.
rate_columns = ["FIPS", "Rate"]
df = pd.read_csv("../data/cdc/chlamydia.csv", usecols=rate_columns)
df_gon = pd.read_csv("../data/cdc/gonorrhea.csv", usecols=rate_columns)
df_syp = pd.read_csv("../data/cdc/syphilis.csv", usecols=rate_columns)
In [236]:
# Check which dtypes pandas inferred for the chlamydia columns.
df.dtypes
Out[236]:
In [237]:
# Reference table already used by the app, read as whitespace-separated text.
# `delim_whitespace=True` is deprecated since pandas 2.2; `sep=r"\s+"` is the
# documented equivalent and also works on older pandas versions.
df_2 = pd.read_csv("../app/static/js/unemployment.tsv", sep=r"\s+")
In [238]:
# Display the chlamydia frame as loaded (FIPS and Rate columns only).
df
Out[238]:
In [239]:
# Column dtypes of the frame read from unemployment.tsv.
df_2.dtypes
Out[239]:
In [240]:
# (rows, columns) of the chlamydia frame.
df.shape
Out[240]:
In [241]:
# (rows, columns) of the unemployment frame, for comparison with df.shape.
df_2.shape
Out[241]:
In [242]:
# Shape of the unemployment rows whose "id" also occurs as a FIPS value
# in the chlamydia frame.
df_2.loc[df_2["id"].isin(df["FIPS"].values)].shape
Out[242]:
In [243]:
# Reverse check: shape of the chlamydia rows whose FIPS value also occurs
# as an "id" in the unemployment frame.
df.loc[df["FIPS"].isin(df_2["id"].values)].shape
Out[243]:
In [244]:
# Sort the Rate column to inspect its extremes.
# NOTE(review): Rate looks like a string column at this point (it is compared
# against a string and processed with .str in later cells), so this ordering
# would be lexicographic rather than numeric — confirm.
df["Rate"].sort_values()
Out[244]:
In [245]:
# Show the rows whose Rate holds the textual placeholder instead of a number.
df.loc[df['Rate'].eq("Data not available")]
Out[245]:
In [246]:
# Swap the textual placeholder for a per-disease default rate. The values stay
# strings here; numeric conversion happens in a later cell.
# NOTE(review): the defaults equal the fillna values used further down and are
# presumably the mean rate of each disease — confirm where they come from.
missing_marker = 'Data not available'
df['Rate'] = df['Rate'].str.replace(missing_marker, '316.71')
df_gon['Rate'] = df_gon['Rate'].str.replace(missing_marker, '65.30')
df_syp['Rate'] = df_syp['Rate'].str.replace(missing_marker, '2.64')
In [247]:
# Display the chlamydia frame again to check the placeholder replacement.
df
Out[247]:
In [248]:
# Convert every column to a numeric dtype; values that cannot be parsed
# become NaN (they are filled with a default in a later cell).
# DataFrame.convert_objects() was deprecated and then removed in pandas 1.0;
# pd.to_numeric(errors="coerce") applied per column is the supported
# replacement. DataFrame.apply returns a new frame, so df / df_gon / df_syp
# are left untouched and no explicit .copy() is needed.
df_test = df.apply(pd.to_numeric, errors="coerce")
df_test_gon = df_gon.apply(pd.to_numeric, errors="coerce")
df_test_syp = df_syp.apply(pd.to_numeric, errors="coerce")
df_test.dtypes
Out[248]:
In [249]:
# Summary statistics of the numeric chlamydia frame; `count` excludes NaN,
# so it shows how many values failed numeric conversion.
df_test.describe()
Out[249]:
In [250]:
# Confirm the dtypes of all three converted frames at once.
df_test.dtypes, df_test_gon.dtypes, df_test_syp.dtypes
Out[250]:
In [251]:
# Mean syphilis rate (NaN values are skipped by default).
# NOTE(review): presumably how the 2.64 default used elsewhere was obtained — confirm.
df_test_syp["Rate"].mean()
Out[251]:
In [255]:
# Fill whatever is still missing after numeric conversion with the
# per-disease default, modifying each frame in place.
# NOTE(review): fillna runs on the whole frame, so a missing FIPS would be
# filled with the rate default as well — confirm FIPS has no gaps.
for frame, default_rate in (
    (df_test, 316.71),
    (df_test_gon, 65.30),
    (df_test_syp, 2.64),
):
    frame.fillna(value=default_rate, inplace=True)
In [256]:
# Write each cleaned per-disease frame to ../app/static/js, without the index.
exports = (
    ("../app/static/js/chlamydia.csv", df_test),
    ("../app/static/js/gonorrhea.csv", df_test_gon),
    ("../app/static/js/syphilis.csv", df_test_syp),
)
for out_path, frame in exports:
    frame.to_csv(out_path, index=False)
In [261]:
# Inner-join the chlamydia and gonorrhea frames on FIPS, keeping only codes
# present in both. Both inputs carry a "Rate" column, so pandas' default
# suffixes rename them to "Rate_x" (chlamydia) and "Rate_y" (gonorrhea).
# The trailing .convert_objects(convert_numeric=True) was dropped: the method
# was removed in pandas 1.0, and it was a no-op here because both inputs were
# already converted to numeric dtypes in an earlier cell.
df_merged = pd.merge(df_test, df_test_gon, on='FIPS', how='inner', sort=False)
In [263]:
# Add the syphilis rates with another inner join on FIPS. df_merged already
# holds "Rate_x" and "Rate_y", so the syphilis column keeps the plain name
# "Rate" (no overlap, hence no suffix).
# The trailing .convert_objects(convert_numeric=True) was dropped: the method
# was removed in pandas 1.0, and it was a no-op here because the inputs were
# already numeric.
df_all = pd.merge(df_merged, df_test_syp, on='FIPS', how='inner', sort=False)
In [264]:
# Preview the first rows of the combined frame (FIPS plus one rate column per disease).
df_all.head()
Out[264]:
In [265]:
# Write the combined three-disease table to ../app/static/js, without the index column.
df_all.to_csv("../app/static/js/stds.csv", index=False)
In [ ]: