In [1]:
import re;
import pandas as pd;
import numpy as np;
raw_dir='raw_data/'
output_dir='output/'
In [2]:
# Format our cells
def format_cols(col, val):
if col == 'district':
return val \
.str.title() \
.str.replace(r' Isd$', ' ISD') \
.str.replace(r' Cisd$', ' CISD')
elif col == 'county':
return val.str.title()
elif col == 'grade':
return val.replace(['N/R'], "Not rated")
# This adds slugs for our schools app
# To the AF ratings data we get from TEA
# First step is to create the csv files with slugs
# That's done in the Django app
# To do this, run:
# python manage.py exportslugs
# The AF ratings from TEA are manually converted to a csv
# Data is available here: https://tea.texas.gov/2019accountability.aspx
df_ratings = pd.read_csv(raw_dir + 'state_overall_full.csv')
# This is the sheet we pulled from Django DB
df_slugs = pd.read_csv(raw_dir + 'campus_district_slugs.csv')
# The columns we'll be selecting for output
final_cols = [
"type",
"id",
"name",
"campus",
"district",
"county",
"overall",
"overall_score",
"1_achievement",
"1_achievement_score",
"2_progress",
"2_progress_score",
"3_gaps",
"3_gaps_score",
"student_pop",
"esc_region",
"region",
"perc_econ_disadvantage",
"type_detail",
"absolute_url",
]
# Rename columns from TEA ratings file
df_ratings = df_ratings.rename(columns={
"Campus": "campus",
"District": "district",
"County": "county",
"Overall Rating *": "overall",
"Overall Score": "overall_score",
"Student Achievement Rating *": "1_achievement",
"Student Achievement Score": "1_achievement_score",
"School Progress Rating *": "2_progress",
"School Progress Score": "2_progress_score",
"Closing the Gaps Rating *": "3_gaps",
"Closing the Gaps Score": "3_gaps_score",
"Number of Students": "student_pop",
"ESC Region": "esc_region",
"Region": "region",
"% Economically Disadvantaged": "perc_econ_disadvantage",
"School Type": "type_detail",
})
print("Setting type, id for each district, campus")
# Set properties to either based on if they are campus or district
# We do this by checking if there's a campus number and returning a value
# This indicates it's a campus, not a district
# If not, return something else cause it's a district
df_ratings["type"] = df_ratings.apply(lambda x: 'campus' if pd.notnull(x['Campus Number']) else 'district', axis=1)
df_ratings["id"] = df_ratings.apply(lambda x: x['Campus Number'] if pd.notnull(x['Campus Number']) else x['District Number'], axis=1)
df_ratings["name"] = df_ratings.apply(lambda x: x['campus'] if pd.notnull(x['Campus Number']) else x['district'], axis=1)
print("Formatting columns")
df_ratings["district"] = format_cols('district', df_ratings["district"])
df_ratings["county"] = format_cols('county', df_ratings["county"])
# Our grade columns
grade_cols = ["overall", "1_achievement", "2_progress", "3_gaps"]
for col in grade_cols:
df_ratings[col] = format_cols('grade', df_ratings[col])
print("Joining with slugs csv")
df_ratings_slugs = df_ratings.merge(df_slugs, left_on="id", right_on='tea_id', how='left')
print("Filtering and outputting to csv")
df_ratings_slugs = df_ratings_slugs[final_cols]
df_ratings_slugs.to_csv(output_dir + 'af_ratings_slugs.csv', index=False)
print('done')
In [ ]: