In [1]:
import re;
import pandas as pd;
import numpy as np;

raw_dir='raw_data/'
output_dir='output/'

In [2]:
# Format our cells
def format_cols(col, val):
    if col == 'district':
        return val \
            .str.title() \
            .str.replace(r' Isd$', ' ISD') \
            .str.replace(r' Cisd$', ' CISD')
    elif col == 'county':
        return val.str.title()
    elif col == 'grade':
        return val.replace(['N/R'], "Not rated")
    
# This adds slugs for our schools app
# To the AF ratings data we get from TEA

# First step is to create the csv files with slugs
# That's done in the Django app
# To do this, run:
# python manage.py exportslugs

# The AF ratings from TEA are manually converted to a csv
# Data is available here: https://tea.texas.gov/2019accountability.aspx
df_ratings = pd.read_csv(raw_dir + 'state_overall_full.csv')

# This is the sheet we pulled from Django DB
df_slugs = pd.read_csv(raw_dir + 'campus_district_slugs.csv')
    
# The columns we'll be selecting for output
final_cols = [
    "type",
    "id",
    "name",
    "campus",
    "district",
    "county",
    "overall",
    "overall_score",
    "1_achievement",
    "1_achievement_score",
    "2_progress",
    "2_progress_score",
    "3_gaps",
    "3_gaps_score",
    "student_pop",
    "esc_region",
    "region",
    "perc_econ_disadvantage",
    "type_detail",
    "absolute_url",
]

# Rename columns from TEA ratings file
df_ratings = df_ratings.rename(columns={
                                "Campus": "campus",
                                "District": "district",
                                "County": "county",
                                "Overall Rating *": "overall",
                                "Overall Score": "overall_score",
                                "Student Achievement Rating *": "1_achievement",
                                "Student Achievement Score": "1_achievement_score",
                                "School Progress Rating *": "2_progress",
                                "School Progress Score": "2_progress_score",
                                "Closing the Gaps Rating *": "3_gaps",
                                "Closing the Gaps Score": "3_gaps_score",
                                "Number of Students": "student_pop",
                                "ESC Region": "esc_region",
                                "Region": "region",
                                "% Economically Disadvantaged": "perc_econ_disadvantage",
                                "School Type": "type_detail",
                            })

print("Setting type, id for each district, campus")
# Set properties to either based on if they are campus or district
# We do this by checking if there's a campus number and returning a value
# This indicates it's a campus, not a district
# If not, return something else cause it's a district
df_ratings["type"] = df_ratings.apply(lambda x: 'campus' if pd.notnull(x['Campus Number']) else 'district', axis=1)
df_ratings["id"] = df_ratings.apply(lambda x: x['Campus Number'] if pd.notnull(x['Campus Number']) else x['District Number'], axis=1)
df_ratings["name"] = df_ratings.apply(lambda x: x['campus'] if pd.notnull(x['Campus Number']) else x['district'], axis=1)

print("Formatting columns")

df_ratings["district"] = format_cols('district', df_ratings["district"])
df_ratings["county"] = format_cols('county', df_ratings["county"])

# Our grade columns
grade_cols = ["overall", "1_achievement", "2_progress", "3_gaps"]
for col in grade_cols:
    df_ratings[col] = format_cols('grade', df_ratings[col])
    
print("Joining with slugs csv")
df_ratings_slugs = df_ratings.merge(df_slugs, left_on="id", right_on='tea_id', how='left')

print("Filtering and outputting to csv")
df_ratings_slugs = df_ratings_slugs[final_cols]
df_ratings_slugs.to_csv(output_dir + 'af_ratings_slugs.csv', index=False)

print('done')


Setting type, id for each district, campus
Formatting columns
Joining with slugs csv
Filtering and outputting to csv
done

In [ ]: