In [ ]:
# This adds slugs for our schools app
# To the AF ratings data we get from TEA
# First step is to create the csv files with slugs
# That's done in the Django app
# For districts, run:
# python manage.py exportslugs --shape=district
# For campuses, run:
# python manage.py exportslugs --shape=campus
# The AF ratings csv is from TEA and is saved here:
# Google sheet here: https://docs.google.com/spreadsheets/d/1W8-eEgFmq4_zxfdN08cxyNe902mjWkso9coKB75Tok8/edit#gid=0
shapes = ['campus', 'district']
df_ratings_slugs = pd.DataFrame()
# Titlecase with exceptions
def title_except(s, exceptions):
word_list = re.split(' ', s) # re.split behaves as expected
final = [word_list[0].capitalize()]
for word in word_list[1:]:
final.append(word if word in exceptions else word.capitalize())
return " ".join(final)
# Format our cells
def format_cols(col, val, *args):
if col == 'campus':
return val \
.str.replace(r' H S$', ' High School') \
.str.replace(r' J H$', 'Junior High') \
.str.replace(r' EL$', ' Elementary') \
.str.replace(r' PRI$', ' Primary') \
.str.replace(r' INT$', ' Intermediate') \
.str.title()
elif col == 'district':
return val \
.str.title() \
.str.replace(r' Isd$', ' ISD') \
.str.replace(r' Cisd$', ' CISD')
if col == 'county':
return val.str.title()
for shape in shapes:
print("Working on " + shape)
if shape == 'district':
ratings_file = 'DIST'
elif shape == 'campus':
ratings_file = 'CAMP'
# Read our files
df_ratings = pd.read_excel(raw_dir + ratings_file + 'RATE.xlsx')
df_slugs = pd.read_csv(raw_dir + shape + '_slugs.csv')
# The columns we'll be selecting later
final_cols = [
"type", # 0
"id", # 1
"name", # 2
"campus", # 3
"district", # 4
"county", # 5
"overall", # 6
"1_achievement", # 7
"2_progress", # 8
"2a_tests", # 9
"2b_relative", # 10
"3_gaps", # 11
"student_pop", # 12
"is_charter", # 13
"is_single_campus", # 14
"absolute_url" # 15
]
df_ratings_slugs = df_ratings_slugs.reindex(columns=final_cols)
# Set the type column
# Either district or campus
df_ratings[final_cols[0]] = shape
# Rename columns from TEA ratings file
df_ratings = df_ratings.rename(columns={
shape.upper(): final_cols[1],
"CAMPNAME": final_cols[3],
"DISTNAME": final_cols[4],
"CNTYNAME": final_cols[5],
"D_RATING": final_cols[6],
"DD1G": final_cols[7],
"DD2G": final_cols[8],
"DD2AG": final_cols[9],
"DD2BG": final_cols[10],
"DD3G": final_cols[11],
"DPETALLC": final_cols[12],
"DFLCHART": final_cols[13],
"DFLSCD": final_cols[14]
})
# Change TEA ID column from ratings csv so we can merge
df_ratings[final_cols[1]] = df_ratings[final_cols[1]].str.replace("'", "").astype('int')
# Format some of the column values
# if shape == 'campus':
# df_ratings[final_cols[3]] = format_cols('campus', df_ratings[final_cols[3]], shape)
# df_ratings[final_cols[4]] = format_cols('district', df_ratings[final_cols[4]], shape)
df_ratings[final_cols[5]] = format_cols('county', df_ratings[final_cols[5]], shape)
# Set name column to formatted campus or district name
if shape == 'campus':
df_ratings[final_cols[2]] = df_ratings[final_cols[3]]
elif shape == 'district':
df_ratings[final_cols[2]] = df_ratings[final_cols[4]]
# Join them with slug file
c_df_ratings_slugs = df_ratings.merge(df_slugs, left_on=final_cols[1], right_on='tea_id')
# Output
# We'll stack of our spreadsheets into one sheet
df_ratings_slugs = df_ratings_slugs.append(c_df_ratings_slugs, ignore_index=True)
print("Filtering and outputting to csv")
# Now that they're stacked,
# Filter just the columns we want
df_ratings_slugs = df_ratings_slugs[final_cols].sort_values(by=final_cols[1])
# And output to csv
df_ratings_slugs.to_csv(output_dir + 'af_ratings_slugs.csv', index=False)
print('done')