In [ ]:

    
# This adds slugs for our schools app
# To the AF ratings data we get from TEA

# First step is to create the csv files with slugs
# That's done in the Django app
# For districts, run:
# python manage.py exportslugs --shape=district
# For campuses, run: 
# python manage.py exportslugs --shape=campus

# The AF ratings csv is from TEA and is saved here:
# Google sheet here: https://docs.google.com/spreadsheets/d/1W8-eEgFmq4_zxfdN08cxyNe902mjWkso9coKB75Tok8/edit#gid=0

shapes = ['campus', 'district']
df_ratings_slugs = pd.DataFrame()

# Titlecase with exceptions
def title_except(s, exceptions):
    word_list = re.split(' ', s)       # re.split behaves as expected
    final = [word_list[0].capitalize()]
    for word in word_list[1:]:
        final.append(word if word in exceptions else word.capitalize())
    return " ".join(final)

# Format our cells
def format_cols(col, val, *args):
    if col == 'campus':
        return val \
            .str.replace(r' H S$', ' High School') \
            .str.replace(r' J H$', 'Junior High') \
            .str.replace(r' EL$', ' Elementary') \
            .str.replace(r' PRI$', ' Primary') \
            .str.replace(r' INT$', ' Intermediate') \
            .str.title()
    elif col == 'district':
        return val \
            .str.title() \
            .str.replace(r' Isd$', ' ISD') \
            .str.replace(r' Cisd$', ' CISD')
    if col == 'county':
        return val.str.title()


for shape in shapes:
    print("Working on " + shape)
    
    if shape == 'district':
        ratings_file = 'DIST'
    elif shape == 'campus':
        ratings_file = 'CAMP'
        
    # Read our files
    df_ratings = pd.read_excel(raw_dir + ratings_file + 'RATE.xlsx')
    df_slugs = pd.read_csv(raw_dir + shape + '_slugs.csv')
    
    # The columns we'll be selecting later
    final_cols = [
        "type", # 0
        "id", # 1
        "name", # 2
        "campus", # 3
        "district", # 4
        "county", # 5
        "overall", # 6
        "1_achievement", # 7
        "2_progress", # 8
        "2a_tests", # 9
        "2b_relative", # 10
        "3_gaps", # 11
        "student_pop", # 12
        "is_charter", # 13
        "is_single_campus", # 14
        "absolute_url" # 15
    ]
    
    df_ratings_slugs = df_ratings_slugs.reindex(columns=final_cols)
    
    # Set the type column
    # Either district or campus
    df_ratings[final_cols[0]] = shape
    
    # Rename columns from TEA ratings file
    df_ratings = df_ratings.rename(columns={
                                        shape.upper(): final_cols[1],
                                        "CAMPNAME": final_cols[3],
                                        "DISTNAME": final_cols[4],
                                        "CNTYNAME": final_cols[5],
                                        "D_RATING": final_cols[6],
                                        "DD1G": final_cols[7],
                                        "DD2G": final_cols[8],
                                        "DD2AG": final_cols[9],
                                        "DD2BG": final_cols[10],
                                        "DD3G": final_cols[11],
                                        "DPETALLC": final_cols[12],
                                        "DFLCHART": final_cols[13],
                                        "DFLSCD": final_cols[14]
                                    })
    
    # Change TEA ID column from ratings csv so we can merge
    df_ratings[final_cols[1]] = df_ratings[final_cols[1]].str.replace("'", "").astype('int')
    
    # Format some of the column values
#     if shape == 'campus':
#         df_ratings[final_cols[3]] = format_cols('campus', df_ratings[final_cols[3]], shape)
#     df_ratings[final_cols[4]] = format_cols('district', df_ratings[final_cols[4]], shape)
    df_ratings[final_cols[5]] = format_cols('county', df_ratings[final_cols[5]], shape)
    
    # Set name column to formatted campus or district name
    if shape == 'campus':
        df_ratings[final_cols[2]] = df_ratings[final_cols[3]]
    elif shape == 'district':
        df_ratings[final_cols[2]] = df_ratings[final_cols[4]]
        
    # Join them with slug file
    c_df_ratings_slugs = df_ratings.merge(df_slugs, left_on=final_cols[1], right_on='tea_id')
    
    # Output
    # We'll stack of our spreadsheets into one sheet
    df_ratings_slugs = df_ratings_slugs.append(c_df_ratings_slugs, ignore_index=True)
    
print("Filtering and outputting to csv")

# Now that they're stacked,
# Filter just the columns we want
df_ratings_slugs = df_ratings_slugs[final_cols].sort_values(by=final_cols[1])
    
# And output to csv
df_ratings_slugs.to_csv(output_dir + 'af_ratings_slugs.csv', index=False)
print('done')