In [1]:
# TO DO:
#  * Add NWEA targets
#  * Add baseline to counts
#  * Add conditional formatting to counts

# KNOWN ISSUES:
#  * Salesforce won't accept multiple rows for the same student id, so can't upload scores from multiple time periods.
#  * Testing grade level for prior year assessments (date=7/1/2017) is incorrect. Will batch-adjust update these at some point.

from cyschoolhousesuite import *
import xlwings as xw
import os
import numpy as np
import csv

In [2]:
### NEW ###
def gather_CYSH_Assessments_from_Schools(sch_ref_path="Z:\\ChiPrivate\\Chicago Data and Evaluation\\SY18\\SY18 Team Placement\\SY18 FINAL Team Placements.xlsx"):
    """
    Reads in a spreadsheet of 'School', 'Informal School Name' (file-friendly), and 'cyconnect folder'.
    Iterates through schools to gather each school's assessment spreadsheet.
    Update file_path construction to suite your cyconnect file structure.
    """
    sch_ref_df = pd.read_excel(sch_ref_path, sheetname="School Data")
    all_read_dfs = []
    for index, row in sch_ref_df.iterrows():
        file_path = "Z:\\" + row["cyconnect Folder"] + "\\School Data\\CYSH Assessments - " + row['Informal School Name'] + ".xlsx"
        try:
            read_df = pd.read_excel(file_path, index_col=None, skiprows=1)
            read_df = read_df[~read_df['School'].isnull()]
            all_read_dfs.append(read_df)
        except:
            print("Failed: Loading " 
                  + file_path)
            continue
    all_read_dfs = pd.concat(all_read_dfs, axis = 0, ignore_index=True)
    return all_read_dfs

def prep_course_performance_record_df():
    """
    Get cyschoolhouse report of Course Performance FLs to use as official source of Local Student ID
    This is just to get student details, not which section or program stduents are in, so we can drop duplicates on CY-ID
    """
    CP_df = get_report("00O1a000002rm2S")
    
    CP_df.rename(columns={'Student Id':'CY-ID'}, inplace=True)
               
    cols = ['Account Name',
            'Local Student ID',
            'CY-ID',
            'Student: Student Name',
            'Student: Grade',
            'Program: Program Name']

    CP_df = CP_df[CP_df.columns.intersection(cols)]

    enrollment_df = CP_df[['CY-ID', 'Program: Program Name']].pivot_table(index='CY-ID', columns='Program: Program Name', aggfunc=len, fill_value=np.nan)
    enrollment_df = enrollment_df.reset_index(level='CY-ID')
    enrollment_df.rename(columns={'Coaching: Attendance': 'ADA Enrollment',
                                  'Tutoring: Literacy': 'ELA Enrollment',
                                  'Tutoring: Math': 'MATH Enrollment'}, inplace=True)
    
    CP_df.drop(['Program: Program Name'],axis=1, inplace=True)
    CP_df.drop_duplicates(subset='CY-ID', inplace=True)

    CP_df = CP_df.merge(enrollment_df, how='left', on='CY-ID')
    
    return CP_df

def download_one_school_template(school_name, df, cysh_assmnt_name, driver):
    # generate assessment by CY-ID
    driver.get("https://c.na24.visual.force.com/apex/AssessmentWizard")
    
    # Enter school name
    driver.find_element_by_xpath("//select[@name='thePage:theform:groupBlock:j_id110:schoolSelect']/option[text()="
                                 + '"' + school_name + '"]').click()
    sleep(1)
    
    # click search and wait
    driver.find_element_by_xpath(".//input[contains(@value, 'Search')]").click()
    WebDriverWait(driver, 30).until(EC.presence_of_element_located((By.XPATH, ".//input[contains(@aria-controls, 'srchstdtable')]")))
    
    # Check boxes for each student
    failed_ids = []
    for y in df['CY-ID'].unique():
        try:
            driver.find_element_by_xpath(".//input[contains(@aria-controls, 'srchstdtable')]").clear()
            driver.find_element_by_xpath(".//input[contains(@aria-controls, 'srchstdtable')]").send_keys(y)
            driver.find_element_by_xpath(".//input[contains(@onclick, '" + y + "')]").click()
        except:
            print('Error: filling template, could not select student: ' + y)
            # set some variable
            failed_ids.append(y)
            if len(failed_ids) == len(df['CY-ID'].unique()):
                print('Error: filling template, could not select any students from school')
                df = []
                return df, failed_ids
            else:
                pass
    
    # Add selected students
    driver.find_element_by_xpath('//*[@id="thePage:theform:groupBlock:j_id157"]').click()
    WebDriverWait(driver, 30).until(EC.presence_of_element_located((By.XPATH, ".//input[contains(@value, 'Next') and contains(@type, 'button')]")))
    # Click Next
    driver.find_element_by_xpath(".//input[contains(@value, 'Next') and contains(@type, 'button')]").click()
    WebDriverWait(driver, 30).until(EC.presence_of_element_located((By.XPATH, ".//a[contains(@class, 'btn black_btn') and contains(text(), 'Download Template')]")))

    # Choose Assessment type
    driver.find_element_by_xpath("//select[@name='thePage:theform:j_id160:j_id162']/option[text()='" 
                                 + cysh_assmnt_name
                                 + "']").click()
    sleep(3)
    
    # Download
    driver.find_element_by_xpath(".//a[contains(@class, 'btn black_btn') and contains(text(), 'Download Template')]").click()
    
    # Accept pop-up to download assessment
    EC.alert_is_present
    driver.switch_to.alert.accept()
    sleep(4)
    list_of_files = [os.path.join(temp_path, x) for x in os.listdir(temp_path) if x.endswith(".csv")]
    latest_file = max(list_of_files , key = os.path.getctime)
    df = pd.read_csv(latest_file, header=1)
    n_missing_ids = len(df[df['Student USID'].astype(object)=='null'])
    if n_missing_ids > 0:
        print('Warning: ' + str(n_missing_ids) + ' students have missing CPS IDs in cyschoolhouse')
    if n_missing_ids == len(df['Student USID']):
        print('Error: all students missing IDs in cyschoolhouse')
        df = []
        return df, failed_ids
    df = df[df['Student USID'].astype(object) != 'null']
    df['Student USID'] = df['Student USID'].astype(float)
    return df, failed_ids

def upload_assessment_csv(upload_csv_path, driver):
    driver.get("https://c.na24.visual.force.com/apex/AssessmentWizard")
    driver.find_element_by_xpath(".//input[contains(@value, 'Import Assessments')]").click()
    WebDriverWait(driver, 30).until(EC.presence_of_element_located((By.XPATH, '//*[@id="thePage:theForm:j_id116:j_id117"]')))
    driver.find_element_by_xpath('//*[@id="thePage:theForm:j_id116:j_id117"]').send_keys(upload_csv_path)
    driver.find_element_by_xpath(".//input[contains(@value, 'Import')]").click()
    WebDriverWait(driver, 30).until(EC.presence_of_element_located((By.XPATH, '//*[@id="dataDiv"]')))
    driver.find_element_by_xpath(".//input[contains(@value, 'Save')]").click()
    sleep(5)
    
def fill_template(template_df, one_school_df, assessment_type):
    df_cols = ['CPS ID', 'Grade', 'Score', 'Assessment Date']
    
    df = template_df.merge(one_school_df[df_cols], how='left', left_on='Student USID', right_on='CPS ID')
    
    df['Date Administered (Date_Administered__c)'] = df['Assessment Date']

    if "ADA" in assessment_type:
        df['Average Daily Attendance (Average_Daily_Attendance__c)'] = df['Score']
        df.merge(assmt_schdl[['Date', 'Time Period']], left_on='Assessment Date', right_on='Date')
        df['Time Period (Time_Period__c)'] = df['Time Period']
        df = df.drop('Time Period', axis=1)
        
    if "NWEA" in assessment_type:
        df['0 to 300 Scaled Score (X0_to_300_Scaled_Score__c)'] = df['Score']
        df['Testing Grade Level (Testing_Grade_Level__c)'] = df['Grade']

    df = df.drop(df_cols, axis=1)
    
    return df

In [40]:
##########################
# Gather New Assessments #
##########################

# Load Assessments, Dates, make 'Assessment_Date' key
assmt_schdl = pd.read_excel('input_files\\CHI-Assessment-Schedule.xlsx')
assmt_schdl['Date'] = assmt_schdl['Date'].dt.strftime('%m/%d/%Y')
assmt_schdl['Key'] = assmt_schdl['Assessment'] + "_" + assmt_schdl['Date']
assmt_schdl['Friendly Name'] = assmt_schdl['Subject'] + " " + assmt_schdl['Time Period']

In [4]:
# Gather cyconnect spreadsheets
SD_df = gather_CYSH_Assessments_from_Schools(sch_ref_path="Z:\\ChiPrivate\\Chicago Data and Evaluation\\SY18\\SY18 Team Placement\\SY18 FINAL Team Placements.xlsx")
SD_df.to_excel("Z:\\ChiPrivate\\Chicago Reports and Evaluation\\SY18\\SY18 Performance Data Audits\\CYSH Assessments - Whole Site.xlsx", index=False)
SD_df.drop(['Att', 'ELA', 'Math'], axis=1, inplace=True)

# Rename columns
SD_df.columns = list(SD_df)[0:5] + list(assmt_schdl['Key'])

id_cols = ['School', 'Student Name', 'CPS ID', 'CY-ID', 'Grade']

# melt so each row is an assessment score and assessment name
SD_df = SD_df.melt(id_vars=id_cols, var_name='Assessment', value_name='Score')
SD_df = SD_df[~SD_df['Score'].isnull()]
SD_df['Assessment'], SD_df['Assessment Date'] = SD_df['Assessment'].str.split('_', 1).str
SD_df['Key'] = SD_df['CY-ID'] + "_" + SD_df['Assessment'] + "_" + SD_df['Assessment Date']

assmt_df = get_report("00O1a000002rlOS")
assmt_df.rename(columns={'Student_Id':'CY-ID'}, inplace=True)
assmt_df['Date Administered'] = pd.to_datetime(assmt_df['Date Administered'])
assmt_df['Date Administered'] = assmt_df['Date Administered'].dt.strftime('%m/%d/%Y')
assmt_df['Key'] = assmt_df['CY-ID'] + "_" + assmt_df['Assessment Type: Display Value'] + "_" + assmt_df['Date Administered']

# remove CY-ID_assessment type_assessment date that occur in assessment report from CYSH
SD_df = SD_df[~SD_df['Key'].isin(assmt_df['Key'])]

print("{} new assessments".format(len(SD_df)))


0 new assessments

In [145]:
##########################
# Upload New Assessments #
##########################

driver = get_driver()
open_cyschoolhouse18(driver)

all_failed_ids = []

for assessment_type in SD_df['Assessment'].unique():
    one_assessment_df = SD_df.loc[SD_df['Assessment']==assessment_type]
    
    for school_name in one_assessment_df['School'].unique():
        print('Starting: {}: {}'.format(assessment_type, school_name))
        one_school_df = one_assessment_df.loc[one_assessment_df['School']==school_name]
        
        if len(one_school_df.loc[one_school_df['CY-ID'].duplicated()]) > 0:
            print('Warning: {} entries were duplicated on CY-ID. Kept one. Re-run to upload the other.'.format(len(one_school_df.loc[one_school_df['CY-ID'].duplicated()])))
            one_school_df.drop_duplicates('CY-ID', inplace=True)
        
        if len(one_school_df.loc[one_school_df['CPS ID'].duplicated(keep=False)]) > 0:
            print('Warning: {} entries were duplicated on CPS ID. Removed from analysis.'.format(len(one_school_df.loc[one_school_df['CPS ID'].duplicated()])))
            one_school_df.drop_duplicates('CPS ID', inplace=True, keep=False)
        
        template_df, failed_ids = download_one_school_template(school_name=school_name, 
                                                               df=one_school_df, 
                                                               cysh_assmnt_name=assessment_type, 
                                                               driver=driver)
        all_failed_ids = all_failed_ids + failed_ids
        
        if len(template_df) == 0:
            continue
        
        # Load downloaded template file to get header
        list_of_files = [os.path.join(temp_path, x) for x in os.listdir(temp_path) if x.endswith(".csv")]
        latest_file = max(list_of_files , key = os.path.getctime)
        og_header = [list(pd.read_csv(latest_file, header=0))[0:2]]
        
        try:
            template_df = fill_template(template_df, one_school_df, assessment_type)
        except:
            print("Failed: filling template")
            continue
            
        try:
            # overwrite csv with filled template
            template_df.to_csv(latest_file, index=False)

            # load csv again to add header back in
            with open(latest_file) as f:
                reader = csv.reader(f)
                for row in reader:
                    og_header.append(row)

            # Write to csv again, now with added header row
            final_df = pd.DataFrame(og_header)
            final_df.to_csv(latest_file, index=False, header=False)
        except:
            print("Failed: writing .csv")
            continue
                
        try:
            # Upload to cyschoolhouse
            upload_assessment_csv(upload_csv_path=latest_file,
                                  driver=driver)
            #os.remove(latest_file)
        except:
            print("Failed: upoading .csv")
            continue
            
        print("Complete!")
        
    #print("Starting Failed ID's")
    #for student_id in all_failed_ids:
    #    one_student_df = one_assessment_df[one_assessment_df['CY-ID']==student_id]
    #    template_df = download_one_student_school_template(school_name=school_name,
    #                                                       df=one_school_df, 
    #                                                       cysh_assmnt_name=assessment_type, 
    #                                                       driver=driver)
    #

driver.quit()


Starting: NWEA - ELA: Amos Alonzo Stagg Elementary School of Excellence
Complete!

In [ ]:
#########################
# Refresh Spreadshseets #
#########################

# Distribute current CYSH info to School Data folders
CP_df = prep_course_performance_record_df()

# Load all Assessments in cyschoolhouse
assmt_df = get_report("00O1a000002rlOS")
assmt_df.rename(columns={'Student_Id':'CY-ID'}, inplace=True)
assmt_df['Date Administered'] = pd.to_datetime(assmt_df['Date Administered']).dt.strftime('%m/%d/%Y')
assmt_df['Key'] = assmt_df['Assessment Type: Display Value'] + "_" + assmt_df['Date Administered']
assmt_df = assmt_df[assmt_df['Key'].isin(assmt_schdl['Key'])]
assmt_df['Value'] = assmt_df[['Average Daily Attendance', '0 to 300 Scaled Score']].sum(axis=1)

# Pivot assmt_df and merge to CP_df
assmt_pivot = assmt_df[['CY-ID','Key','Value']].pivot_table(index='CY-ID', columns='Key', values='Value', fill_value=np.nan)
assmt_pivot = assmt_pivot.reset_index(level='CY-ID')
assmt_pivot = CP_df.merge(assmt_pivot, how='left', on="CY-ID")
cols = [x for x in list(assmt_pivot) if x not in list(assmt_schdl['Key'])] + list(assmt_schdl['Key'])
cols_df = pd.DataFrame(columns=cols)
assmt_pivot = pd.concat([cols_df,assmt_pivot])[cols]
assmt_pivot.rename(columns=dict(zip(list(assmt_schdl['Key']), list(assmt_schdl['Friendly Name']))), inplace=True)

# Add NWEA Baseline cols
assmt_pivot['NWEA ELA Baseline'] = assmt_pivot[['NWEA ELA Prior Yr', 'NWEA ELA Fall']].bfill(axis=1).iloc[:, 0]
assmt_pivot['NWEA MATH Baseline'] = assmt_pivot[['NWEA MATH Prior Yr', 'NWEA MATH Fall']].bfill(axis=1).iloc[:, 0]

# Count number of assessments for enrolled students
assmt_counts_ADA = assmt_pivot[assmt_pivot['ADA Enrollment']>0].groupby(by='Account Name')[[x for x in assmt_pivot.columns if 'ADA' in x]].count()
assmt_counts_ELA = assmt_pivot[assmt_pivot['ELA Enrollment']>0].groupby(by='Account Name')[[x for x in assmt_pivot.columns if 'ELA' in x]].count()
assmt_counts_MATH = assmt_pivot[assmt_pivot['MATH Enrollment']>0].groupby(by='Account Name')[[x for x in assmt_pivot.columns if 'MATH' in x.upper()]].count()
assmt_counts = pd.concat([assmt_counts_ADA, assmt_counts_ELA, assmt_counts_MATH], axis=1)

# resort cols for baseline placement
cols = [x for x in list(assmt_counts) if x not in ['NWEA ELA Baseline', 'NWEA MATH Baseline']]
cols = cols[0:8] + ['NWEA ELA Baseline'] + cols[8:13] + ['NWEA MATH Baseline'] + cols[13:]
assmt_counts = assmt_counts[cols]

In [72]:
# Write counts to Excel
counts_path = 'Z:\\ChiPrivate\\Chicago Data and Evaluation\\SY18\\CYSH Assessment and Enrollment Counts.xlsx'
wb = xw.Book(counts_path)
sht = wb.sheets['Sheet1']
sht.range('B2:S27').clear_contents()
sht.range('B1').options(header=True, index=True).value = assmt_counts
wb.save(counts_path)
wb.close()

In [11]:
# Prep for school folders
cols = ['ELA Enrollment', 'MATH Enrollment', 'ADA Enrollment']
assmt_pivot[cols] = assmt_pivot[cols].replace((1, np.nan),('Yes', ''))

sch_ref_path = "Z:\\ChiPrivate\\Chicago Data and Evaluation\\SY18\\SY18 Team Placement\\SY18 FINAL Team Placements.xlsx"
sch_ref_df = pd.read_excel(sch_ref_path, sheetname="School Data")
sch_ref_df = sch_ref_df[pd.notnull(sch_ref_df['School'])]
assmt_pivot.sort_values(by='Student: Student Name', inplace=True)

In [9]:
# Add NWEA targets here
nwea_targets = pd.read_csv('C:\\Users\\CLuedtke\\GitHub\\cy-automation-library\\cyautomation\\cyschoolhouse\\input_files\\NWEA_Target_Scores.csv')
nwea_targets

# Remove NWEA baselines here


Out[9]:
Subject Ending Grade StartRIT R22 Key
0 Math 3 100 22 Math3100
1 Math 3 101 22 Math3101
2 Math 3 102 22 Math3102
3 Math 3 103 21 Math3103
4 Math 3 104 21 Math3104
5 Math 3 105 21 Math3105
6 Math 3 106 21 Math3106
7 Math 3 107 21 Math3107
8 Math 3 108 21 Math3108
9 Math 3 109 21 Math3109
10 Math 3 110 21 Math3110
11 Math 3 111 21 Math3111
12 Math 3 112 20 Math3112
13 Math 3 113 20 Math3113
14 Math 3 114 20 Math3114
15 Math 3 115 20 Math3115
16 Math 3 116 20 Math3116
17 Math 3 117 20 Math3117
18 Math 3 118 20 Math3118
19 Math 3 119 20 Math3119
20 Math 3 120 20 Math3120
21 Math 3 121 19 Math3121
22 Math 3 122 19 Math3122
23 Math 3 123 19 Math3123
24 Math 3 124 19 Math3124
25 Math 3 125 19 Math3125
26 Math 3 126 19 Math3126
27 Math 3 127 19 Math3127
28 Math 3 128 19 Math3128
29 Math 3 129 19 Math3129
... ... ... ... ... ...
3582 Reading 8 371 -20 Reading8371
3583 Reading 8 372 -20 Reading8372
3584 Reading 8 373 -20 Reading8373
3585 Reading 8 374 -20 Reading8374
3586 Reading 8 375 -20 Reading8375
3587 Reading 8 376 -20 Reading8376
3588 Reading 8 377 -20 Reading8377
3589 Reading 8 378 -21 Reading8378
3590 Reading 8 379 -21 Reading8379
3591 Reading 8 380 -21 Reading8380
3592 Reading 8 381 -21 Reading8381
3593 Reading 8 382 -21 Reading8382
3594 Reading 8 383 -21 Reading8383
3595 Reading 8 384 -21 Reading8384
3596 Reading 8 385 -22 Reading8385
3597 Reading 8 386 -22 Reading8386
3598 Reading 8 387 -22 Reading8387
3599 Reading 8 388 -22 Reading8388
3600 Reading 8 389 -22 Reading8389
3601 Reading 8 390 -22 Reading8390
3602 Reading 8 391 -22 Reading8391
3603 Reading 8 392 -23 Reading8392
3604 Reading 8 393 -23 Reading8393
3605 Reading 8 394 -23 Reading8394
3606 Reading 8 395 -23 Reading8395
3607 Reading 8 396 -23 Reading8396
3608 Reading 8 397 -23 Reading8397
3609 Reading 8 398 -23 Reading8398
3610 Reading 8 399 -24 Reading8399
3611 Reading 8 400 -24 Reading8400

3612 rows × 5 columns


In [104]:
# Write Assessment data to school folders
wb = xw.Book("Z:\\ChiPrivate\\Chicago Reports and Evaluation\\SY18\\Eval Management\\Templates\\CYSH Assessments Template.xlsx")
sht = wb.sheets['Assessment Data']

for x in assmt_pivot["Account Name"].unique():
    sht.range('A3:Z300').clear_contents()
    sht.range('A3').options(index=False, header=False).value = assmt_pivot[assmt_pivot['Account Name'] == x]
    wb.save("Z:\\" 
            + sch_ref_df['cyconnect Folder'][sch_ref_df['School'] == x].values[0]
            + "\\School Data\\CYSH Assessments - "
            + sch_ref_df['Informal School Name'][sch_ref_df['School'] == x].values[0]
            + ".xlsx")

wb.close()

In [ ]:
## Delete files in School Data folders
#for x in sch_ref_df['School'].unique():
#    os.remove("Z:\\" 
#              + sch_ref_df['cyconnect Folder'][sch_ref_df['School'] == x].values[0]
#              + "\\School Data\\SY18 cyschoolhouse Assessment Check - "
#              + sch_ref_df['Informal School Name'][sch_ref_df['School'] == x].values[0]
#              + ".xlsx")