In [1]:
# TO DO:
# * Add NWEA targets
# * Add baseline to counts
# * Add conditional formatting to counts
# KNOWN ISSUES:
# * Salesforce won't accept multiple rows for the same student id, so can't upload scores from multiple time periods.
# * Testing grade level for prior year assessments (date=7/1/2017) is incorrect. Will batch-adjust update these at some point.
from cyschoolhousesuite import *
import xlwings as xw
import os
import numpy as np
import csv
In [2]:
### NEW ###
def gather_CYSH_Assessments_from_Schools(sch_ref_path="Z:\\ChiPrivate\\Chicago Data and Evaluation\\SY18\\SY18 Team Placement\\SY18 FINAL Team Placements.xlsx"):
"""
Reads in a spreadsheet of 'School', 'Informal School Name' (file-friendly), and 'cyconnect folder'.
Iterates through schools to gather each school's assessment spreadsheet.
Update file_path construction to suite your cyconnect file structure.
"""
sch_ref_df = pd.read_excel(sch_ref_path, sheetname="School Data")
all_read_dfs = []
for index, row in sch_ref_df.iterrows():
file_path = "Z:\\" + row["cyconnect Folder"] + "\\School Data\\CYSH Assessments - " + row['Informal School Name'] + ".xlsx"
try:
read_df = pd.read_excel(file_path, index_col=None, skiprows=1)
read_df = read_df[~read_df['School'].isnull()]
all_read_dfs.append(read_df)
except:
print("Failed: Loading "
+ file_path)
continue
all_read_dfs = pd.concat(all_read_dfs, axis = 0, ignore_index=True)
return all_read_dfs
def prep_course_performance_record_df():
"""
Get cyschoolhouse report of Course Performance FLs to use as official source of Local Student ID
This is just to get student details, not which section or program stduents are in, so we can drop duplicates on CY-ID
"""
CP_df = get_report("00O1a000002rm2S")
CP_df.rename(columns={'Student Id':'CY-ID'}, inplace=True)
cols = ['Account Name',
'Local Student ID',
'CY-ID',
'Student: Student Name',
'Student: Grade',
'Program: Program Name']
CP_df = CP_df[CP_df.columns.intersection(cols)]
enrollment_df = CP_df[['CY-ID', 'Program: Program Name']].pivot_table(index='CY-ID', columns='Program: Program Name', aggfunc=len, fill_value=np.nan)
enrollment_df = enrollment_df.reset_index(level='CY-ID')
enrollment_df.rename(columns={'Coaching: Attendance': 'ADA Enrollment',
'Tutoring: Literacy': 'ELA Enrollment',
'Tutoring: Math': 'MATH Enrollment'}, inplace=True)
CP_df.drop(['Program: Program Name'],axis=1, inplace=True)
CP_df.drop_duplicates(subset='CY-ID', inplace=True)
CP_df = CP_df.merge(enrollment_df, how='left', on='CY-ID')
return CP_df
def download_one_school_template(school_name, df, cysh_assmnt_name, driver):
# generate assessment by CY-ID
driver.get("https://c.na24.visual.force.com/apex/AssessmentWizard")
# Enter school name
driver.find_element_by_xpath("//select[@name='thePage:theform:groupBlock:j_id110:schoolSelect']/option[text()="
+ '"' + school_name + '"]').click()
sleep(1)
# click search and wait
driver.find_element_by_xpath(".//input[contains(@value, 'Search')]").click()
WebDriverWait(driver, 30).until(EC.presence_of_element_located((By.XPATH, ".//input[contains(@aria-controls, 'srchstdtable')]")))
# Check boxes for each student
failed_ids = []
for y in df['CY-ID'].unique():
try:
driver.find_element_by_xpath(".//input[contains(@aria-controls, 'srchstdtable')]").clear()
driver.find_element_by_xpath(".//input[contains(@aria-controls, 'srchstdtable')]").send_keys(y)
driver.find_element_by_xpath(".//input[contains(@onclick, '" + y + "')]").click()
except:
print('Error: filling template, could not select student: ' + y)
# set some variable
failed_ids.append(y)
if len(failed_ids) == len(df['CY-ID'].unique()):
print('Error: filling template, could not select any students from school')
df = []
return df, failed_ids
else:
pass
# Add selected students
driver.find_element_by_xpath('//*[@id="thePage:theform:groupBlock:j_id157"]').click()
WebDriverWait(driver, 30).until(EC.presence_of_element_located((By.XPATH, ".//input[contains(@value, 'Next') and contains(@type, 'button')]")))
# Click Next
driver.find_element_by_xpath(".//input[contains(@value, 'Next') and contains(@type, 'button')]").click()
WebDriverWait(driver, 30).until(EC.presence_of_element_located((By.XPATH, ".//a[contains(@class, 'btn black_btn') and contains(text(), 'Download Template')]")))
# Choose Assessment type
driver.find_element_by_xpath("//select[@name='thePage:theform:j_id160:j_id162']/option[text()='"
+ cysh_assmnt_name
+ "']").click()
sleep(3)
# Download
driver.find_element_by_xpath(".//a[contains(@class, 'btn black_btn') and contains(text(), 'Download Template')]").click()
# Accept pop-up to download assessment
EC.alert_is_present
driver.switch_to.alert.accept()
sleep(4)
list_of_files = [os.path.join(temp_path, x) for x in os.listdir(temp_path) if x.endswith(".csv")]
latest_file = max(list_of_files , key = os.path.getctime)
df = pd.read_csv(latest_file, header=1)
n_missing_ids = len(df[df['Student USID'].astype(object)=='null'])
if n_missing_ids > 0:
print('Warning: ' + str(n_missing_ids) + ' students have missing CPS IDs in cyschoolhouse')
if n_missing_ids == len(df['Student USID']):
print('Error: all students missing IDs in cyschoolhouse')
df = []
return df, failed_ids
df = df[df['Student USID'].astype(object) != 'null']
df['Student USID'] = df['Student USID'].astype(float)
return df, failed_ids
def upload_assessment_csv(upload_csv_path, driver):
driver.get("https://c.na24.visual.force.com/apex/AssessmentWizard")
driver.find_element_by_xpath(".//input[contains(@value, 'Import Assessments')]").click()
WebDriverWait(driver, 30).until(EC.presence_of_element_located((By.XPATH, '//*[@id="thePage:theForm:j_id116:j_id117"]')))
driver.find_element_by_xpath('//*[@id="thePage:theForm:j_id116:j_id117"]').send_keys(upload_csv_path)
driver.find_element_by_xpath(".//input[contains(@value, 'Import')]").click()
WebDriverWait(driver, 30).until(EC.presence_of_element_located((By.XPATH, '//*[@id="dataDiv"]')))
driver.find_element_by_xpath(".//input[contains(@value, 'Save')]").click()
sleep(5)
def fill_template(template_df, one_school_df, assessment_type):
df_cols = ['CPS ID', 'Grade', 'Score', 'Assessment Date']
df = template_df.merge(one_school_df[df_cols], how='left', left_on='Student USID', right_on='CPS ID')
df['Date Administered (Date_Administered__c)'] = df['Assessment Date']
if "ADA" in assessment_type:
df['Average Daily Attendance (Average_Daily_Attendance__c)'] = df['Score']
df.merge(assmt_schdl[['Date', 'Time Period']], left_on='Assessment Date', right_on='Date')
df['Time Period (Time_Period__c)'] = df['Time Period']
df = df.drop('Time Period', axis=1)
if "NWEA" in assessment_type:
df['0 to 300 Scaled Score (X0_to_300_Scaled_Score__c)'] = df['Score']
df['Testing Grade Level (Testing_Grade_Level__c)'] = df['Grade']
df = df.drop(df_cols, axis=1)
return df
In [40]:
##########################
# Gather New Assessments #
##########################
# Load Assessments, Dates, make 'Assessment_Date' key
assmt_schdl = pd.read_excel('input_files\\CHI-Assessment-Schedule.xlsx')
assmt_schdl['Date'] = assmt_schdl['Date'].dt.strftime('%m/%d/%Y')
assmt_schdl['Key'] = assmt_schdl['Assessment'] + "_" + assmt_schdl['Date']
assmt_schdl['Friendly Name'] = assmt_schdl['Subject'] + " " + assmt_schdl['Time Period']
In [4]:
# Gather cyconnect spreadsheets
SD_df = gather_CYSH_Assessments_from_Schools(sch_ref_path="Z:\\ChiPrivate\\Chicago Data and Evaluation\\SY18\\SY18 Team Placement\\SY18 FINAL Team Placements.xlsx")
SD_df.to_excel("Z:\\ChiPrivate\\Chicago Reports and Evaluation\\SY18\\SY18 Performance Data Audits\\CYSH Assessments - Whole Site.xlsx", index=False)
SD_df.drop(['Att', 'ELA', 'Math'], axis=1, inplace=True)
# Rename columns
SD_df.columns = list(SD_df)[0:5] + list(assmt_schdl['Key'])
id_cols = ['School', 'Student Name', 'CPS ID', 'CY-ID', 'Grade']
# melt so each row is an assessment score and assessment name
SD_df = SD_df.melt(id_vars=id_cols, var_name='Assessment', value_name='Score')
SD_df = SD_df[~SD_df['Score'].isnull()]
SD_df['Assessment'], SD_df['Assessment Date'] = SD_df['Assessment'].str.split('_', 1).str
SD_df['Key'] = SD_df['CY-ID'] + "_" + SD_df['Assessment'] + "_" + SD_df['Assessment Date']
assmt_df = get_report("00O1a000002rlOS")
assmt_df.rename(columns={'Student_Id':'CY-ID'}, inplace=True)
assmt_df['Date Administered'] = pd.to_datetime(assmt_df['Date Administered'])
assmt_df['Date Administered'] = assmt_df['Date Administered'].dt.strftime('%m/%d/%Y')
assmt_df['Key'] = assmt_df['CY-ID'] + "_" + assmt_df['Assessment Type: Display Value'] + "_" + assmt_df['Date Administered']
# remove CY-ID_assessment type_assessment date that occur in assessment report from CYSH
SD_df = SD_df[~SD_df['Key'].isin(assmt_df['Key'])]
print("{} new assessments".format(len(SD_df)))
In [145]:
##########################
# Upload New Assessments #
##########################
driver = get_driver()
open_cyschoolhouse18(driver)
all_failed_ids = []
for assessment_type in SD_df['Assessment'].unique():
one_assessment_df = SD_df.loc[SD_df['Assessment']==assessment_type]
for school_name in one_assessment_df['School'].unique():
print('Starting: {}: {}'.format(assessment_type, school_name))
one_school_df = one_assessment_df.loc[one_assessment_df['School']==school_name]
if len(one_school_df.loc[one_school_df['CY-ID'].duplicated()]) > 0:
print('Warning: {} entries were duplicated on CY-ID. Kept one. Re-run to upload the other.'.format(len(one_school_df.loc[one_school_df['CY-ID'].duplicated()])))
one_school_df.drop_duplicates('CY-ID', inplace=True)
if len(one_school_df.loc[one_school_df['CPS ID'].duplicated(keep=False)]) > 0:
print('Warning: {} entries were duplicated on CPS ID. Removed from analysis.'.format(len(one_school_df.loc[one_school_df['CPS ID'].duplicated()])))
one_school_df.drop_duplicates('CPS ID', inplace=True, keep=False)
template_df, failed_ids = download_one_school_template(school_name=school_name,
df=one_school_df,
cysh_assmnt_name=assessment_type,
driver=driver)
all_failed_ids = all_failed_ids + failed_ids
if len(template_df) == 0:
continue
# Load downloaded template file to get header
list_of_files = [os.path.join(temp_path, x) for x in os.listdir(temp_path) if x.endswith(".csv")]
latest_file = max(list_of_files , key = os.path.getctime)
og_header = [list(pd.read_csv(latest_file, header=0))[0:2]]
try:
template_df = fill_template(template_df, one_school_df, assessment_type)
except:
print("Failed: filling template")
continue
try:
# overwrite csv with filled template
template_df.to_csv(latest_file, index=False)
# load csv again to add header back in
with open(latest_file) as f:
reader = csv.reader(f)
for row in reader:
og_header.append(row)
# Write to csv again, now with added header row
final_df = pd.DataFrame(og_header)
final_df.to_csv(latest_file, index=False, header=False)
except:
print("Failed: writing .csv")
continue
try:
# Upload to cyschoolhouse
upload_assessment_csv(upload_csv_path=latest_file,
driver=driver)
#os.remove(latest_file)
except:
print("Failed: upoading .csv")
continue
print("Complete!")
#print("Starting Failed ID's")
#for student_id in all_failed_ids:
# one_student_df = one_assessment_df[one_assessment_df['CY-ID']==student_id]
# template_df = download_one_student_school_template(school_name=school_name,
# df=one_school_df,
# cysh_assmnt_name=assessment_type,
# driver=driver)
#
driver.quit()
In [ ]:
#########################
# Refresh Spreadshseets #
#########################
# Distribute current CYSH info to School Data folders
CP_df = prep_course_performance_record_df()
# Load all Assessments in cyschoolhouse
assmt_df = get_report("00O1a000002rlOS")
assmt_df.rename(columns={'Student_Id':'CY-ID'}, inplace=True)
assmt_df['Date Administered'] = pd.to_datetime(assmt_df['Date Administered']).dt.strftime('%m/%d/%Y')
assmt_df['Key'] = assmt_df['Assessment Type: Display Value'] + "_" + assmt_df['Date Administered']
assmt_df = assmt_df[assmt_df['Key'].isin(assmt_schdl['Key'])]
assmt_df['Value'] = assmt_df[['Average Daily Attendance', '0 to 300 Scaled Score']].sum(axis=1)
# Pivot assmt_df and merge to CP_df
assmt_pivot = assmt_df[['CY-ID','Key','Value']].pivot_table(index='CY-ID', columns='Key', values='Value', fill_value=np.nan)
assmt_pivot = assmt_pivot.reset_index(level='CY-ID')
assmt_pivot = CP_df.merge(assmt_pivot, how='left', on="CY-ID")
cols = [x for x in list(assmt_pivot) if x not in list(assmt_schdl['Key'])] + list(assmt_schdl['Key'])
cols_df = pd.DataFrame(columns=cols)
assmt_pivot = pd.concat([cols_df,assmt_pivot])[cols]
assmt_pivot.rename(columns=dict(zip(list(assmt_schdl['Key']), list(assmt_schdl['Friendly Name']))), inplace=True)
# Add NWEA Baseline cols
assmt_pivot['NWEA ELA Baseline'] = assmt_pivot[['NWEA ELA Prior Yr', 'NWEA ELA Fall']].bfill(axis=1).iloc[:, 0]
assmt_pivot['NWEA MATH Baseline'] = assmt_pivot[['NWEA MATH Prior Yr', 'NWEA MATH Fall']].bfill(axis=1).iloc[:, 0]
# Count number of assessments for enrolled students
assmt_counts_ADA = assmt_pivot[assmt_pivot['ADA Enrollment']>0].groupby(by='Account Name')[[x for x in assmt_pivot.columns if 'ADA' in x]].count()
assmt_counts_ELA = assmt_pivot[assmt_pivot['ELA Enrollment']>0].groupby(by='Account Name')[[x for x in assmt_pivot.columns if 'ELA' in x]].count()
assmt_counts_MATH = assmt_pivot[assmt_pivot['MATH Enrollment']>0].groupby(by='Account Name')[[x for x in assmt_pivot.columns if 'MATH' in x.upper()]].count()
assmt_counts = pd.concat([assmt_counts_ADA, assmt_counts_ELA, assmt_counts_MATH], axis=1)
# resort cols for baseline placement
cols = [x for x in list(assmt_counts) if x not in ['NWEA ELA Baseline', 'NWEA MATH Baseline']]
cols = cols[0:8] + ['NWEA ELA Baseline'] + cols[8:13] + ['NWEA MATH Baseline'] + cols[13:]
assmt_counts = assmt_counts[cols]
In [72]:
# Write counts to Excel
counts_path = 'Z:\\ChiPrivate\\Chicago Data and Evaluation\\SY18\\CYSH Assessment and Enrollment Counts.xlsx'
wb = xw.Book(counts_path)
sht = wb.sheets['Sheet1']
sht.range('B2:S27').clear_contents()
sht.range('B1').options(header=True, index=True).value = assmt_counts
wb.save(counts_path)
wb.close()
In [11]:
# Prep for school folders
cols = ['ELA Enrollment', 'MATH Enrollment', 'ADA Enrollment']
assmt_pivot[cols] = assmt_pivot[cols].replace((1, np.nan),('Yes', ''))
sch_ref_path = "Z:\\ChiPrivate\\Chicago Data and Evaluation\\SY18\\SY18 Team Placement\\SY18 FINAL Team Placements.xlsx"
sch_ref_df = pd.read_excel(sch_ref_path, sheetname="School Data")
sch_ref_df = sch_ref_df[pd.notnull(sch_ref_df['School'])]
assmt_pivot.sort_values(by='Student: Student Name', inplace=True)
In [9]:
# Add NWEA targets here
nwea_targets = pd.read_csv('C:\\Users\\CLuedtke\\GitHub\\cy-automation-library\\cyautomation\\cyschoolhouse\\input_files\\NWEA_Target_Scores.csv')
nwea_targets
# Remove NWEA baselines here
Out[9]:
In [104]:
# Write Assessment data to school folders
wb = xw.Book("Z:\\ChiPrivate\\Chicago Reports and Evaluation\\SY18\\Eval Management\\Templates\\CYSH Assessments Template.xlsx")
sht = wb.sheets['Assessment Data']
for x in assmt_pivot["Account Name"].unique():
sht.range('A3:Z300').clear_contents()
sht.range('A3').options(index=False, header=False).value = assmt_pivot[assmt_pivot['Account Name'] == x]
wb.save("Z:\\"
+ sch_ref_df['cyconnect Folder'][sch_ref_df['School'] == x].values[0]
+ "\\School Data\\CYSH Assessments - "
+ sch_ref_df['Informal School Name'][sch_ref_df['School'] == x].values[0]
+ ".xlsx")
wb.close()
In [ ]:
## Delete files in School Data folders
#for x in sch_ref_df['School'].unique():
# os.remove("Z:\\"
# + sch_ref_df['cyconnect Folder'][sch_ref_df['School'] == x].values[0]
# + "\\School Data\\SY18 cyschoolhouse Assessment Check - "
# + sch_ref_df['Informal School Name'][sch_ref_df['School'] == x].values[0]
# + ".xlsx")