In [100]:
# --- importing libs.
from astropy.table import Table
from astropy.table import join
import numpy as np
import pylab as plt
import pandas
%matplotlib inline
In [101]:
# --- Load the student survey responses from CSV into an astropy Table.
# NOTE(review): this is an absolute path on one machine; point it at your own
# copy of students.csv before running anywhere else.
d = Table.read('/Users/khawkins/Desktop/AHW/LtaP/students.csv', format='csv')
# Show the column names. A bare `d.colnames` in the middle of a cell is never
# displayed (only a cell's last expression is), so print it explicitly.
print(d.colnames)
# Each student's answer to the 1-to-5-star enthusiasm question, as a NumPy array.
enthu = np.array(d['On a scale of 1 to 5 stars, how excited are you about becoming a scientist?'])
In [102]:
# --- Collect the students' "field I would want to study" answers:
#     fld_student_all holds the full column (one entry per student),
#     fld_student holds only the distinct answers found in that column.
student_field_question = 'If I could be any type of scientist when I grow up, I would want to study:'
fld_student_all = d[student_field_question]
fld_student = np.array(np.unique(d[student_field_question]))
In [105]:
# Read the scientists' data (scientists_1.csv) with pandas and list its columns.
scientists_csv = './scientists_1.csv'
scidat = pandas.read_csv(scientists_csv)
print(scidat.columns)
In [106]:
# -- Pull out the topic each scientist marked as most relevant to their work.
sci_topic_question = 'We will match you with a pen pal who has expressed an interest in at least one of the following subjects. Which topic is most relevant to your work?'
fld_sci = scidat[sci_topic_question]
# Print the distinct topics that appear among the scientists.
distinct_sci_topics = np.array(np.unique(fld_sci))
print(distinct_sci_topics)
# Also print the 'Date Created' column of the scientists' table.
print(scidat['Date Created'])
In [107]:
# --- The preselected interests (17 topics), kept as a plain Python list.
preselect_list = [
    'chemicals',
    'weather',
    'rocks',
    'plants',
    'engineering',
    'energy',
    'animals',
    'machines',
    'the human body',
    'medicine',
    'oceans',
    'the environment',
    'space',
    'the brain',
    'matter',
    'computers',
    'cells',
]
In [ ]:
In [109]:
# ---- By-hand work by Sara, Kevin, and Keith matching each unique student field
#      to one of the preselected interests (mainly done to force free-text
#      'other' answers into the most appropriate preselected interest).
#
# The entries are positional: student_by_hand[i] is the interest assigned to
# fld_student[i], so this array must stay in the same order as fld_student.
#
# Fix: the sixth entry used to read 'medical', which is not one of the
# preselected interests; it is now 'medicine'.
student_by_hand = np.array([
    'the brain', 'medicine', 'medicine', 'the human body', 'matter', 'medicine',
    'computers', 'computers', 'space', 'space', 'animals', 'animals',
    'the brain', 'cells', 'chemicals',
    'chemicals', 'computers', 'computers', 'animals', 'medicine',
    'the human body', 'energy', 'energy', 'energy',
    'engineering', 'the human body', 'the human body', 'medicine',
    'engineering', 'machines', 'matter', 'medicine', 'the human body',
    'engineering', 'space', 'oceans', 'plants', 'machines', 'rocks',
    'medicine', 'space', 'matter', 'the brain', 'the environment',
    'the human body', 'space', 'chemicals', 'computers', 'computers',
    'rocks', 'oceans', 'the environment', 'animals', 'space',
])
# Print each unique student answer next to its hand-assigned interest so the
# mapping can be checked by eye.
for i in range(len(fld_student)):
    print('%s | %s\n' % (fld_student[i], student_by_hand[i]))
In [ ]:
In [110]:
# --- Count how many students chose one of the preselected interests versus
#     something that is not in the preselected list.
preselect_list = np.array(preselect_list)  # array form so `==` compares element-wise

# Both tallies start at zero. Previously `good` started at 1 (over-counting by
# one) and `non_preselected` was incremented without ever being initialised,
# which raises NameError on a fresh kernel.
good = 0
non_preselected = 0
for student_field in fld_student_all:
    # Positions in preselect_list that equal this student's answer.
    matches = np.where(preselect_list == student_field)[0]
    if len(matches) == 0:
        # The student chose an option that is not in the preselected list.
        non_preselected += 1
    else:
        good += 1
print(good, non_preselected)

# Convert the scientists' topics to a plain NumPy array and report how many
# entries there are.
fld_sci = np.array(fld_sci)
print(len(fld_sci))
In [111]:
# For every student, count the scientists whose topic equals that student's
# chosen field (0 when nobody matches).
allsci_match = np.array(
    [len(np.where(fld_sci == student_field)[0]) for student_field in fld_student_all]
)
print(allsci_match)
print(min(allsci_match))

# Indices of the students for whom no scientist matched, and how many there are.
nomatch = np.where(allsci_match == 0)[0]
print(len(nomatch))

# Histogram of the match counts, leaving out the students with zero matches.
has_match = allsci_match != 0
plt.hist(allsci_match[has_match], bins=30, histtype='step', lw=3)
Out[111]:
In [ ]:
In [ ]: