This notebook will be for obtaining the interest feilds for school children (and scientists if there is time


In [100]:
# --- importing libs.
from astropy.table import Table
from astropy.table import  join
import numpy as np
import pylab as plt
import pandas
%matplotlib inline

In [101]:
d = Table.read('/Users/khawkins/Desktop/AHW/LtaP/students.csv',format='csv') #load in data for students
d.colnames #print columns
enthu = np.array(d['On a scale of 1 to 5 stars, how excited are you about becoming a scientist?']) #grab student enthusasium

In [102]:
# --- check the feilds that are unique amoung the students 
fld_student = np.array(np.unique(d['If I could be any type of scientist when I grow up, I would want to study:']))
fld_student_all = d['If I could be any type of scientist when I grow up, I would want to study:']

In [105]:
#load data for scientists_1.csv and figure out the columns
scidat = pandas.read_csv('./scientists_1.csv')
print(scidat.columns)


Index(['Entry Id', 'Email', 'Name', 'Last',
       'Mailing Address (starting September 2017)', 'Address Line 2', 'City',
       'State  Province / Region', 'Postal / Zip Code', 'Country',
       'Occupation', 'Field',
       'We will match you with a pen pal who has expressed an interest in at least one of the following subjects. Which topic is most relevant to your work?',
       'If we can't find a match with your first choice, which topic is also relevant to your work?',
       'Did you have a pen pal in our program in the 2016-2017 school year?',
       'Would you like to write to the same pen pal again this year, if possible?',
       'Do you have any major commitments coming up this school year that might get in the way of being a good pen pal? (e.g. dissertation writing, extended field work)',
       'How do you plan to work around any obstacles to sending your letters on time?',
       'By submitting this form, you are making a commitment to being a pen pal for the entire school year (September through June). If we are not able to match you right away, we will keep you on our waiting list and email you as soon as there is a match available.\n\nWould you like to be matched with a pen pal?',
       'Date Created', 'Created By', 'Last Updated', 'Updated By',
       'IP Address', 'Last Page Accessed', 'Completion Status'],
      dtype='object')

In [106]:
# -- grabs the interests of the scientists
fld_sci = scidat['We will match you with a pen pal who has expressed an interest in at least one of the following subjects. Which topic is most relevant to your work?']
print(np.array(np.unique(fld_sci)))
print(scidat['Date Created'])


['Animals/medicine' 'Bacteria' 'Bacteria, generally microbiology'
 'Bacteria/viruses' 'Drinking water' 'Endocrinology/embryonic development '
 'Evolution, fossils, paleontology' 'General biology and/or heredity'
 'Genetics' 'Health' 'Human behavior' 'Human behavior or evolution'
 'Infectious disease' 'Infectious diseases' 'Insects'
 'Marine mammals and conservation' 'Medicine+computers'
 'Microbes (bacteria, fungi, etc)' 'Microbiology' 'Microbiology, bacteria!'
 'Psychology' 'Public Health' 'Social sciences, special education, autism'
 'Structural biology' 'Viruses/ bacteria' 'Water' 'Wildlife' 'animals'
 'archaeology' 'atoms and subatomic particles' 'bacteria'
 'beaches and sand' 'bee microbiome'
 'biomaterials for drug delivery, micro/nanofabrication, medical devices'
 'cancer immunology' 'cells' 'chemicals'
 'clean transportation technologies' 'computers' 'energy' 'engineering'
 'food' 'forensic science' 'genes' 'genetics'
 'genetics (especially how embryos use genes to grow)' 'germs' 'health'
 'health / disease' 'human culture and society' 'infectious disease'
 'insects' 'machines' 'math' 'matter' 'medicine' 'microbiomes'
 'music and sound' 'oceans' 'planets, Mars, Venus' 'plants'
 'proteins, cell division' 'psychology'
 'psychology/development/infants/cognitive science/the brain' 'rocks'
 'science writing' 'space' 'spinal cord' 'the brain' 'the earth'
 'the environment' 'the environment or oceans or rivers' 'the human body'
 'the planet' 'virus' 'viruses' 'viruses or bacteria' 'weather']
0      2017-07-12 09:47:45
1      2017-07-13 08:55:34
2      2017-07-13 08:55:37
3      2017-07-13 08:57:28
4      2017-07-13 08:57:32
5      2017-07-13 08:59:00
6      2017-07-13 08:59:19
7      2017-07-13 09:00:17
8      2017-07-13 09:01:33
9      2017-07-13 09:01:58
10     2017-07-13 09:04:13
11     2017-07-13 09:04:57
12     2017-07-13 09:05:17
13     2017-07-13 09:05:33
14     2017-07-13 09:05:47
15     2017-07-13 09:05:51
16     2017-07-13 09:06:21
17     2017-07-13 09:06:25
18     2017-07-13 09:07:45
19     2017-07-13 09:07:57
20     2017-07-13 09:08:52
21     2017-07-13 09:09:27
22     2017-07-13 09:09:59
23     2017-07-13 09:10:03
24     2017-07-13 09:10:52
25     2017-07-13 09:11:13
26     2017-07-13 09:13:25
27     2017-07-13 09:14:22
28     2017-07-13 09:14:25
29     2017-07-13 09:14:34
              ...         
710    2017-08-02 10:25:29
711    2017-08-02 14:31:51
712    2017-08-02 19:28:45
713    2017-08-02 19:33:19
714    2017-08-03 09:27:20
715    2017-08-03 09:58:31
716    2017-08-03 15:41:36
717    2017-08-03 20:47:13
718    2017-08-04 13:33:09
719    2017-08-04 14:44:48
720    2017-08-07 05:17:18
721    2017-08-07 09:50:53
722    2017-08-07 12:44:34
723    2017-08-07 19:30:16
724    2017-08-07 21:02:13
725    2017-08-08 10:48:28
726    2017-08-10 14:16:13
727    2017-08-15 13:01:09
728    2017-08-15 13:31:15
729    2017-08-16 12:27:31
730    2017-08-17 15:00:24
731    2017-08-19 13:30:07
732    2017-08-21 15:08:54
733    2017-08-22 11:29:39
734    2017-08-22 12:09:10
735    2017-08-22 16:11:48
736    2017-08-22 17:02:26
737    2017-08-23 16:32:01
738    2017-08-24 19:12:51
739    2017-08-24 20:30:42
Name: Date Created, Length: 740, dtype: object

In [107]:
#--- build the set of preselected interests
preselect_list = ['chemicals','weather','rocks','plants','engineering','energy','animals','machines',\
                  'the human body','medicine','oceans','the environment','space','the brain','matter',\
                  'computers','cells']

In [ ]:


In [109]:
#----The by-hand work by Sara, Kevin, and Keith to match an unqiue student feilds to the preselected ones. (Mainly done to force the 'other' option into the \
#the most approp. preselected interest)
student_by_hand = np.array(['the brain', 'medicine','medicine','the human body','matter','medical',   'computers','computers','space','space','animals','animals','the brain','cells','chemicals',\
                  'chemicals','computers','computers','animals','medicine','the human body','energy','energy','energy',\
                  'engineering','the human body','the human body','medicine','engineering','machines','matter','medicine','the human body',
                  'engineering','space','oceans','plants','machines','rocks','medicine', 'space','matter','the brain','the environment',\
                  'the human body','space','chemicals','computers','computers','rocks','oceans','the environment','animals','space'])


for i in np.arange(len(fld_student)):
    print('%s | %s\n'%(fld_student[i],student_by_hand[i]))


Art science | the brain

Docter | medicine

Doctor | medicine

Forensic Scientist | the human body

Matter,Machines,Space,Computers, and cells. | matter

Medical Examiner | medical

Technolygy | computers

Video games | computers

aliens | space

aliens and other monsters | space

anaimals | animals

animals | animals

ballet | the brain

cells | cells

chemestry | chemicals

chemicals | chemicals

compers | computers

computers | computers

dinosaurs | animals

disease | medicine

disecting | the human body

egner | energy

electricity | energy

energy | energy

engineering | engineering

forensic Scientist | the human body

forensics | the human body

i love all science genetics | medicine

lego's | engineering

machines | machines

matter | matter

medicine | medicine

nail artist | the human body

navy jets | engineering

no | space

oceans | oceans

plants | plants

robotics | machines

rocks | rocks

something else: detective | medicine

space | space

speed | matter

the brain | the brain

the environment | the environment

the human body | the human body

the solar system | space

toxic | chemicals

video game | computers

video games | computers

volcanoes | rocks

water | oceans

weather | the environment

wildlife | animals

writer | space


In [ ]:


In [110]:
#--- How many students have preselected options vs have 
preselect_list = np.array(preselect_list)
bad = 0
good =1
for i in np.arange(len(fld_student_all)):
    ind = np.where(preselect_list == fld_student_all[i])[0]
    if len(ind) == 0:
        #print('The child choose a non-preselected list option')
        non_preselected += 1
    else:
        #print('OK')
        good+=1
print(good,non_preselected)
fld_sci = np.array(fld_sci)
print(len(fld_sci))


491 40
740

In [111]:
allsci_match = []
for i in np.arange(len(fld_student_all)):
    ind = np.where(fld_sci == fld_student_all[i])[0]
    if len(ind) == 0:
        allsci_match.append(0)
    else:
        allsci_match.append(len(ind))
allsci_match = np.array(allsci_match)
print(allsci_match)
print(min(allsci_match))
nomatch = np.where(allsci_match == 0)[0]
print(len(nomatch))
plt.hist(allsci_match[allsci_match != 0],bins=30,histtype='step',lw=3)


[ 40  40  40  20   0  40  20  18  18 100 107  18  40  20  31  20  18  20
  40 100  40  18  18 100 100  43  20  42  40 100  43  40   0   0  43  20
   0  18  43  10  43  20  60  18  40  60   3 107  52  31   3 100 107 107
  40  43  60  40   3  60 100   0  18  18  25  13 100  60   0  18  13  13
   0  18  18 100 100 100  43  18   3  43   3  10  31  20 100  40 100  18
  20 100 100  18  40   3 100  52  18  40  40 100  40 100  43   0  13   0
 100  43 100  43  43  43  25  18 100  18  43  31  18  18  13  18 100  40
  43 100  43  60  40   0  43 100 100   0   0  18  40  40  31  61   0   0
  60  61  18  31  13   0 100  43  31 100   5  18  25  40  43  40 100  43
  60 107 100  18  31  40 107 100  60  60   3  31 100  43  60  20   0  60
   0 100  31  43  18  10 100  18   0  18  60  61 100  61  60  60  40  60
  31  18  61 107   0  43 100   0 100 100  40 100 100 100  61  25 100 100
 100  40   0  31   3  61  18  25 100  43  31 100   0  18   3  60  25  10
 100 100  31 100  31  25  40  42  20  18   0   0  20 100 100 100  43  40
 100   5 100 100  25  43  61   3  25  31 100 100  25 100 100 100  31  43
 100  40  18   3   3  60  18   0  18  40 100  13 100 100  18  20 107   0
   3   0  18  42  10   0  40  40  18  40 100  61  60 100  60  18 100 100
   5  40  43 100  61  40 100  10  18 100  18  60 100  25  25  42  25  18
  42 100  18 100  10  31   0  42 100 100 100  31  43  18   0  20   0 100
 100  40  20 100  18  60  25 100  43   3  40  18  31  20   0 100  18  31
  31   0  60  25  43  18 100 100 100  13 100  60 100   0  31 100  31 100
  31  43 100  20  42  18  43  18 100  20 100 100  31  20  25  18  18  43
  20  18  13  60  43  60 100  43 100  25 100  60  43  60  20  18 100  31
  31   3 100  18  31 100  43   0 100  43 100  31   0  18   0  60  18  40
 100  18  18  20  18  18  20  20 100   3 100 100  31  18  20  40  20  31
   0 100  20  40  18 100  40  43  60  18  18   3  31  43  60  31  40  10
  31  60  18   3  40  40  40  40  40   0   3 100  43  18 100 100  40  43
  43  60  10 100 100  20  40  18  40   3 100  18  40 100 100  18  40  40
   3 100 100  40  60  18  18 100   3 100 100   3  43  40   0 100  18  40
  40   3   3  61 100 100 100 100]
0
40
Out[111]:
(array([  28.,    0.,   18.,    0.,  103.,    0.,   17.,    0.,   35.,
           0.,   58.,   52.,    0.,    0.,    2.,    0.,   45.,    0.,
           0.,    0.,    0.,    0.,    0.,    0.,    0.,    0.,    0.,
         124.,    0.,    8.]),
 array([   3.        ,    6.46666667,    9.93333333,   13.4       ,
          16.86666667,   20.33333333,   23.8       ,   27.26666667,
          30.73333333,   34.2       ,   37.66666667,   41.13333333,
          44.6       ,   48.06666667,   51.53333333,   55.        ,
          58.46666667,   61.93333333,   65.4       ,   68.86666667,
          72.33333333,   75.8       ,   79.26666667,   82.73333333,
          86.2       ,   89.66666667,   93.13333333,   96.6       ,
         100.06666667,  103.53333333,  107.        ]),
 <a list of 1 Patch objects>)

In [ ]:


In [ ]: