In [1]:
# This function drops all the columns with too many 'PrivacySuppressed' and 'Null' or 'NaN'.
# The threshold used here

import pandas as pd
import numpy as np
# import os
# print(os.getcwd())

def getColumns(dataset, threshold=0.9):
    dataset.replace('PrivacvySuppressed', np.NaN)
    numOfRow = dataset.shape[0]
    nanList = dataset.isnull().sum().tolist()
    columnList = list(dataset.columns.values)
    colList = []
    for i in range(len(columnList)):
        column = columnList[i]
        if nanList[i] <= numOfRow * threshold:
            colList.append(column)
    return colList

def getRows(dataset, threshold=0.1):
    numOfRow = dataset.shape[0]
    numOfCol = dataset.shape[1]
    nanList = dataset.isnull().sum(axis=1).tolist()
    rowList = []
    schoolList = []
    idList = []
    for index, row in dataset.iterrows():
        if nanList[index] < numOfCol * threshold:
            rowList.append(index)
            schoolList.append(row['INSTNM'])
            idList.append(row['OPEID'])
    return set(idList)

raw11 = pd.read_csv('../../data/rawData/MERGED2010_11_PP.csv', sep=',', error_bad_lines=False, index_col=False, dtype='unicode')
print('read in raw data: 2011')
raw12 = pd.read_csv('../../data/rawData/MERGED2011_12_PP.csv', sep=',', error_bad_lines=False, index_col=False, dtype='unicode')
print('read in raw data: 2012')
raw13 = pd.read_csv('../../data/rawData/MERGED2012_13_PP.csv', sep=',', error_bad_lines=False, index_col=False, dtype='unicode')
print('read in raw data: 2013')
raw14 = pd.read_csv('../../data/rawData/MERGED2013_14_PP.csv', sep=',', error_bad_lines=False, index_col=False, dtype='unicode')
print('read in raw data: 2014')
raw15 = pd.read_csv('../../data/rawData/MERGED2014_15_PP.csv', sep=',', error_bad_lines=False, index_col=False, dtype='unicode')
print('read in raw data: 2015')
rawDataList = [raw11, raw12, raw13, raw14, raw15]
nameList = ['year11.csv', 'year12.csv','year13.csv','year14.csv','year15.csv']

# Use the newest data set of 20 as base for choosing columns
thresholdColumn = 0.9
columnList = getColumns(raw15, thresholdColumn)
print('This is the number of columns that we use with a NaN threshold of: ' + str(thresholdColumn))
print(len(columnList))
reducedColumnData = []

for i in range(len(rawDataList)):
	temp = rawDataList[i]
	reduced = temp[columnList]
	reducedColumnData.append(reduced)


read in raw data: 2011
read in raw data: 2012
read in raw data: 2013
read in raw data: 2014
read in raw data: 2015
This is the number of columns that we use with a NaN threshold of: 0.9
569

In [3]:
# Use the intersection of rows to choose rows
rowListSet = []
thresholdRow = 0.9
rowSet11 = getRows(reducedColumnData[0], thresholdRow)
rowSet12 = getRows(reducedColumnData[1], thresholdRow)
rowSet13 = getRows(reducedColumnData[2], thresholdRow)
rowSet14 = getRows(reducedColumnData[3], thresholdRow)
rowSet15 = getRows(reducedColumnData[4], thresholdRow)
rowList = set.intersection(rowSet11, rowSet12, rowSet13, rowSet14, rowSet15)
print('This is number of school OPEID that we use with a NaN threshold of: ' + str(thresholdRow))
print(len(rowList))


This is number of school OPEID that we use with a NaN threshold of: 0.9
6456

In [21]:
temp = reducedColumnData[0]
temp.columns.values


Out[21]:
array(['\ufeffUNITID', 'OPEID', 'OPEID6', 'INSTNM', 'CITY', 'STABBR',
       'ZIP', 'ACCREDAGENCY', 'INSTURL', 'NPCURL', 'HCM2', 'MAIN',
       'NUMBRANCH', 'PREDDEG', 'HIGHDEG', 'CONTROL', 'ST_FIPS', 'REGION',
       'LOCALE', 'LATITUDE', 'LONGITUDE', 'CCBASIC', 'CCUGPROF',
       'CCSIZSET', 'HBCU', 'PBI', 'ANNHI', 'TRIBAL', 'AANAPII', 'HSI',
       'NANTI', 'MENONLY', 'WOMENONLY', 'RELAFFIL', 'ADM_RATE',
       'ADM_RATE_ALL', 'SATVR25', 'SATVR75', 'SATMT25', 'SATMT75',
       'SATVRMID', 'SATMTMID', 'ACTCM25', 'ACTCM75', 'ACTEN25', 'ACTEN75',
       'ACTMT25', 'ACTMT75', 'ACTCMMID', 'ACTENMID', 'ACTMTMID', 'SAT_AVG',
       'SAT_AVG_ALL', 'PCIP01', 'PCIP03', 'PCIP04', 'PCIP05', 'PCIP09',
       'PCIP10', 'PCIP11', 'PCIP12', 'PCIP13', 'PCIP14', 'PCIP15',
       'PCIP16', 'PCIP19', 'PCIP22', 'PCIP23', 'PCIP24', 'PCIP25',
       'PCIP26', 'PCIP27', 'PCIP29', 'PCIP30', 'PCIP31', 'PCIP38',
       'PCIP39', 'PCIP40', 'PCIP41', 'PCIP42', 'PCIP43', 'PCIP44',
       'PCIP45', 'PCIP46', 'PCIP47', 'PCIP48', 'PCIP49', 'PCIP50',
       'PCIP51', 'PCIP52', 'PCIP54', 'CIP01CERT1', 'CIP01CERT2',
       'CIP01ASSOC', 'CIP01CERT4', 'CIP01BACHL', 'CIP03CERT1',
       'CIP03CERT2', 'CIP03ASSOC', 'CIP03CERT4', 'CIP03BACHL',
       'CIP04CERT1', 'CIP04CERT2', 'CIP04ASSOC', 'CIP04CERT4',
       'CIP04BACHL', 'CIP05CERT1', 'CIP05CERT2', 'CIP05ASSOC',
       'CIP05CERT4', 'CIP05BACHL', 'CIP09CERT1', 'CIP09CERT2',
       'CIP09ASSOC', 'CIP09CERT4', 'CIP09BACHL', 'CIP10CERT1',
       'CIP10CERT2', 'CIP10ASSOC', 'CIP10CERT4', 'CIP10BACHL',
       'CIP11CERT1', 'CIP11CERT2', 'CIP11ASSOC', 'CIP11CERT4',
       'CIP11BACHL', 'CIP12CERT1', 'CIP12CERT2', 'CIP12ASSOC',
       'CIP12CERT4', 'CIP12BACHL', 'CIP13CERT1', 'CIP13CERT2',
       'CIP13ASSOC', 'CIP13CERT4', 'CIP13BACHL', 'CIP14CERT1',
       'CIP14CERT2', 'CIP14ASSOC', 'CIP14CERT4', 'CIP14BACHL',
       'CIP15CERT1', 'CIP15CERT2', 'CIP15ASSOC', 'CIP15CERT4',
       'CIP15BACHL', 'CIP16CERT1', 'CIP16CERT2', 'CIP16ASSOC',
       'CIP16CERT4', 'CIP16BACHL', 'CIP19CERT1', 'CIP19CERT2',
       'CIP19ASSOC', 'CIP19CERT4', 'CIP19BACHL', 'CIP22CERT1',
       'CIP22CERT2', 'CIP22ASSOC', 'CIP22CERT4', 'CIP22BACHL',
       'CIP23CERT1', 'CIP23CERT2', 'CIP23ASSOC', 'CIP23CERT4',
       'CIP23BACHL', 'CIP24CERT1', 'CIP24CERT2', 'CIP24ASSOC',
       'CIP24CERT4', 'CIP24BACHL', 'CIP25CERT1', 'CIP25CERT2',
       'CIP25ASSOC', 'CIP25CERT4', 'CIP25BACHL', 'CIP26CERT1',
       'CIP26CERT2', 'CIP26ASSOC', 'CIP26CERT4', 'CIP26BACHL',
       'CIP27CERT1', 'CIP27CERT2', 'CIP27ASSOC', 'CIP27CERT4',
       'CIP27BACHL', 'CIP29CERT1', 'CIP29CERT2', 'CIP29ASSOC',
       'CIP29CERT4', 'CIP29BACHL', 'CIP30CERT1', 'CIP30CERT2',
       'CIP30ASSOC', 'CIP30CERT4', 'CIP30BACHL', 'CIP31CERT1',
       'CIP31CERT2', 'CIP31ASSOC', 'CIP31CERT4', 'CIP31BACHL',
       'CIP38CERT1', 'CIP38CERT2', 'CIP38ASSOC', 'CIP38CERT4',
       'CIP38BACHL', 'CIP39CERT1', 'CIP39CERT2', 'CIP39ASSOC',
       'CIP39CERT4', 'CIP39BACHL', 'CIP40CERT1', 'CIP40CERT2',
       'CIP40ASSOC', 'CIP40CERT4', 'CIP40BACHL', 'CIP41CERT1',
       'CIP41CERT2', 'CIP41ASSOC', 'CIP41CERT4', 'CIP41BACHL',
       'CIP42CERT1', 'CIP42CERT2', 'CIP42ASSOC', 'CIP42CERT4',
       'CIP42BACHL', 'CIP43CERT1', 'CIP43CERT2', 'CIP43ASSOC',
       'CIP43CERT4', 'CIP43BACHL', 'CIP44CERT1', 'CIP44CERT2',
       'CIP44ASSOC', 'CIP44CERT4', 'CIP44BACHL', 'CIP45CERT1',
       'CIP45CERT2', 'CIP45ASSOC', 'CIP45CERT4', 'CIP45BACHL',
       'CIP46CERT1', 'CIP46CERT2', 'CIP46ASSOC', 'CIP46CERT4',
       'CIP46BACHL', 'CIP47CERT1', 'CIP47CERT2', 'CIP47ASSOC',
       'CIP47CERT4', 'CIP47BACHL', 'CIP48CERT1', 'CIP48CERT2',
       'CIP48ASSOC', 'CIP48CERT4', 'CIP48BACHL', 'CIP49CERT1',
       'CIP49CERT2', 'CIP49ASSOC', 'CIP49CERT4', 'CIP49BACHL',
       'CIP50CERT1', 'CIP50CERT2', 'CIP50ASSOC', 'CIP50CERT4',
       'CIP50BACHL', 'CIP51CERT1', 'CIP51CERT2', 'CIP51ASSOC',
       'CIP51CERT4', 'CIP51BACHL', 'CIP52CERT1', 'CIP52CERT2',
       'CIP52ASSOC', 'CIP52CERT4', 'CIP52BACHL', 'CIP54CERT1',
       'CIP54CERT2', 'CIP54ASSOC', 'CIP54CERT4', 'CIP54BACHL',
       'DISTANCEONLY', 'UGDS', 'UGDS_WHITE', 'UGDS_BLACK', 'UGDS_HISP',
       'UGDS_ASIAN', 'UGDS_AIAN', 'UGDS_NHPI', 'UGDS_2MOR', 'UGDS_NRA',
       'UGDS_UNKN', 'PPTUG_EF', 'CURROPER', 'NPT4_PUB', 'NPT4_PRIV',
       'NPT41_PUB', 'NPT42_PUB', 'NPT43_PUB', 'NPT44_PUB', 'NPT45_PUB',
       'NPT41_PRIV', 'NPT42_PRIV', 'NPT43_PRIV', 'NPT44_PRIV',
       'NPT45_PRIV', 'NPT4_048_PUB', 'NPT4_048_PRIV', 'NPT4_3075_PUB',
       'NPT4_3075_PRIV', 'NPT4_75UP_PUB', 'NPT4_75UP_PRIV', 'NUM4_PUB',
       'NUM4_PRIV', 'NUM41_PUB', 'NUM42_PUB', 'NUM43_PUB', 'NUM44_PUB',
       'NUM45_PUB', 'NUM41_PRIV', 'NUM42_PRIV', 'NUM43_PRIV', 'NUM44_PRIV',
       'NUM45_PRIV', 'COSTT4_A', 'COSTT4_P', 'TUITIONFEE_IN',
       'TUITIONFEE_OUT', 'TUITIONFEE_PROG', 'TUITFTE', 'INEXPFTE',
       'AVGFACSAL', 'PFTFAC', 'PCTPELL', 'C150_4', 'C150_L4',
       'C150_4_POOLED', 'C150_L4_POOLED', 'POOLYRS', 'PFTFTUG1_EF',
       'D150_4', 'D150_L4', 'D150_4_POOLED', 'D150_L4_POOLED',
       'C150_4_WHITE', 'C150_4_BLACK', 'C150_4_HISP', 'C150_4_ASIAN',
       'C150_4_AIAN', 'C150_4_2MOR', 'C150_4_NRA', 'C150_4_UNKN',
       'C150_L4_WHITE', 'C150_L4_BLACK', 'C150_L4_HISP', 'C150_L4_ASIAN',
       'C150_L4_AIAN', 'C150_L4_2MOR', 'C150_L4_UNKN', 'C200_4', 'C200_L4',
       'D200_4', 'D200_L4', 'RET_FT4', 'RET_FTL4', 'RET_PT4', 'RET_PTL4',
       'C200_4_POOLED', 'C200_L4_POOLED', 'POOLYRS200', 'D200_4_POOLED',
       'D200_L4_POOLED', 'PCTFLOAN', 'CDR3', 'RPY_3YR_RT',
       'COMPL_RPY_3YR_RT', 'NONCOM_RPY_3YR_RT', 'LO_INC_RPY_3YR_RT',
       'MD_INC_RPY_3YR_RT', 'HI_INC_RPY_3YR_RT', 'DEP_RPY_3YR_RT',
       'IND_RPY_3YR_RT', 'PELL_RPY_3YR_RT', 'NOPELL_RPY_3YR_RT',
       'FEMALE_RPY_3YR_RT', 'MALE_RPY_3YR_RT', 'FIRSTGEN_RPY_3YR_RT',
       'NOTFIRSTGEN_RPY_3YR_RT', 'RPY_5YR_RT', 'COMPL_RPY_5YR_RT',
       'NONCOM_RPY_5YR_RT', 'LO_INC_RPY_5YR_RT', 'MD_INC_RPY_5YR_RT',
       'HI_INC_RPY_5YR_RT', 'DEP_RPY_5YR_RT', 'IND_RPY_5YR_RT',
       'PELL_RPY_5YR_RT', 'NOPELL_RPY_5YR_RT', 'FEMALE_RPY_5YR_RT',
       'MALE_RPY_5YR_RT', 'FIRSTGEN_RPY_5YR_RT', 'NOTFIRSTGEN_RPY_5YR_RT',
       'RPY_7YR_RT', 'COMPL_RPY_7YR_RT', 'NONCOM_RPY_7YR_RT',
       'LO_INC_RPY_7YR_RT', 'MD_INC_RPY_7YR_RT', 'HI_INC_RPY_7YR_RT',
       'DEP_RPY_7YR_RT', 'IND_RPY_7YR_RT', 'PELL_RPY_7YR_RT',
       'NOPELL_RPY_7YR_RT', 'FEMALE_RPY_7YR_RT', 'MALE_RPY_7YR_RT',
       'FIRSTGEN_RPY_7YR_RT', 'NOTFIRSTGEN_RPY_7YR_RT', 'INC_PCT_LO',
       'DEP_STAT_PCT_IND', 'DEP_INC_PCT_LO', 'IND_INC_PCT_LO',
       'PAR_ED_PCT_1STGEN', 'INC_PCT_M1', 'INC_PCT_M2', 'INC_PCT_H1',
       'INC_PCT_H2', 'DEP_INC_PCT_M1', 'DEP_INC_PCT_M2', 'DEP_INC_PCT_H1',
       'DEP_INC_PCT_H2', 'IND_INC_PCT_M1', 'IND_INC_PCT_M2',
       'IND_INC_PCT_H1', 'IND_INC_PCT_H2', 'PAR_ED_PCT_MS',
       'PAR_ED_PCT_HS', 'PAR_ED_PCT_PS', 'APPL_SCH_PCT_GE2',
       'APPL_SCH_PCT_GE3', 'APPL_SCH_PCT_GE4', 'APPL_SCH_PCT_GE5',
       'DEP_INC_AVG', 'IND_INC_AVG', 'DEBT_MDN', 'GRAD_DEBT_MDN',
       'WDRAW_DEBT_MDN', 'LO_INC_DEBT_MDN', 'MD_INC_DEBT_MDN',
       'HI_INC_DEBT_MDN', 'DEP_DEBT_MDN', 'IND_DEBT_MDN', 'PELL_DEBT_MDN',
       'NOPELL_DEBT_MDN', 'FEMALE_DEBT_MDN', 'MALE_DEBT_MDN',
       'FIRSTGEN_DEBT_MDN', 'NOTFIRSTGEN_DEBT_MDN', 'DEBT_N',
       'GRAD_DEBT_N', 'WDRAW_DEBT_N', 'LO_INC_DEBT_N', 'MD_INC_DEBT_N',
       'HI_INC_DEBT_N', 'DEP_DEBT_N', 'IND_DEBT_N', 'PELL_DEBT_N',
       'NOPELL_DEBT_N', 'FEMALE_DEBT_N', 'MALE_DEBT_N', 'FIRSTGEN_DEBT_N',
       'NOTFIRSTGEN_DEBT_N', 'GRAD_DEBT_MDN10YR', 'CUML_DEBT_N',
       'CUML_DEBT_P90', 'CUML_DEBT_P75', 'CUML_DEBT_P25', 'CUML_DEBT_P10',
       'INC_N', 'DEP_INC_N', 'IND_INC_N', 'DEP_STAT_N', 'PAR_ED_N',
       'APPL_SCH_N', 'RPY_3YR_N', 'COMPL_RPY_3YR_N', 'NONCOM_RPY_3YR_N',
       'LO_INC_RPY_3YR_N', 'MD_INC_RPY_3YR_N', 'HI_INC_RPY_3YR_N',
       'DEP_RPY_3YR_N', 'IND_RPY_3YR_N', 'PELL_RPY_3YR_N',
       'NOPELL_RPY_3YR_N', 'FEMALE_RPY_3YR_N', 'MALE_RPY_3YR_N',
       'FIRSTGEN_RPY_3YR_N', 'NOTFIRSTGEN_RPY_3YR_N', 'RPY_5YR_N',
       'COMPL_RPY_5YR_N', 'NONCOM_RPY_5YR_N', 'LO_INC_RPY_5YR_N',
       'MD_INC_RPY_5YR_N', 'HI_INC_RPY_5YR_N', 'DEP_RPY_5YR_N',
       'IND_RPY_5YR_N', 'PELL_RPY_5YR_N', 'NOPELL_RPY_5YR_N',
       'FEMALE_RPY_5YR_N', 'MALE_RPY_5YR_N', 'FIRSTGEN_RPY_5YR_N',
       'NOTFIRSTGEN_RPY_5YR_N', 'RPY_7YR_N', 'COMPL_RPY_7YR_N',
       'NONCOM_RPY_7YR_N', 'LO_INC_RPY_7YR_N', 'MD_INC_RPY_7YR_N',
       'HI_INC_RPY_7YR_N', 'DEP_RPY_7YR_N', 'IND_RPY_7YR_N',
       'PELL_RPY_7YR_N', 'NOPELL_RPY_7YR_N', 'FEMALE_RPY_7YR_N',
       'MALE_RPY_7YR_N', 'FIRSTGEN_RPY_7YR_N', 'NOTFIRSTGEN_RPY_7YR_N',
       'LOAN_EVER', 'PELL_EVER', 'AGE_ENTRY', 'FEMALE', 'MARRIED',
       'DEPENDENT', 'VETERAN', 'FIRST_GEN', 'FAMINC', 'MD_FAMINC',
       'FAMINC_IND', 'DEBT_MDN_SUPP', 'GRAD_DEBT_MDN_SUPP',
       'GRAD_DEBT_MDN10YR_SUPP', 'RPY_3YR_RT_SUPP',
       'LO_INC_RPY_3YR_RT_SUPP', 'MD_INC_RPY_3YR_RT_SUPP',
       'HI_INC_RPY_3YR_RT_SUPP', 'COMPL_RPY_3YR_RT_SUPP',
       'NONCOM_RPY_3YR_RT_SUPP', 'DEP_RPY_3YR_RT_SUPP',
       'IND_RPY_3YR_RT_SUPP', 'PELL_RPY_3YR_RT_SUPP',
       'NOPELL_RPY_3YR_RT_SUPP', 'FEMALE_RPY_3YR_RT_SUPP',
       'MALE_RPY_3YR_RT_SUPP', 'FIRSTGEN_RPY_3YR_RT_SUPP',
       'NOTFIRSTGEN_RPY_3YR_RT_SUPP', 'C150_L4_POOLED_SUPP',
       'C150_4_POOLED_SUPP', 'C200_L4_POOLED_SUPP', 'C200_4_POOLED_SUPP',
       'ALIAS', 'C100_4', 'D100_4', 'C100_L4', 'D100_L4', 'TRANS_4',
       'DTRANS_4', 'TRANS_L4', 'DTRANS_L4', 'ICLEVEL', 'UGDS_MEN',
       'UGDS_WOMEN', 'CDR3_DENOM'], dtype=object)

In [22]:
for i in range(0, len(reducedColumnData)):
    df = reducedColumnData[i]
    temp = df.loc[df['OPEID'].isin(rowList)]

In [23]:
reducedDataList = []
for i in range(0, len(reducedColumnData)):
    df = reducedColumnData[i]
    temp = df.loc[df['OPEID'].isin(rowList)]
    reducedDataList.append(temp)

In [31]:
reducedDataList[0].shape, reducedDataList[1].shape, reducedDataList[2].shape, reducedDataList[3].shape, reducedDataList[4].shape,


Out[31]:
((6462, 569), (6478, 569), (6486, 569), (6489, 569), (6485, 569))

In [30]:
# Output supportive dataset
# path = '../../data/reducedData/'
# for i in range(len(reducedDataList)):
#     filename = path + nameList[i]
#     temp = reducedDataList[i]
#     temp.to_csv(filename)
#     print(nameList[i] + ' converted.')


# Merge the data set
finalDF = pd.DataFrame()
year11 = reducedDataList[0]
year12 = reducedDataList[1]
year13 = reducedDataList[2]
year14 = reducedDataList[3]
year15 = reducedDataList[4]
for column in columnList:
#     print(type(year15[column][1]))
    if year15[column][1].isdigit() or isinstance(year15[column][1], float):
        print(year15[column].map(lambda x: int(x) * 0.4))
        print('nextcol')
#         finalDF[column] = year15[column].map(lambda x: int(x) * 0.4) + year14[column].map(lambda x: int(x) * 0.3)
    else:
        finalDF[column] = year11[column]
        
        
# Output as cleaned-up data
# finalDF.to_csv('../../data/cleanedUp.csv')


0          40261.6
1          40265.2
2          40276.0
3          40282.4
4          40289.6
5          40300.4
6          40304.0
7          40324.8
8          40332.0
9          40343.2
10         40374.8
11         40411.2
12         40429.2
13         40446.4
14         40457.2
15         40464.4
16         40475.6
17         40496.0
19         40514.4
20         40518.0
21         40520.4
22         40546.0
23         40574.0
24         40581.2
25         40584.8
26         40592.0
27         40599.6
28         40602.0
29         40605.6
30         40616.4
           ...    
7650    17974640.4
7651    17988000.4
7652    17988000.8
7653    17988001.2
7655    18015080.4
7656    18022840.4
7657    18022840.8
7658    18126520.4
7659    18126520.8
7660    18126521.2
7661    18126521.6
7662    18126522.0
7663    18126522.4
7664    18128600.4
7665    18128600.8
7666    18128601.2
7675    18356760.4
7676    18356760.8
7677    18356761.2
7678    18356761.6
7679    18356762.0
7680    18356762.4
7681    18356762.8
7682    18356763.2
7683    18358560.4
7684    18358560.8
7685    18358920.4
7686    18358920.8
7687    18358921.2
7688    18358921.6
Name: UNITID, dtype: float64
nextcol
0         40080.0
1         42080.0
2       1001360.0
3         42200.0
4         40200.0
5         42040.0
6         40280.0
7         40320.0
8        332400.0
9         40360.0
10        40480.0
11       487280.0
12       422160.0
13       521562.4
14        40600.0
15        42400.0
16        40120.0
17        40680.0
19        40720.0
20       314840.0
21       227960.0
22       384842.8
23        40760.0
24       879880.0
25       210400.0
26        40800.0
27        40840.0
28        40880.0
29        40520.0
30        40920.0
          ...    
7650    1636960.4
7651    1646920.4
7652    1646920.8
7653    1646921.2
7655      58389.2
7656     347767.6
7657     347762.0
7658      58372.0
7659      58372.4
7660      58373.6
7661      58377.6
7662      58385.2
7663      58385.6
7664      58384.8
7665      58393.2
7666      58394.0
7675      58376.0
7676      58378.4
7677      58380.8
7678      58383.6
7679      58386.0
7680      58388.4
7681      58388.8
7682      58398.4
7683      58378.0
7684      58381.6
7685      58396.0
7686      58396.8
7687      58397.6
7688      58398.0
Name: OPEID, dtype: float64
nextcol
0         400.8
1         420.8
2       10013.6
3         422.0
4         402.0
5         420.4
6         402.8
7         403.2
8        3324.0
9         403.6
10        404.8
11       4872.8
12       4221.6
13       5215.6
14        406.0
15        424.0
16        401.2
17        406.8
19        407.2
20       3148.4
21       2279.6
22       3848.4
23        407.6
24       8798.8
25       2104.0
26        408.0
27        408.4
28        408.8
29        405.2
30        409.2
         ...   
7650    16369.6
7651    16469.2
7652    16469.2
7653    16469.2
7655      583.6
7656     3477.6
7657     3477.6
7658      583.6
7659      583.6
7660      583.6
7661      583.6
7662      583.6
7663      583.6
7664      583.6
7665      583.6
7666      583.6
7675      583.6
7676      583.6
7677      583.6
7678      583.6
7679      583.6
7680      583.6
7681      583.6
7682      583.6
7683      583.6
7684      583.6
7685      583.6
7686      583.6
7687      583.6
7688      583.6
Name: OPEID6, dtype: float64
nextcol
0       0
1       0
2       0
3       0
4       0
5       0
6       0
7       0
8       0
9       0
10      0
11      0
12      0
13      0
14      0
15      0
16      0
17      0
19      0
20      0
21      0
22      0
23      0
24      0
25      0
26      0
27      0
28      0
29      0
30      0
       ..
7650    0
7651    0
7652    0
7653    0
7655    0
7656    0
7657    0
7658    0
7659    0
7660    0
7661    0
7662    0
7663    0
7664    0
7665    0
7666    0
7675    0
7676    0
7677    0
7678    0
7679    0
7680    0
7681    0
7682    0
7683    0
7684    0
7685    0
7686    0
7687    0
7688    0
Name: HCM2, dtype: float64
nextcol
0       0.4
1       0.4
2       0.4
3       0.4
4       0.4
5       0.4
6       0.4
7       0.4
8       0.4
9       0.4
10      0.4
11      0.4
12      0.4
13      0.0
14      0.4
15      0.4
16      0.4
17      0.4
19      0.4
20      0.4
21      0.4
22      0.0
23      0.4
24      0.4
25      0.4
26      0.4
27      0.4
28      0.4
29      0.4
30      0.4
       ... 
7650    0.0
7651    0.0
7652    0.0
7653    0.0
7655    0.0
7656    0.0
7657    0.0
7658    0.0
7659    0.0
7660    0.0
7661    0.0
7662    0.0
7663    0.0
7664    0.0
7665    0.0
7666    0.0
7675    0.0
7676    0.0
7677    0.0
7678    0.0
7679    0.0
7680    0.0
7681    0.0
7682    0.0
7683    0.0
7684    0.0
7685    0.0
7686    0.0
7687    0.0
7688    0.0
Name: MAIN, dtype: float64
nextcol
0        0.4
1        0.4
2        0.4
3        0.4
4        0.4
5        0.4
6        0.4
7        0.4
8        0.4
9        0.4
10       0.4
11       0.4
12       0.4
13       6.4
14       0.4
15       0.4
16       0.4
17       0.4
19       0.4
20       0.4
21       0.4
22       4.4
23       0.4
24       0.4
25       0.4
26       0.4
27       0.4
28       0.4
29       0.4
30       0.4
        ... 
7650     0.8
7651     1.6
7652     1.6
7653     1.6
7655    32.0
7656     9.2
7657     9.2
7658    32.0
7659    32.0
7660    32.0
7661    32.0
7662    32.0
7663    32.0
7664    32.0
7665    32.0
7666    32.0
7675    32.0
7676    32.0
7677    32.0
7678    32.0
7679    32.0
7680    32.0
7681    32.0
7682    32.0
7683    32.0
7684    32.0
7685    32.0
7686    32.0
7687    32.0
7688    32.0
Name: NUMBRANCH, dtype: float64
nextcol
0       1.2
1       1.2
2       1.2
3       1.2
4       1.2
5       1.2
6       0.8
7       1.2
8       1.2
9       1.2
10      1.2
11      0.8
12      1.2
13      0.8
14      0.8
15      0.8
16      1.2
17      0.8
19      0.8
20      0.8
21      0.8
22      0.4
23      1.2
24      1.2
25      0.8
26      1.2
27      0.8
28      0.8
29      0.8
30      1.2
       ... 
7650    0.0
7651    0.0
7652    0.0
7653    0.0
7655    0.0
7656    0.0
7657    0.0
7658    0.0
7659    0.0
7660    0.0
7661    0.0
7662    0.0
7663    0.0
7664    0.0
7665    0.0
7666    0.0
7675    0.0
7676    0.0
7677    0.0
7678    0.0
7679    0.0
7680    0.0
7681    0.0
7682    0.0
7683    0.0
7684    0.0
7685    0.0
7686    0.0
7687    0.0
7688    0.0
Name: PREDDEG, dtype: float64
nextcol
0       1.6
1       1.6
2       1.6
3       1.6
4       1.6
5       1.6
6       0.8
7       1.2
8       1.6
9       1.6
10      1.2
11      0.8
12      1.2
13      1.6
14      0.8
15      0.8
16      1.6
17      0.8
19      0.8
20      0.8
21      0.8
22      1.6
23      1.2
24      1.6
25      0.8
26      1.6
27      0.8
28      0.8
29      0.8
30      1.2
       ... 
7650    0.0
7651    0.0
7652    0.0
7653    0.0
7655    0.0
7656    0.0
7657    0.0
7658    0.0
7659    0.0
7660    0.0
7661    0.0
7662    0.0
7663    0.0
7664    0.0
7665    0.0
7666    0.0
7675    0.0
7676    0.0
7677    0.0
7678    0.0
7679    0.0
7680    0.0
7681    0.0
7682    0.0
7683    0.0
7684    0.0
7685    0.0
7686    0.0
7687    0.0
7688    0.0
Name: HIGHDEG, dtype: float64
nextcol
0       0.4
1       0.4
2       0.8
3       0.4
4       0.4
5       0.4
6       0.4
7       0.4
8       0.4
9       0.4
10      0.8
11      0.4
12      0.8
13      1.2
14      0.4
15      0.4
16      0.8
17      0.4
19      0.4
20      0.4
21      0.4
22      1.2
23      0.8
24      0.8
25      0.4
26      0.4
27      0.4
28      0.4
29      0.4
30      0.8
       ... 
7650    1.2
7651    1.2
7652    1.2
7653    1.2
7655    1.2
7656    1.2
7657    1.2
7658    1.2
7659    1.2
7660    1.2
7661    1.2
7662    1.2
7663    1.2
7664    1.2
7665    1.2
7666    1.2
7675    1.2
7676    1.2
7677    1.2
7678    1.2
7679    1.2
7680    1.2
7681    1.2
7682    1.2
7683    1.2
7684    1.2
7685    1.2
7686    1.2
7687    1.2
7688    1.2
Name: CONTROL, dtype: float64
nextcol
0        0.4
1        0.4
2        0.4
3        0.4
4        0.4
5        0.4
6        0.4
7        0.4
8        0.4
9        0.4
10       0.4
11       0.4
12       0.4
13       0.4
14       0.4
15       0.4
16       0.4
17       0.4
19       0.4
20       0.4
21       0.4
22       0.4
23       0.4
24       0.4
25       0.4
26       0.4
27       0.4
28       0.4
29       0.4
30       0.4
        ... 
7650    14.4
7651    15.6
7652    15.6
7653    15.6
7655     0.4
7656    22.0
7657    22.0
7658    14.8
7659    14.8
7660    14.8
7661    14.8
7662    14.8
7663    14.8
7664    13.6
7665    13.6
7666    13.6
7675     5.2
7676     5.2
7677     5.2
7678     5.2
7679     5.2
7680     5.2
7681     5.2
7682     5.2
7683    18.0
7684    18.0
7685    19.2
7686    19.2
7687    19.2
7688    19.2
Name: ST_FIPS, dtype: float64
nextcol
0       2.0
1       2.0
2       2.0
3       2.0
4       2.0
5       2.0
6       2.0
7       2.0
8       2.0
9       2.0
10      2.0
11      2.0
12      2.0
13      2.0
14      2.0
15      2.0
16      2.0
17      2.0
19      2.0
20      2.0
21      2.0
22      2.0
23      2.0
24      2.0
25      2.0
26      2.0
27      2.0
28      2.0
29      2.0
30      2.0
       ... 
7650    0.8
7651    1.2
7652    1.2
7653    1.2
7655    2.0
7656    1.2
7657    1.2
7658    2.0
7659    2.0
7660    2.0
7661    2.0
7662    2.0
7663    2.0
7664    0.8
7665    0.8
7666    0.8
7675    2.0
7676    2.0
7677    2.0
7678    2.0
7679    2.0
7680    2.0
7681    2.0
7682    2.0
7683    2.0
7684    2.0
7685    2.4
7686    2.4
7687    2.4
7688    2.4
Name: REGION, dtype: float64
nextcol
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-30-4e9d21125557> in <module>()
     18 #     print(type(year15[column][1]))
     19     if year15[column][1].isdigit() or isinstance(year15[column][1], float):
---> 20         print(year15[column].map(lambda x: int(x) * 0.4))
     21         print('nextcol')
     22 #         finalDF[column] = year15[column].map(lambda x: int(x) * 0.4) + year14[column].map(lambda x: int(x) * 0.3)

/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/pandas/core/series.py in map(self, arg, na_action)
   2043                                      index=self.index).__finalize__(self)
   2044         else:
-> 2045             mapped = map_f(values, arg)
   2046             return self._constructor(mapped,
   2047                                      index=self.index).__finalize__(self)

pandas/src/inference.pyx in pandas.lib.map_infer (pandas/lib.c:62187)()

<ipython-input-30-4e9d21125557> in <lambda>(x)
     18 #     print(type(year15[column][1]))
     19     if year15[column][1].isdigit() or isinstance(year15[column][1], float):
---> 20         print(year15[column].map(lambda x: int(x) * 0.4))
     21         print('nextcol')
     22 #         finalDF[column] = year15[column].map(lambda x: int(x) * 0.4) + year14[column].map(lambda x: int(x) * 0.3)

ValueError: cannot convert float NaN to integer

In [ ]: