In [1]:
from pyspark.sql import SparkSession

# Get the SparkSession used by every cell below. getOrCreate() reuses an
# already-running session if there is one, otherwise it starts a new one
# under the given application name.
spark = (
    SparkSession.builder
    .appName("Calculate Distances")
    .getOrCreate()
)

In [2]:
import string
# --- Directories (relative paths, trailing slash included so file names can
# --- simply be appended with `+`).
PATH_RAWDATA = '../rawdata/'
PATH_PROCESSEDDATA = '../processeddata/'
PATH_BING = PATH_RAWDATA + 'bing_results/'

# --- Input file names.
DRIV_DIST_FN = 'driv_dist.parquet'        # read from PATH_PROCESSEDDATA
INCOME_FN = 'ACS_15_5YR_S1903b.csv'       # read from PATH_RAWDATA
FINANCIAL_FN = 'ACS_15_5YR_DP03b.csv'     # read from PATH_RAWDATA

# --- Output file name (written under PATH_PROCESSEDDATA).
ZCARBON_FN = 'zcarbon.csv'

In [3]:
# Load the `driv_dist` parquet table from the processed-data directory, then
# show its schema and row count as a quick sanity check. count() is kept as
# the last expression so the notebook displays it.
dfDist = spark.read.format('parquet').load(PATH_PROCESSEDDATA + DRIV_DIST_FN)
dfDist.printSchema()
dfDist.count()


root
 |-- UATYPE: string (nullable = true)
 |-- ddist: double (nullable = true)
 |-- dist: double (nullable = true)
 |-- id: string (nullable = true)
 |-- m_house_unit: long (nullable = true)
 |-- m_id: long (nullable = true)
 |-- m_land: double (nullable = true)
 |-- m_lat_d: double (nullable = true)
 |-- m_lat_r: double (nullable = true)
 |-- m_long_d: double (nullable = true)
 |-- m_long_r: double (nullable = true)
 |-- m_pop: long (nullable = true)
 |-- m_water: double (nullable = true)
 |-- name: string (nullable = true)
 |-- z_house_unit: long (nullable = true)
 |-- z_id: long (nullable = true)
 |-- z_land: double (nullable = true)
 |-- z_lat_d: double (nullable = true)
 |-- z_lat_r: double (nullable = true)
 |-- z_long_d: double (nullable = true)
 |-- z_long_r: double (nullable = true)
 |-- z_pop: long (nullable = true)
 |-- z_water: double (nullable = true)

Out[3]:
111727

In [4]:
# Read the income CSV (INCOME_FN) with a header row; column types are
# inferred from the data.
dfIncomesRaw = spark.read.load(PATH_RAWDATA + INCOME_FN, format="csv",
                               delimiter=",", header=True, inferSchema=True)
dfIncomesRaw.printSchema()

# Keep only the three columns used downstream, renamed to the z_* naming
# used by dfDist and cast to long so that z_id has the same type as
# dfDist.z_id when the two frames are joined.
# NOTE(review): the meaning of HC01_EST_VC02 / HC02_EST_VC02 (households /
# median income) is taken from the target column names -- confirm against
# the ACS table metadata.
dfIncomes = dfIncomesRaw.select(
    dfIncomesRaw['GEOid2'].cast('long').alias('z_id'),
    dfIncomesRaw['HC01_EST_VC02'].cast('long').alias('z_households'),
    dfIncomesRaw['HC02_EST_VC02'].cast('long').alias('z_med_inc'),
)
dfIncomes.printSchema()


root
 |-- GEOid: string (nullable = true)
 |-- GEOid2: integer (nullable = true)
 |-- GEOdisplaylabel: string (nullable = true)
 |-- HC01_EST_VC02: integer (nullable = true)
 |-- HC01_MOE_VC02: integer (nullable = true)
 |-- HC02_EST_VC02: integer (nullable = true)
 |-- HC02_MOE_VC02: integer (nullable = true)
 |-- HC01_EST_VC04: double (nullable = true)
 |-- HC01_MOE_VC04: double (nullable = true)
 |-- HC02_EST_VC04: integer (nullable = true)
 |-- HC02_MOE_VC04: integer (nullable = true)
 |-- HC01_EST_VC05: double (nullable = true)
 |-- HC01_MOE_VC05: double (nullable = true)
 |-- HC02_EST_VC05: integer (nullable = true)
 |-- HC02_MOE_VC05: integer (nullable = true)
 |-- HC01_EST_VC06: double (nullable = true)
 |-- HC01_MOE_VC06: double (nullable = true)
 |-- HC02_EST_VC06: integer (nullable = true)
 |-- HC02_MOE_VC06: integer (nullable = true)
 |-- HC01_EST_VC07: double (nullable = true)
 |-- HC01_MOE_VC07: double (nullable = true)
 |-- HC02_EST_VC07: integer (nullable = true)
 |-- HC02_MOE_VC07: integer (nullable = true)
 |-- HC01_EST_VC08: double (nullable = true)
 |-- HC01_MOE_VC08: double (nullable = true)
 |-- HC02_EST_VC08: integer (nullable = true)
 |-- HC02_MOE_VC08: integer (nullable = true)
 |-- HC01_EST_VC09: double (nullable = true)
 |-- HC01_MOE_VC09: double (nullable = true)
 |-- HC02_EST_VC09: integer (nullable = true)
 |-- HC02_MOE_VC09: integer (nullable = true)
 |-- HC01_EST_VC10: double (nullable = true)
 |-- HC01_MOE_VC10: double (nullable = true)
 |-- HC02_EST_VC10: integer (nullable = true)
 |-- HC02_MOE_VC10: integer (nullable = true)
 |-- HC01_EST_VC12: double (nullable = true)
 |-- HC01_MOE_VC12: double (nullable = true)
 |-- HC02_EST_VC12: integer (nullable = true)
 |-- HC02_MOE_VC12: integer (nullable = true)
 |-- HC01_EST_VC13: double (nullable = true)
 |-- HC01_MOE_VC13: double (nullable = true)
 |-- HC02_EST_VC13: integer (nullable = true)
 |-- HC02_MOE_VC13: integer (nullable = true)
 |-- HC01_EST_VC16: double (nullable = true)
 |-- HC01_MOE_VC16: double (nullable = true)
 |-- HC02_EST_VC16: integer (nullable = true)
 |-- HC02_MOE_VC16: integer (nullable = true)
 |-- HC01_EST_VC17: double (nullable = true)
 |-- HC01_MOE_VC17: double (nullable = true)
 |-- HC02_EST_VC17: integer (nullable = true)
 |-- HC02_MOE_VC17: integer (nullable = true)
 |-- HC01_EST_VC18: double (nullable = true)
 |-- HC01_MOE_VC18: double (nullable = true)
 |-- HC02_EST_VC18: integer (nullable = true)
 |-- HC02_MOE_VC18: integer (nullable = true)
 |-- HC01_EST_VC19: double (nullable = true)
 |-- HC01_MOE_VC19: double (nullable = true)
 |-- HC02_EST_VC19: integer (nullable = true)
 |-- HC02_MOE_VC19: integer (nullable = true)
 |-- HC01_EST_VC22: integer (nullable = true)
 |-- HC01_MOE_VC22: integer (nullable = true)
 |-- HC02_EST_VC22: integer (nullable = true)
 |-- HC02_MOE_VC22: integer (nullable = true)
 |-- HC01_EST_VC23: double (nullable = true)
 |-- HC01_MOE_VC23: double (nullable = true)
 |-- HC02_EST_VC23: integer (nullable = true)
 |-- HC02_MOE_VC23: integer (nullable = true)
 |-- HC01_EST_VC24: double (nullable = true)
 |-- HC01_MOE_VC24: double (nullable = true)
 |-- HC02_EST_VC24: integer (nullable = true)
 |-- HC02_MOE_VC24: integer (nullable = true)
 |-- HC01_EST_VC25: double (nullable = true)
 |-- HC01_MOE_VC25: double (nullable = true)
 |-- HC02_EST_VC25: integer (nullable = true)
 |-- HC02_MOE_VC25: integer (nullable = true)
 |-- HC01_EST_VC26: double (nullable = true)
 |-- HC01_MOE_VC26: double (nullable = true)
 |-- HC02_EST_VC26: integer (nullable = true)
 |-- HC02_MOE_VC26: integer (nullable = true)
 |-- HC01_EST_VC27: double (nullable = true)
 |-- HC01_MOE_VC27: double (nullable = true)
 |-- HC02_EST_VC27: integer (nullable = true)
 |-- HC02_MOE_VC27: integer (nullable = true)
 |-- HC01_EST_VC30: integer (nullable = true)
 |-- HC01_MOE_VC30: integer (nullable = true)
 |-- HC02_EST_VC30: integer (nullable = true)
 |-- HC02_MOE_VC30: integer (nullable = true)
 |-- HC01_EST_VC31: double (nullable = true)
 |-- HC01_MOE_VC31: double (nullable = true)
 |-- HC02_EST_VC31: integer (nullable = true)
 |-- HC02_MOE_VC31: integer (nullable = true)
 |-- HC01_EST_VC32: double (nullable = true)
 |-- HC01_MOE_VC32: double (nullable = true)
 |-- HC02_EST_VC32: integer (nullable = true)
 |-- HC02_MOE_VC32: integer (nullable = true)
 |-- HC01_EST_VC33: double (nullable = true)
 |-- HC01_MOE_VC33: double (nullable = true)
 |-- HC02_EST_VC33: integer (nullable = true)
 |-- HC02_MOE_VC33: integer (nullable = true)
 |-- HC01_EST_VC34: double (nullable = true)
 |-- HC01_MOE_VC34: double (nullable = true)
 |-- HC02_EST_VC34: integer (nullable = true)
 |-- HC02_MOE_VC34: integer (nullable = true)
 |-- HC01_EST_VC35: double (nullable = true)
 |-- HC01_MOE_VC35: double (nullable = true)
 |-- HC02_EST_VC35: integer (nullable = true)
 |-- HC02_MOE_VC35: integer (nullable = true)
 |-- HC01_EST_VC36: double (nullable = true)
 |-- HC01_MOE_VC36: double (nullable = true)
 |-- HC02_EST_VC36: integer (nullable = true)
 |-- HC02_MOE_VC36: integer (nullable = true)
 |-- HC01_EST_VC39: double (nullable = true)
 |-- HC01_MOE_VC39: string (nullable = true)
 |-- HC02_EST_VC39: string (nullable = true)
 |-- HC02_MOE_VC39: string (nullable = true)
 |-- HC01_EST_VC40: double (nullable = true)
 |-- HC01_MOE_VC40: string (nullable = true)
 |-- HC02_EST_VC40: string (nullable = true)
 |-- HC02_MOE_VC40: string (nullable = true)
 |-- HC01_EST_VC41: double (nullable = true)
 |-- HC01_MOE_VC41: string (nullable = true)
 |-- HC02_EST_VC41: string (nullable = true)
 |-- HC02_MOE_VC41: string (nullable = true)

root
 |-- z_id: long (nullable = true)
 |-- z_households: long (nullable = true)
 |-- z_med_inc: long (nullable = true)


In [5]:
# Read the financial-characteristics CSV (FINANCIAL_FN) with a header row;
# column types are inferred from the data.
dfFinancialsRaw = spark.read.load(PATH_RAWDATA + FINANCIAL_FN, format="csv",
                                  delimiter=",", header=True, inferSchema=True)
dfFinancialsRaw.printSchema()

# Keep the join key plus the two percentage columns used downstream.
# The key is referenced by its exact header spelling (GEOid2) so the lookup
# does not depend on Spark's case-insensitive column resolution, and it is
# cast to long to match z_id in dfDist / dfIncomes.
# NOTE(review): z_pov / z_per_comm meanings (poverty rate, commuter share)
# are inferred from the target names -- confirm against the ACS metadata.
dfFinancials = dfFinancialsRaw.select(
    dfFinancialsRaw['GEOid2'].cast('long').alias('z_id'),
    dfFinancialsRaw['HC03_VC161'].alias('z_pov'),
    dfFinancialsRaw['HC03_VC28'].alias('z_per_comm'),
)


root
 |-- GEOid: string (nullable = true)
 |-- GEOid2: integer (nullable = true)
 |-- GEOdisplay-label: string (nullable = true)
 |-- HC01_VC03: integer (nullable = true)
 |-- HC02_VC03: integer (nullable = true)
 |-- HC03_VC03: integer (nullable = true)
 |-- HC04_VC03: string (nullable = true)
 |-- HC01_VC04: integer (nullable = true)
 |-- HC02_VC04: integer (nullable = true)
 |-- HC03_VC04: double (nullable = true)
 |-- HC04_VC04: double (nullable = true)
 |-- HC01_VC05: integer (nullable = true)
 |-- HC02_VC05: integer (nullable = true)
 |-- HC03_VC05: double (nullable = true)
 |-- HC04_VC05: double (nullable = true)
 |-- HC01_VC06: integer (nullable = true)
 |-- HC02_VC06: integer (nullable = true)
 |-- HC03_VC06: double (nullable = true)
 |-- HC04_VC06: double (nullable = true)
 |-- HC01_VC07: integer (nullable = true)
 |-- HC02_VC07: integer (nullable = true)
 |-- HC03_VC07: double (nullable = true)
 |-- HC04_VC07: double (nullable = true)
 |-- HC01_VC08: integer (nullable = true)
 |-- HC02_VC08: integer (nullable = true)
 |-- HC03_VC08: double (nullable = true)
 |-- HC04_VC08: double (nullable = true)
 |-- HC01_VC09: integer (nullable = true)
 |-- HC02_VC09: integer (nullable = true)
 |-- HC03_VC09: double (nullable = true)
 |-- HC04_VC09: double (nullable = true)
 |-- HC01_VC11: integer (nullable = true)
 |-- HC02_VC11: integer (nullable = true)
 |-- HC03_VC11: integer (nullable = true)
 |-- HC04_VC11: string (nullable = true)
 |-- HC01_VC12: string (nullable = true)
 |-- HC02_VC12: string (nullable = true)
 |-- HC03_VC12: double (nullable = true)
 |-- HC04_VC12: double (nullable = true)
 |-- HC01_VC14: integer (nullable = true)
 |-- HC02_VC14: integer (nullable = true)
 |-- HC03_VC14: integer (nullable = true)
 |-- HC04_VC14: string (nullable = true)
 |-- HC01_VC15: integer (nullable = true)
 |-- HC02_VC15: integer (nullable = true)
 |-- HC03_VC15: double (nullable = true)
 |-- HC04_VC15: double (nullable = true)
 |-- HC01_VC16: integer (nullable = true)
 |-- HC02_VC16: integer (nullable = true)
 |-- HC03_VC16: double (nullable = true)
 |-- HC04_VC16: double (nullable = true)
 |-- HC01_VC17: integer (nullable = true)
 |-- HC02_VC17: integer (nullable = true)
 |-- HC03_VC17: double (nullable = true)
 |-- HC04_VC17: double (nullable = true)
 |-- HC01_VC19: integer (nullable = true)
 |-- HC02_VC19: integer (nullable = true)
 |-- HC03_VC19: integer (nullable = true)
 |-- HC04_VC19: string (nullable = true)
 |-- HC01_VC20: integer (nullable = true)
 |-- HC02_VC20: integer (nullable = true)
 |-- HC03_VC20: double (nullable = true)
 |-- HC04_VC20: double (nullable = true)
 |-- HC01_VC22: integer (nullable = true)
 |-- HC02_VC22: integer (nullable = true)
 |-- HC03_VC22: integer (nullable = true)
 |-- HC04_VC22: string (nullable = true)
 |-- HC01_VC23: integer (nullable = true)
 |-- HC02_VC23: integer (nullable = true)
 |-- HC03_VC23: double (nullable = true)
 |-- HC04_VC23: double (nullable = true)
 |-- HC01_VC27: integer (nullable = true)
 |-- HC02_VC27: integer (nullable = true)
 |-- HC03_VC27: integer (nullable = true)
 |-- HC04_VC27: string (nullable = true)
 |-- HC01_VC28: integer (nullable = true)
 |-- HC02_VC28: integer (nullable = true)
 |-- HC03_VC28: double (nullable = true)
 |-- HC04_VC28: double (nullable = true)
 |-- HC01_VC29: integer (nullable = true)
 |-- HC02_VC29: integer (nullable = true)
 |-- HC03_VC29: double (nullable = true)
 |-- HC04_VC29: double (nullable = true)
 |-- HC01_VC30: integer (nullable = true)
 |-- HC02_VC30: integer (nullable = true)
 |-- HC03_VC30: double (nullable = true)
 |-- HC04_VC30: double (nullable = true)
 |-- HC01_VC31: integer (nullable = true)
 |-- HC02_VC31: integer (nullable = true)
 |-- HC03_VC31: double (nullable = true)
 |-- HC04_VC31: double (nullable = true)
 |-- HC01_VC32: integer (nullable = true)
 |-- HC02_VC32: integer (nullable = true)
 |-- HC03_VC32: double (nullable = true)
 |-- HC04_VC32: double (nullable = true)
 |-- HC01_VC33: integer (nullable = true)
 |-- HC02_VC33: integer (nullable = true)
 |-- HC03_VC33: double (nullable = true)
 |-- HC04_VC33: double (nullable = true)
 |-- HC01_VC36: double (nullable = true)
 |-- HC02_VC36: double (nullable = true)
 |-- HC03_VC36: string (nullable = true)
 |-- HC04_VC36: string (nullable = true)
 |-- HC01_VC40: integer (nullable = true)
 |-- HC02_VC40: integer (nullable = true)
 |-- HC03_VC40: integer (nullable = true)
 |-- HC04_VC40: string (nullable = true)
 |-- HC01_VC41: integer (nullable = true)
 |-- HC02_VC41: integer (nullable = true)
 |-- HC03_VC41: double (nullable = true)
 |-- HC04_VC41: double (nullable = true)
 |-- HC01_VC42: integer (nullable = true)
 |-- HC02_VC42: integer (nullable = true)
 |-- HC03_VC42: double (nullable = true)
 |-- HC04_VC42: double (nullable = true)
 |-- HC01_VC43: integer (nullable = true)
 |-- HC02_VC43: integer (nullable = true)
 |-- HC03_VC43: double (nullable = true)
 |-- HC04_VC43: double (nullable = true)
 |-- HC01_VC44: integer (nullable = true)
 |-- HC02_VC44: integer (nullable = true)
 |-- HC03_VC44: double (nullable = true)
 |-- HC04_VC44: double (nullable = true)
 |-- HC01_VC45: integer (nullable = true)
 |-- HC02_VC45: integer (nullable = true)
 |-- HC03_VC45: double (nullable = true)
 |-- HC04_VC45: double (nullable = true)
 |-- HC01_VC49: integer (nullable = true)
 |-- HC02_VC49: integer (nullable = true)
 |-- HC03_VC49: integer (nullable = true)
 |-- HC04_VC49: string (nullable = true)
 |-- HC01_VC50: integer (nullable = true)
 |-- HC02_VC50: integer (nullable = true)
 |-- HC03_VC50: double (nullable = true)
 |-- HC04_VC50: double (nullable = true)
 |-- HC01_VC51: integer (nullable = true)
 |-- HC02_VC51: integer (nullable = true)
 |-- HC03_VC51: double (nullable = true)
 |-- HC04_VC51: double (nullable = true)
 |-- HC01_VC52: integer (nullable = true)
 |-- HC02_VC52: integer (nullable = true)
 |-- HC03_VC52: double (nullable = true)
 |-- HC04_VC52: double (nullable = true)
 |-- HC01_VC53: integer (nullable = true)
 |-- HC02_VC53: integer (nullable = true)
 |-- HC03_VC53: double (nullable = true)
 |-- HC04_VC53: double (nullable = true)
 |-- HC01_VC54: integer (nullable = true)
 |-- HC02_VC54: integer (nullable = true)
 |-- HC03_VC54: double (nullable = true)
 |-- HC04_VC54: double (nullable = true)
 |-- HC01_VC55: integer (nullable = true)
 |-- HC02_VC55: integer (nullable = true)
 |-- HC03_VC55: double (nullable = true)
 |-- HC04_VC55: double (nullable = true)
 |-- HC01_VC56: integer (nullable = true)
 |-- HC02_VC56: integer (nullable = true)
 |-- HC03_VC56: double (nullable = true)
 |-- HC04_VC56: double (nullable = true)
 |-- HC01_VC57: integer (nullable = true)
 |-- HC02_VC57: integer (nullable = true)
 |-- HC03_VC57: double (nullable = true)
 |-- HC04_VC57: double (nullable = true)
 |-- HC01_VC58: integer (nullable = true)
 |-- HC02_VC58: integer (nullable = true)
 |-- HC03_VC58: double (nullable = true)
 |-- HC04_VC58: double (nullable = true)
 |-- HC01_VC59: integer (nullable = true)
 |-- HC02_VC59: integer (nullable = true)
 |-- HC03_VC59: double (nullable = true)
 |-- HC04_VC59: double (nullable = true)
 |-- HC01_VC60: integer (nullable = true)
 |-- HC02_VC60: integer (nullable = true)
 |-- HC03_VC60: double (nullable = true)
 |-- HC04_VC60: double (nullable = true)
 |-- HC01_VC61: integer (nullable = true)
 |-- HC02_VC61: integer (nullable = true)
 |-- HC03_VC61: double (nullable = true)
 |-- HC04_VC61: double (nullable = true)
 |-- HC01_VC62: integer (nullable = true)
 |-- HC02_VC62: integer (nullable = true)
 |-- HC03_VC62: double (nullable = true)
 |-- HC04_VC62: double (nullable = true)
 |-- HC01_VC66: integer (nullable = true)
 |-- HC02_VC66: integer (nullable = true)
 |-- HC03_VC66: integer (nullable = true)
 |-- HC04_VC66: string (nullable = true)
 |-- HC01_VC67: integer (nullable = true)
 |-- HC02_VC67: integer (nullable = true)
 |-- HC03_VC67: double (nullable = true)
 |-- HC04_VC67: double (nullable = true)
 |-- HC01_VC68: integer (nullable = true)
 |-- HC02_VC68: integer (nullable = true)
 |-- HC03_VC68: double (nullable = true)
 |-- HC04_VC68: double (nullable = true)
 |-- HC01_VC69: integer (nullable = true)
 |-- HC02_VC69: integer (nullable = true)
 |-- HC03_VC69: double (nullable = true)
 |-- HC04_VC69: double (nullable = true)
 |-- HC01_VC70: integer (nullable = true)
 |-- HC02_VC70: integer (nullable = true)
 |-- HC03_VC70: double (nullable = true)
 |-- HC04_VC70: double (nullable = true)
 |-- HC01_VC74: integer (nullable = true)
 |-- HC02_VC74: integer (nullable = true)
 |-- HC03_VC74: integer (nullable = true)
 |-- HC04_VC74: string (nullable = true)
 |-- HC01_VC75: integer (nullable = true)
 |-- HC02_VC75: integer (nullable = true)
 |-- HC03_VC75: double (nullable = true)
 |-- HC04_VC75: double (nullable = true)
 |-- HC01_VC76: integer (nullable = true)
 |-- HC02_VC76: integer (nullable = true)
 |-- HC03_VC76: double (nullable = true)
 |-- HC04_VC76: double (nullable = true)
 |-- HC01_VC77: integer (nullable = true)
 |-- HC02_VC77: integer (nullable = true)
 |-- HC03_VC77: double (nullable = true)
 |-- HC04_VC77: double (nullable = true)
 |-- HC01_VC78: integer (nullable = true)
 |-- HC02_VC78: integer (nullable = true)
 |-- HC03_VC78: double (nullable = true)
 |-- HC04_VC78: double (nullable = true)
 |-- HC01_VC79: integer (nullable = true)
 |-- HC02_VC79: integer (nullable = true)
 |-- HC03_VC79: double (nullable = true)
 |-- HC04_VC79: double (nullable = true)
 |-- HC01_VC80: integer (nullable = true)
 |-- HC02_VC80: integer (nullable = true)
 |-- HC03_VC80: double (nullable = true)
 |-- HC04_VC80: double (nullable = true)
 |-- HC01_VC81: integer (nullable = true)
 |-- HC02_VC81: integer (nullable = true)
 |-- HC03_VC81: double (nullable = true)
 |-- HC04_VC81: double (nullable = true)
 |-- HC01_VC82: integer (nullable = true)
 |-- HC02_VC82: integer (nullable = true)
 |-- HC03_VC82: double (nullable = true)
 |-- HC04_VC82: double (nullable = true)
 |-- HC01_VC83: integer (nullable = true)
 |-- HC02_VC83: integer (nullable = true)
 |-- HC03_VC83: double (nullable = true)
 |-- HC04_VC83: double (nullable = true)
 |-- HC01_VC84: integer (nullable = true)
 |-- HC02_VC84: integer (nullable = true)
 |-- HC03_VC84: double (nullable = true)
 |-- HC04_VC84: double (nullable = true)
 |-- HC01_VC85: integer (nullable = true)
 |-- HC02_VC85: integer (nullable = true)
 |-- HC03_VC85: string (nullable = true)
 |-- HC04_VC85: string (nullable = true)
 |-- HC01_VC86: integer (nullable = true)
 |-- HC02_VC86: integer (nullable = true)
 |-- HC03_VC86: string (nullable = true)
 |-- HC04_VC86: string (nullable = true)
 |-- HC01_VC89: integer (nullable = true)
 |-- HC02_VC89: integer (nullable = true)
 |-- HC03_VC89: double (nullable = true)
 |-- HC04_VC89: double (nullable = true)
 |-- HC01_VC90: integer (nullable = true)
 |-- HC02_VC90: integer (nullable = true)
 |-- HC03_VC90: string (nullable = true)
 |-- HC04_VC90: string (nullable = true)
 |-- HC01_VC91: integer (nullable = true)
 |-- HC02_VC91: integer (nullable = true)
 |-- HC03_VC91: double (nullable = true)
 |-- HC04_VC91: double (nullable = true)
 |-- HC01_VC92: integer (nullable = true)
 |-- HC02_VC92: integer (nullable = true)
 |-- HC03_VC92: string (nullable = true)
 |-- HC04_VC92: string (nullable = true)
 |-- HC01_VC93: integer (nullable = true)
 |-- HC02_VC93: integer (nullable = true)
 |-- HC03_VC93: double (nullable = true)
 |-- HC04_VC93: double (nullable = true)
 |-- HC01_VC94: integer (nullable = true)
 |-- HC02_VC94: integer (nullable = true)
 |-- HC03_VC94: string (nullable = true)
 |-- HC04_VC94: string (nullable = true)
 |-- HC01_VC97: integer (nullable = true)
 |-- HC02_VC97: integer (nullable = true)
 |-- HC03_VC97: double (nullable = true)
 |-- HC04_VC97: double (nullable = true)
 |-- HC01_VC98: integer (nullable = true)
 |-- HC02_VC98: integer (nullable = true)
 |-- HC03_VC98: string (nullable = true)
 |-- HC04_VC98: string (nullable = true)
 |-- HC01_VC99: integer (nullable = true)
 |-- HC02_VC99: integer (nullable = true)
 |-- HC03_VC99: double (nullable = true)
 |-- HC04_VC99: double (nullable = true)
 |-- HC01_VC100: integer (nullable = true)
 |-- HC02_VC100: integer (nullable = true)
 |-- HC03_VC100: string (nullable = true)
 |-- HC04_VC100: string (nullable = true)
 |-- HC01_VC101: integer (nullable = true)
 |-- HC02_VC101: integer (nullable = true)
 |-- HC03_VC101: double (nullable = true)
 |-- HC04_VC101: double (nullable = true)
 |-- HC01_VC103: integer (nullable = true)
 |-- HC02_VC103: integer (nullable = true)
 |-- HC03_VC103: integer (nullable = true)
 |-- HC04_VC103: string (nullable = true)
 |-- HC01_VC104: integer (nullable = true)
 |-- HC02_VC104: integer (nullable = true)
 |-- HC03_VC104: double (nullable = true)
 |-- HC04_VC104: double (nullable = true)
 |-- HC01_VC105: integer (nullable = true)
 |-- HC02_VC105: integer (nullable = true)
 |-- HC03_VC105: double (nullable = true)
 |-- HC04_VC105: double (nullable = true)
 |-- HC01_VC106: integer (nullable = true)
 |-- HC02_VC106: integer (nullable = true)
 |-- HC03_VC106: double (nullable = true)
 |-- HC04_VC106: double (nullable = true)
 |-- HC01_VC107: integer (nullable = true)
 |-- HC02_VC107: integer (nullable = true)
 |-- HC03_VC107: double (nullable = true)
 |-- HC04_VC107: double (nullable = true)
 |-- HC01_VC108: integer (nullable = true)
 |-- HC02_VC108: integer (nullable = true)
 |-- HC03_VC108: double (nullable = true)
 |-- HC04_VC108: double (nullable = true)
 |-- HC01_VC109: integer (nullable = true)
 |-- HC02_VC109: integer (nullable = true)
 |-- HC03_VC109: double (nullable = true)
 |-- HC04_VC109: double (nullable = true)
 |-- HC01_VC110: integer (nullable = true)
 |-- HC02_VC110: integer (nullable = true)
 |-- HC03_VC110: double (nullable = true)
 |-- HC04_VC110: double (nullable = true)
 |-- HC01_VC111: integer (nullable = true)
 |-- HC02_VC111: integer (nullable = true)
 |-- HC03_VC111: double (nullable = true)
 |-- HC04_VC111: double (nullable = true)
 |-- HC01_VC112: integer (nullable = true)
 |-- HC02_VC112: integer (nullable = true)
 |-- HC03_VC112: double (nullable = true)
 |-- HC04_VC112: double (nullable = true)
 |-- HC01_VC113: integer (nullable = true)
 |-- HC02_VC113: integer (nullable = true)
 |-- HC03_VC113: double (nullable = true)
 |-- HC04_VC113: double (nullable = true)
 |-- HC01_VC114: integer (nullable = true)
 |-- HC02_VC114: integer (nullable = true)
 |-- HC03_VC114: string (nullable = true)
 |-- HC04_VC114: string (nullable = true)
 |-- HC01_VC115: integer (nullable = true)
 |-- HC02_VC115: integer (nullable = true)
 |-- HC03_VC115: string (nullable = true)
 |-- HC04_VC115: string (nullable = true)
 |-- HC01_VC118: integer (nullable = true)
 |-- HC02_VC118: integer (nullable = true)
 |-- HC03_VC118: string (nullable = true)
 |-- HC04_VC118: string (nullable = true)
 |-- HC01_VC120: integer (nullable = true)
 |-- HC02_VC120: integer (nullable = true)
 |-- HC03_VC120: integer (nullable = true)
 |-- HC04_VC120: string (nullable = true)
 |-- HC01_VC121: integer (nullable = true)
 |-- HC02_VC121: integer (nullable = true)
 |-- HC03_VC121: string (nullable = true)
 |-- HC04_VC121: string (nullable = true)
 |-- HC01_VC122: integer (nullable = true)
 |-- HC02_VC122: integer (nullable = true)
 |-- HC03_VC122: string (nullable = true)
 |-- HC04_VC122: string (nullable = true)
 |-- HC01_VC124: integer (nullable = true)
 |-- HC02_VC124: integer (nullable = true)
 |-- HC03_VC124: string (nullable = true)
 |-- HC04_VC124: string (nullable = true)
 |-- HC01_VC125: integer (nullable = true)
 |-- HC02_VC125: integer (nullable = true)
 |-- HC03_VC125: string (nullable = true)
 |-- HC04_VC125: string (nullable = true)
 |-- HC01_VC126: integer (nullable = true)
 |-- HC02_VC126: integer (nullable = true)
 |-- HC03_VC126: string (nullable = true)
 |-- HC04_VC126: string (nullable = true)
 |-- HC01_VC130: integer (nullable = true)
 |-- HC02_VC130: integer (nullable = true)
 |-- HC03_VC130: integer (nullable = true)
 |-- HC04_VC130: string (nullable = true)
 |-- HC01_VC131: integer (nullable = true)
 |-- HC02_VC131: integer (nullable = true)
 |-- HC03_VC131: double (nullable = true)
 |-- HC04_VC131: double (nullable = true)
 |-- HC01_VC132: integer (nullable = true)
 |-- HC02_VC132: integer (nullable = true)
 |-- HC03_VC132: double (nullable = true)
 |-- HC04_VC132: double (nullable = true)
 |-- HC01_VC133: integer (nullable = true)
 |-- HC02_VC133: integer (nullable = true)
 |-- HC03_VC133: double (nullable = true)
 |-- HC04_VC133: double (nullable = true)
 |-- HC01_VC134: integer (nullable = true)
 |-- HC02_VC134: integer (nullable = true)
 |-- HC03_VC134: double (nullable = true)
 |-- HC04_VC134: double (nullable = true)
 |-- HC01_VC137: integer (nullable = true)
 |-- HC02_VC137: integer (nullable = true)
 |-- HC03_VC137: integer (nullable = true)
 |-- HC04_VC137: string (nullable = true)
 |-- HC01_VC138: integer (nullable = true)
 |-- HC02_VC138: integer (nullable = true)
 |-- HC03_VC138: double (nullable = true)
 |-- HC04_VC138: double (nullable = true)
 |-- HC01_VC141: integer (nullable = true)
 |-- HC02_VC141: integer (nullable = true)
 |-- HC03_VC141: integer (nullable = true)
 |-- HC04_VC141: string (nullable = true)
 |-- HC01_VC142: integer (nullable = true)
 |-- HC02_VC142: integer (nullable = true)
 |-- HC03_VC142: integer (nullable = true)
 |-- HC04_VC142: string (nullable = true)
 |-- HC01_VC143: integer (nullable = true)
 |-- HC02_VC143: integer (nullable = true)
 |-- HC03_VC143: integer (nullable = true)
 |-- HC04_VC143: string (nullable = true)
 |-- HC01_VC144: integer (nullable = true)
 |-- HC02_VC144: integer (nullable = true)
 |-- HC03_VC144: double (nullable = true)
 |-- HC04_VC144: double (nullable = true)
 |-- HC01_VC145: integer (nullable = true)
 |-- HC02_VC145: integer (nullable = true)
 |-- HC03_VC145: double (nullable = true)
 |-- HC04_VC145: double (nullable = true)
 |-- HC01_VC146: integer (nullable = true)
 |-- HC02_VC146: integer (nullable = true)
 |-- HC03_VC146: double (nullable = true)
 |-- HC04_VC146: double (nullable = true)
 |-- HC01_VC147: integer (nullable = true)
 |-- HC02_VC147: integer (nullable = true)
 |-- HC03_VC147: double (nullable = true)
 |-- HC04_VC147: double (nullable = true)
 |-- HC01_VC148: integer (nullable = true)
 |-- HC02_VC148: integer (nullable = true)
 |-- HC03_VC148: integer (nullable = true)
 |-- HC04_VC148: string (nullable = true)
 |-- HC01_VC149: integer (nullable = true)
 |-- HC02_VC149: integer (nullable = true)
 |-- HC03_VC149: double (nullable = true)
 |-- HC04_VC149: double (nullable = true)
 |-- HC01_VC150: integer (nullable = true)
 |-- HC02_VC150: integer (nullable = true)
 |-- HC03_VC150: double (nullable = true)
 |-- HC04_VC150: double (nullable = true)
 |-- HC01_VC151: integer (nullable = true)
 |-- HC02_VC151: integer (nullable = true)
 |-- HC03_VC151: double (nullable = true)
 |-- HC04_VC151: double (nullable = true)
 |-- HC01_VC152: integer (nullable = true)
 |-- HC02_VC152: integer (nullable = true)
 |-- HC03_VC152: double (nullable = true)
 |-- HC04_VC152: double (nullable = true)
 |-- HC01_VC153: integer (nullable = true)
 |-- HC02_VC153: integer (nullable = true)
 |-- HC03_VC153: integer (nullable = true)
 |-- HC04_VC153: string (nullable = true)
 |-- HC01_VC154: integer (nullable = true)
 |-- HC02_VC154: integer (nullable = true)
 |-- HC03_VC154: double (nullable = true)
 |-- HC04_VC154: double (nullable = true)
 |-- HC01_VC155: integer (nullable = true)
 |-- HC02_VC155: integer (nullable = true)
 |-- HC03_VC155: double (nullable = true)
 |-- HC04_VC155: double (nullable = true)
 |-- HC01_VC156: integer (nullable = true)
 |-- HC02_VC156: integer (nullable = true)
 |-- HC03_VC156: double (nullable = true)
 |-- HC04_VC156: double (nullable = true)
 |-- HC01_VC157: integer (nullable = true)
 |-- HC02_VC157: integer (nullable = true)
 |-- HC03_VC157: double (nullable = true)
 |-- HC04_VC157: double (nullable = true)
 |-- HC01_VC161: string (nullable = true)
 |-- HC02_VC161: string (nullable = true)
 |-- HC03_VC161: double (nullable = true)
 |-- HC04_VC161: double (nullable = true)
 |-- HC01_VC162: string (nullable = true)
 |-- HC02_VC162: string (nullable = true)
 |-- HC03_VC162: double (nullable = true)
 |-- HC04_VC162: double (nullable = true)
 |-- HC01_VC163: string (nullable = true)
 |-- HC02_VC163: string (nullable = true)
 |-- HC03_VC163: double (nullable = true)
 |-- HC04_VC163: double (nullable = true)
 |-- HC01_VC164: string (nullable = true)
 |-- HC02_VC164: string (nullable = true)
 |-- HC03_VC164: double (nullable = true)
 |-- HC04_VC164: double (nullable = true)
 |-- HC01_VC165: string (nullable = true)
 |-- HC02_VC165: string (nullable = true)
 |-- HC03_VC165: double (nullable = true)
 |-- HC04_VC165: double (nullable = true)
 |-- HC01_VC166: string (nullable = true)
 |-- HC02_VC166: string (nullable = true)
 |-- HC03_VC166: double (nullable = true)
 |-- HC04_VC166: double (nullable = true)
 |-- HC01_VC167: string (nullable = true)
 |-- HC02_VC167: string (nullable = true)
 |-- HC03_VC167: double (nullable = true)
 |-- HC04_VC167: double (nullable = true)
 |-- HC01_VC168: string (nullable = true)
 |-- HC02_VC168: string (nullable = true)
 |-- HC03_VC168: double (nullable = true)
 |-- HC04_VC168: double (nullable = true)
 |-- HC01_VC169: string (nullable = true)
 |-- HC02_VC169: string (nullable = true)
 |-- HC03_VC169: double (nullable = true)
 |-- HC04_VC169: double (nullable = true)
 |-- HC01_VC171: string (nullable = true)
 |-- HC02_VC171: string (nullable = true)
 |-- HC03_VC171: double (nullable = true)
 |-- HC04_VC171: double (nullable = true)
 |-- HC01_VC172: string (nullable = true)
 |-- HC02_VC172: string (nullable = true)
 |-- HC03_VC172: double (nullable = true)
 |-- HC04_VC172: double (nullable = true)
 |-- HC01_VC173: string (nullable = true)
 |-- HC02_VC173: string (nullable = true)
 |-- HC03_VC173: double (nullable = true)
 |-- HC04_VC173: double (nullable = true)
 |-- HC01_VC174: string (nullable = true)
 |-- HC02_VC174: string (nullable = true)
 |-- HC03_VC174: double (nullable = true)
 |-- HC04_VC174: double (nullable = true)
 |-- HC01_VC175: string (nullable = true)
 |-- HC02_VC175: string (nullable = true)
 |-- HC03_VC175: double (nullable = true)
 |-- HC04_VC175: double (nullable = true)
 |-- HC01_VC176: string (nullable = true)
 |-- HC02_VC176: string (nullable = true)
 |-- HC03_VC176: double (nullable = true)
 |-- HC04_VC176: double (nullable = true)
 |-- HC01_VC177: string (nullable = true)
 |-- HC02_VC177: string (nullable = true)
 |-- HC03_VC177: double (nullable = true)
 |-- HC04_VC177: double (nullable = true)
 |-- HC01_VC178: string (nullable = true)
 |-- HC02_VC178: string (nullable = true)
 |-- HC03_VC178: double (nullable = true)
 |-- HC04_VC178: double (nullable = true)
 |-- HC01_VC179: string (nullable = true)
 |-- HC02_VC179: string (nullable = true)
 |-- HC03_VC179: double (nullable = true)
 |-- HC04_VC179: double (nullable = true)
 |-- HC01_VC180: string (nullable = true)
 |-- HC02_VC180: string (nullable = true)
 |-- HC03_VC180: double (nullable = true)
 |-- HC04_VC180: double (nullable = true)


In [6]:
# Model parameters.
MAX_DDIST = 65   # keep only rows whose ddist is strictly below this value
COMM_PER = .25   # fraction of z_pop treated as commuters
BUS_DAYS = 250   # days per year on which the round trip (ddist*2) is made

# Rows of dfDist within range, with the income columns attached.
# Joining on the column *name* produces a single, unambiguous z_id column,
# so no follow-up drop() is needed.
dfFull = dfDist.where(dfDist.ddist < MAX_DDIST).join(dfIncomes, on='z_id')

# Per z_id: the summed m_pop over all of its in-range rows. This is the
# denominator used below to split a z_id's commuters across its rows.
# (No orderBy here: the result is only joined back, and a join does not
# preserve row order anyway.)
dfCommuteTo = dfFull.groupBy('z_id').agg({"m_pop": "sum"})\
                    .withColumnRenamed('sum(m_pop)', 'z_m_tot')

# dfCommuteTo is derived from dfFull, so this is a self-join; a name-based
# join avoids comparing two column references that share the same lineage.
dfFull2 = dfFull.join(dfCommuteTo, on='z_id')

# z_comm   : commuters per z_id (truncated to a whole number).
# z_m_comm : that z_id's commuters apportioned to this row in proportion to
#            m_pop / z_m_tot (truncated to a whole number).
dfFull3 = dfFull2.withColumn('z_comm', (dfFull2.z_pop*COMM_PER).cast('long'))\
                 .withColumn('z_m_comm', (dfFull2.z_pop*COMM_PER*dfFull2.m_pop/dfFull2.z_m_tot).cast('long'))

# Yearly total for the row: round trip * days * commuters.
# NOTE(review): the column name implies ddist is in miles -- confirm.
dfFull3 = dfFull3.withColumn('z_m_tot_miles_yr', dfFull3.ddist*2*BUS_DAYS*dfFull3.z_m_comm)

dfFull3.printSchema()


root
 |-- UATYPE: string (nullable = true)
 |-- ddist: double (nullable = true)
 |-- dist: double (nullable = true)
 |-- id: string (nullable = true)
 |-- m_house_unit: long (nullable = true)
 |-- m_id: long (nullable = true)
 |-- m_land: double (nullable = true)
 |-- m_lat_d: double (nullable = true)
 |-- m_lat_r: double (nullable = true)
 |-- m_long_d: double (nullable = true)
 |-- m_long_r: double (nullable = true)
 |-- m_pop: long (nullable = true)
 |-- m_water: double (nullable = true)
 |-- name: string (nullable = true)
 |-- z_house_unit: long (nullable = true)
 |-- z_land: double (nullable = true)
 |-- z_lat_d: double (nullable = true)
 |-- z_lat_r: double (nullable = true)
 |-- z_long_d: double (nullable = true)
 |-- z_long_r: double (nullable = true)
 |-- z_pop: long (nullable = true)
 |-- z_water: double (nullable = true)
 |-- z_households: long (nullable = true)
 |-- z_med_inc: long (nullable = true)
 |-- z_id: long (nullable = true)
 |-- z_m_tot: long (nullable = true)
 |-- z_comm: long (nullable = true)
 |-- z_m_comm: long (nullable = true)
 |-- z_m_tot_miles_yr: double (nullable = true)


In [7]:
# Conversion parameters.
FUEL_ECON = 21.4 #MPG
# NOTE(review): presumably metric tons of CO2 per gallon of fuel -- confirm
# the source of this figure.
CARB_PER_GAL = 0.008887
MILES2CARB = 1/FUEL_ECON*CARB_PER_GAL   # CARB_PER_GAL units per mile driven
PERSONSPERHH = 4                        # multiplier from per-person to per-household figures

# Normalize Data
# One row per z_id carrying its descriptive columns (these repeat on every
# row of dfFull3 that shares the z_id, hence distinct()).
dfZData = dfFull3.select('z_id','z_lat_d','z_long_d','z_land','z_water','z_pop','z_households','z_comm','z_med_inc','z_house_unit')\
                    .distinct()

# Yearly miles summed per z_id.
dfTemp = dfFull3.groupBy('z_id').agg({'z_m_tot_miles_yr':'sum'})

# Both frames derive from dfFull3, so join on the column name: this yields
# a single z_id column and avoids an ambiguous self-join condition.
dfZData = dfZData.join(dfTemp, on='z_id')\
                 .withColumnRenamed('sum(z_m_tot_miles_yr)','z_comm_miles')

# Per-household miles and the corresponding carbon figure.
dfZData = dfZData.withColumn('z_comm_miles_ph',dfZData.z_comm_miles/dfZData.z_pop*PERSONSPERHH)
dfZData = dfZData.withColumn('z_carb_ton_ph',dfZData.z_comm_miles_ph*MILES2CARB)

# Attach z_pov / z_per_comm (inner join: z_ids missing from dfFinancials
# are dropped).
dfZData = dfZData.join(dfFinancials, on='z_id')

# Fix the output column order explicitly.
dfZData = dfZData.select('z_id', 'z_lat_d', 'z_long_d', 'z_land', 'z_water', \
                         'z_pop', 'z_households', 'z_comm', 'z_med_inc', 'z_house_unit', \
                         'z_comm_miles', 'z_comm_miles_ph', 'z_carb_ton_ph', 'z_pov', 'z_per_comm')

In [8]:
# Persist the per-z_id results as CSV with a header row, replacing any
# previous output. coalesce(1) collapses the data to one partition so a
# single part file is produced (Spark writes it inside a directory named
# after the target path).
dfZData.coalesce(1) \
       .write \
       .mode('overwrite') \
       .option('header', True) \
       .csv(PATH_PROCESSEDDATA + ZCARBON_FN)