In [1]:
from data.geographic.arcgis import getStockData
from data.geographic.shapefileGeo import testSQLAlchemyORM, shapefileTest, generateWorldToDB
from data.geographic.geo import AddInformationUsingDataFrame, GisMapUpdate
from data.geographic.geo2 import makeMapLayer
import pandas as pd
import geopandas as gp
from shapely.geometry import Point
In [2]:
# Load the stock data through getStockData (imported from data.geographic.arcgis).
# NOTE(review): the return type is not visible in this notebook; presumably a
# pandas DataFrame — confirm against the helper's implementation.
df_main = getStockData()
shapefile
---------------
- test that prints the shapefile content
- the data is divided into two files: dbf and shp
- uses dictionaries as result sets to hold the data related to a location, and the location itself as a polygon
Use the geopandas datasets for country and city statistics, OR use the gadm28 dataset
- http://stackoverflow.com/questions/31997859/bulk-insert-a-pandas-dataframe-using-sqlalchemy
crs (coordinate reference system)
http://stackoverflow.com/questions/3845006/database-of-countries-and-their-cities
In [2]:
# Load the location data with the project's pre-existing loader routine.
naturalEarthToCSV = True

if naturalEarthToCSV:
    # Flip the getAsPandasDataFrame flag to load the data into the DB instead.
    gp_world, gp_cities = generateWorldToDB(
        loadCSV=True,
        getAsPandasDataFrame=False,
    )
    # Show both results; they are only bound when the loader above has run.
    print('Countries: ', gp_world)
    print('Cities: ', gp_cities)
In [9]:
# All fields available in the gadm28 shapefile:
#   'OBJECTID', 'UID', 'ID_0', 'ISO', 'NAME_0',
#   'ID_1', 'NAME_1', 'VARNAME_1', 'NL_NAME_1', 'HASC_1', 'CCN_1', 'CCA_1', 'TYPE_1', 'ENGTYPE_1', 'VALIDFR_1', 'VALIDTO_1', 'REMARKS_1',
#   'ID_2', 'NAME_2', 'VARNAME_2', 'NL_NAME_2', 'HASC_2', 'CCN_2', 'CCA_2', 'TYPE_2', 'ENGTYPE_2', 'VALIDFR_2', 'VALIDTO_2', 'REMARKS_2',
#   'ID_3', 'NAME_3', 'VARNAME_3', 'NL_NAME_3', 'HASC_3', 'CCN_3', 'CCA_3', 'TYPE_3', 'ENGTYPE_3', 'VALIDFR_3', 'VALIDTO_3', 'REMARKS_3',
#   'ID_4', 'NAME_4', 'VARNAME_4', 'CCN_4', 'CCA_4', 'TYPE_4', 'ENGTYPE_4', 'VALIDFR_4', 'VALIDTO_4', 'REMARKS_4',
#   'ID_5', 'NAME_5', 'CCN_5', 'CCA_5', 'TYPE_5', 'ENGTYPE_5', 'REGION', 'VARREGION', 'Shape_Leng', 'Shape_Area'

# Slow path: load the shapefile content as dictionaries, from instance i up to i_max.
loadShapefileData = False
# Fast path: let geopandas read the shapefile straight into a dataframe.
esriShapefileToGeopandas = True

if loadShapefileData:
    shapefileTest(i=0, i_max=1)

if esriShapefileToGeopandas:
    # Useful fields:
    #   'OBJECTID', 'geometry', 'UID', 'ID_0', 'ISO', 'NAME_0',
    #   'REGION', 'VARREGION', 'Shape_Leng', 'Shape_Area',
    #   'ID_1', 'NAME_1', 'ID_2', 'NAME_2', 'ID_3', 'NAME_3',
    #   'ID_4', 'NAME_4', 'ID_5', 'NAME_5'
    shp = gp.GeoDataFrame.from_file('./gadm28/gadm28.shp')

    # Keep the geometry in its own frame (OBJECTID plus geometry only) ...
    shp_1 = shp[['OBJECTID', 'geometry']]

    # ... and reduce shp to the attribute columns, without the geometry.
    shp = shp[[
        'OBJECTID', 'UID', 'ID_0', 'ISO', 'NAME_0',
        'REGION', 'VARREGION', 'Shape_Leng', 'Shape_Area',
        'ID_1', 'NAME_1',
        'ID_2', 'NAME_2',
        'ID_3', 'NAME_3',
        'ID_4', 'NAME_4',
        'ID_5', 'NAME_5',
    ]]

    # Optional export of both frames to tab-separated CSV files (disabled):
    #shp.to_csv("./data/allData.csv",header=True,index=False,sep="\t")
    #shp_1.to_csv("./data/allData_geom.csv",header=True,index=False,sep="\t")
In [4]:
# Combine and reshape the city and country-code data into a (Geo)DataFrame.
#
# Inputs
#   ./data/worldcitiespop.csv
#       columns: Country,City,AccentCity,Region,Population,Latitude,Longitude
#       kept:    Country (renamed to iso2 and upper-cased; it is the join key),
#                City, Region, Population, Latitude, Longitude
#   ./data/countryISO2, 3.csv
#       columns: euname,modified,linked_country,iso3,iso2,grc,isonum,country,imperitive
#       kept:    country, iso3, iso2
#
# Steps
#   - define the datasets
#   - merge the cities with their country (inner join on iso2)
#   - add a point geometry built from Longitude/Latitude
#   (the "store to csv" step from the original notes is not implemented in this cell)
#
# Results: df_cities, df_countryCodes, df_city_country, geometry, crs, df_geo
combineDataForCities = True

# Only cities with a population strictly above this threshold are kept.
MIN_CITY_POPULATION = 50000

if combineDataForCities:
    # Built as one non-mutating chain, so no column is assigned into a slice of
    # another frame (the original pattern can raise a chained-assignment warning).
    df_cities = (
        pd.read_csv(
            "./data/worldcitiespop.csv", sep=',', encoding="ISO-8859-1", header=0,
            names=['Country', 'City', 'AccentCity', 'Region', 'Population', 'Latitude', 'Longitude'],
        )[['Country', 'City', 'Region', 'Population', 'Latitude', 'Longitude']]
        .rename(columns={'Country': 'iso2'})
        .assign(iso2=lambda cities: cities['iso2'].str.upper())
        .loc[lambda cities: cities['Population'] > MIN_CITY_POPULATION]
    )

    df_countryCodes = pd.read_csv(
        "./data/countryISO2, 3.csv", sep=',', header=0,
        names=['euname', 'modified', 'linked_country', 'iso3', 'iso2',
               'grc', 'isonum', 'country', 'imperitive'],
    )[['country', 'iso3', 'iso2']]

    # Deliberately NOT named df_main: df_main already holds the result of
    # getStockData() and is displayed by a later cell, so reusing the name here
    # would silently replace the stock data whenever the notebook runs top to bottom.
    df_city_country = pd.merge(df_cities, df_countryCodes, on='iso2', how='inner')

    # One point per city, in (x, y) = (Longitude, Latitude) order.
    geometry = [
        Point(lon_lat)
        for lon_lat in zip(df_city_country.Longitude, df_city_country.Latitude)
    ]

    # EPSG:4326 coordinate reference system.
    # NOTE(review): the {'init': ...} dict form is reported as deprecated by newer
    # pyproj/geopandas releases; switch to "EPSG:4326" once the installed version
    # is confirmed to accept it.
    crs = {'init': 'epsg:4326'}
    df_geo = gp.GeoDataFrame(df_city_country, crs=crs, geometry=geometry)
In [3]:
'''
df_main contains the end result used as the ArcGIS feature layer.
It contains the share price, indexed by the company symbol, with the daily adjusted close column used as the changeable values
in a fixed table. The fixed-table setup is a constraint required by ArcGIS.
The dataframe merges location data and stock data from separate sources.
'''
df_main
Out[3]:
In [ ]: