Reading and writing shapefiles

Recall the qgis lesson 1 tutorial

In that lesson, you used qgis to plot volcanoe data that was stored as shapefiles

In the next cell we use fiona to read that shapefile and print out the data on the first three volcanoes


In [1]:
import a301utils
from pathlib import Path
from fiona import collection
import pprint
#
# make dictionaries print pretty
#
pp=pprint.PrettyPrinter(indent=4)
#
# find the Alaska shapefile given the location of the
# a301utils package
# 
utils_dir=Path(a301utils.__path__[0])
print('start here: {}'.format(utils_dir))
#
# go up one level (parent)
#
root=utils_dir.parent
shapefile=root / Path('notebooks/qgis/Data/lesson_1/Alaska_Volcanoes.shp')
print('\nfound shapefile: {}\n'.format(shapefile))          
#
# now use fiona to open the Volcanoes shapefile and get its metadata
#
with collection(str(shapefile), "r") as input:
    print('\nthe projection in wkt (well known text) format: \n{}\n'.format(input.crs_wkt))
    print('\nmetadata -- projection in epsg format: {}\n'
          .format(input.meta['crs']))
    print('\ndata schema for Alaska_Volcanoes shapefile\n')
    for key,value in input.meta['schema'].items():
        print('\n--{}--\n'.format(key))
        pp.pprint(value)
    data_points=list(input)
    print('\n--first three volcanoes--\n')
    for item in data_points[:3]:
        pp.pprint(item)


start here: /Users/phil/repos/a301_2016/a301utils

found shapefile: /Users/phil/repos/a301_2016/notebooks/qgis/Data/lesson_1/Alaska_Volcanoes.shp


the projection in wkt (well known text) format: 
GEOGCS["GCS_WGS_1984",DATUM["WGS_1984",SPHEROID["WGS_84",6378137.0,298.257223563]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]]


metadata -- projection in epsg format: {'init': 'epsg:4326'}


data schema for Alaska_Volcanoes shapefile


--properties--

OrderedDict([   ('ObjectID', 'int:9'),
                ('NUMBER', 'str:8'),
                ('LOCATION', 'str:24'),
                ('NAME', 'str:27'),
                ('ELEVATION', 'int:12'),
                ('TYPE', 'str:19'),
                ('STATUS', 'str:23'),
                ('TIME_CODE', 'str:2')])

--geometry--

'Point'

--first three volcanoes--

{   'geometry': {   'coordinates': (175.9079745324891, 52.34843477238331),
                    'type': 'Point'},
    'id': '0',
    'properties': OrderedDict([   ('ObjectID', 0),
                                  ('NUMBER', '1101-01-'),
                                  ('LOCATION', 'Aleutian Is'),
                                  ('NAME', 'Buldir'),
                                  ('ELEVATION', 656),
                                  ('TYPE', 'Stratovolcano'),
                                  ('STATUS', 'Holocene?'),
                                  ('TIME_CODE', '?')]),
    'type': 'Feature'}
{   'geometry': {   'coordinates': (-170.4327087900658, 63.59910978286132),
                    'type': 'Point'},
    'id': '1',
    'properties': OrderedDict([   ('ObjectID', 1),
                                  ('NUMBER', '1104-05-'),
                                  ('LOCATION', 'Alaska-W'),
                                  ('NAME', 'Kookooligit Mountains'),
                                  ('ELEVATION', 673),
                                  ('TYPE', 'Shield volcano'),
                                  ('STATUS', 'Holocene'),
                                  ('TIME_CODE', 'U')]),
    'type': 'Feature'}
{   'geometry': {   'coordinates': (177.59915153406416, 52.101440772153275),
                    'type': 'Point'},
    'id': '2',
    'properties': OrderedDict([   ('ObjectID', 2),
                                  ('NUMBER', '1101-02-'),
                                  ('LOCATION', 'Aleutian Is'),
                                  ('NAME', 'Kiska'),
                                  ('ELEVATION', 1220),
                                  ('TYPE', 'Stratovolcano'),
                                  ('STATUS', 'Historical'),
                                  ('TIME_CODE', 'D1')]),
    'type': 'Feature'}

In [4]:
import glob
import os
from pathlib import Path
import sys
import json
import numpy as np
import h5py
from datetime import datetime, timezone
from a301utils.a301_readfile import download
import fiona
import rasterio
import pyproj


tiff_file = 'MYD021KM.A2006303.2220.006.reproject_1_4_3.tif'
download(tiff_file)
#
# this file has the trimmed groundtrack from the groundtrack notebook
#
cloudsat_file='MYD021KM.A2006303.2220.006.groundtrack.h5'
download(cloudsat_file)

with h5py.File(cloudsat_file,'r') as groundfile:
    reproject_name=groundfile.attrs['reproject_filename']
    cloudsat_lats=groundfile['trimmed_lats'][...]
    cloudsat_lons=groundfile['trimmed_lons'][...]
    cloudsat_times=groundfile['cloudsat_times'][...]
    #
    # we know we had original datetimes in the Greenwich (UTC) timezone
    #
    cloudsat_dates=[datetime.fromtimestamp(item,timezone.utc) for item in cloudsat_times]


MYD021KM.A2006303.2220.006.reproject_1_4_3.tif already exists
and is 51315354 bytes
will not overwrite


MYD021KM.A2006303.2220.006.groundtrack.h5 already exists
and is 66368 bytes
will not overwrite

Define a simple schema (the only info is location and point num)


In [7]:
schema = { 'geometry': 'Point','properties':{'num':'int' }}
#
# copy the projection from the tif file so we put the groundtrack
# in the same coordinates
driver='ESRI Shapefile'
raster=rasterio.open(tiff_file,'r')
crs = raster.crs.to_dict()
proj = pyproj.Proj(crs)
with fiona.open("ground_track", "w", driver=driver, 
                schema=schema,crs=crs,encoding='utf-8') as output:
    for index,lon_lat in enumerate(zip(cloudsat_lons,cloudsat_lats)):
        lon,lat=lon_lat
        x,y=proj(lon,lat)
        #print(x,y,lon,lat)
        geometry={'coordinates': (x, y), 'type': 'Point'}
        out_dict=dict(geometry=geometry,properties={'num':index})
        output.write(out_dict)

now open qgis and add the tiffile as a raster layer and the groundtrack as a vector layer


In [ ]: