Or if it was way more than 9, then could have sorted the file names first, then only the change of scene location, construct a new dictionary ...
In [1]:
from osgeo import ogr, osr, gdal
import fiona
from shapely.geometry import Point, shape
import numpy as np
import pandas as pd
import os
import sys
import tarfile
import timeit
In [2]:
# Change this for Win7,macOS
bases = "C:\Users\deepak\Desktop\Repo\Maps\Districts\Census\Dist.shp"
# base_ = "/Users/macbook/Documents/BTP/Satellite/Data/Maps/Districts/Census_2011"
fc = fiona.open(bases)
In [3]:
def reverse_geocode(pt):
for feature in fc:
if shape(feature['geometry']).contains(pt):
return feature['properties']['DISTRICT']
return "NRI"
In [4]:
base2 = "G:\BTP\Satellite\Data\Test" # Win7
base = "G:\BTP\Satellite\Data\Test2" # Win7
In [5]:
def extract(filename, force=False):
root = os.path.splitext(os.path.splitext(filename)[0])[0] # remove .tar.gz
if os.path.isdir(os.path.join(base,root)) and not force:
# You may override by setting force=True.
print('%s already present - Skipping extraction of %s' % (root, filename))
else:
print('Extracting data for %s' % root)
tar = tarfile.open(os.path.join(base,filename))
sys.stdout.flush()
tar.extractall(os.path.join(base,root))
tar.close()
In [6]:
# extracting for Test2
for directory, subdirList, fileList in os.walk(base):
for filename in fileList:
if filename.endswith(".tar.gz"):
d = extract(filename)
In [7]:
directories = [os.path.join(base, d) for d in sorted(os.listdir(base)) if os.path.isdir(os.path.join(base, d))]
# print directories
In [8]:
ds = gdal.Open(base2 + "\LE07_L1TP_146039_20101223_20161211_01_T1\LE07_L1TP_146039_20101223_20161211_01_T1_B1.TIF")
In [9]:
type(ds)
Out[9]:
Prepare one ds
variable here itself, for the transformation of the coordinate system below.
In [10]:
# create the new coordinate system
wgs84_wkt = """
GEOGCS["WGS 84",
DATUM["WGS_1984",
SPHEROID["WGS 84",6378137,298.257223563,
AUTHORITY["EPSG","7030"]],
AUTHORITY["EPSG","6326"]],
PRIMEM["Greenwich",0,
AUTHORITY["EPSG","8901"]],
UNIT["degree",0.01745329251994328,
AUTHORITY["EPSG","9122"]],
AUTHORITY["EPSG","4326"]]"""
new_cs = osr.SpatialReference()
new_cs.ImportFromWkt(wgs84_wkt)
Out[10]:
In [11]:
def func(dsx):
old_cs= osr.SpatialReference()
old_cs.ImportFromWkt(dsx.GetProjectionRef())
trs = osr.CoordinateTransformation(old_cs,new_cs)
return trs
In [12]:
def pixel2coord(x, y, xoff, a, b, yoff, d, e):
"""Returns global coordinates from coordinates x,y of the pixel"""
xp = a * x + b * y + xoff
yp = d * x + e * y + yoff
return(xp, yp)
In [ ]:
In [28]:
dicts = [[],[],[],[],[],[],[],[],[]]
dictr = [(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0)]
dicti = {"146039":0, "146040":1, "146041":2, "147039":3, "147040":4, "147041":5, "148039":6, "148040":7, "149039":8}
In [29]:
k = 50
In [30]:
stx = timeit.default_timer()
for directory in directories:
""" Identifying Month, Year, Spacecraft ID """
date = directory.split('\\')[-1].split('_')[3] # Change for Win7
satx = directory.split('\\')[-1][3]
month = date[4:6]
year = date[0:4]
scene = directory.split('\\')[-1].split('_')[2]
index = dicti[scene]
#if index != 1: continue
""" Visiting every GeoTIFF file """
for _,_,files in os.walk(directory):
for filename in files:
if filename.endswith(".TIF"):
if filename[-5] == '2': break
print os.path.join(directory,filename)
ds = gdal.Open(os.path.join(directory,filename))
if ds == None: continue
col, row, _ = ds.RasterXSize, ds.RasterYSize, ds.RasterCount
xoff, a, b, yoff, d, e = ds.GetGeoTransform()
#--------------------
transform = func(ds)
#--------------------
dictr[index] = (col,row)
""" Now go to each pixel, find its lat,lon. Hence its district, and the pixel value """
for i in range(0,col,col/k):
for j in range(0,row,row/k):
########### fetching the lat and lon coordinates
x,y = pixel2coord(i, j, xoff, a, b, yoff, d, e)
lonx, latx, z = transform.TransformPoint(x,y)
########### fetching the name of district
point = Point(lonx,latx)
district = reverse_geocode(point)
dicts[index].append(district)
# #----------------------------------------------------------
# if filename[-5] == '1':
# point = Point(lonx,latx)
# district = reverse_geocode(point)
# dicts[i][str(lonx)+str(latx)] = district
# else:
# print "-------------------- !!!!!!! -------------------"
# district = dictx[str(lonx)+str(latx)]
# #----------------------------------------------------------
elapsed = timeit.default_timer() - stx
print (elapsed)
print "Seconds"
In [31]:
print dictr
In [32]:
print latx
print lonx
print district
In [34]:
for i in range(9):
print len(dicts[i])
In [47]:
# print dicts[0]
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [80]:
base = "G:\BTP\Satellite\Data\Bulk\Landsat"
In [69]:
# base = "G:\BTP\Satellite\Data\Test"
In [81]:
directories = [os.path.join(base, d) for d in sorted(os.listdir(base)) if os.path.isdir(os.path.join(base, d))]
# print directories
In [66]:
ricep = pd.read_csv("C:\Users\deepak\Desktop\Repo\BTP\Satellite\Ricep_large.csv")
ricep = ricep.drop(["Unnamed: 0"],axis=1)
ricep.head()
Out[66]:
In [67]:
a = np.empty((ricep.shape[0],1))*np.NAN
In [68]:
""" 'features' contain collumn indexes for the new features """
""" 'dictn' is the dictionary mapping name of collumn index to the index number """
features = []
dictn = {}
k = 10
for i in range(1,13):
for j in range(1,11):
s = str(i) + "_B" + str(j) + "_"
features.append(s+"M")
features.append(s+"V")
dictn[s+"M"] = k
dictn[s+"V"] = k+1
k = k+2
In [71]:
for i in range(1,13):
for j in range(1,11):
s = str(i) + "_B" + str(j) + "_"
features.append(s+"Mn")
features.append(s+"Vn")
In [85]:
tmp = pd.DataFrame(index=range(ricep.shape[0]),columns=features)
ricex = pd.concat([ricep,tmp], axis=1)
In [86]:
k = 50
In [87]:
stx = timeit.default_timer()
for directory in directories:
# if bx: continue
# else: bx = True
""" Identifying Month, Year, Spacecraft ID """
date = directory.split('\\')[-1].split('_')[3] # Change for Win7
satx = directory.split('\\')[-1][3]
month = date[4:6]
year = date[0:4]
scene = directory.split('\\')[-1].split('_')[2]
index = dicti[scene]
""" Visiting every GeoTIFF file """
for _,_,files in os.walk(directory):
for filename in files:
if filename.endswith(".TIF"):
if filename[-5] == '8': continue
ds = gdal.Open(os.path.join(directory,filename))
if ds == None: continue
col, row, _ = ds.RasterXSize, ds.RasterYSize, ds.RasterCount
xoff, a, b, yoff, d, e = ds.GetGeoTransform()
ind = -1
for i in range(0,col,col/k):
for j in range(0,row,row/k):
ind += 1
if ind>2600: break
########### fetching the lat and lon coordinates
########### fetching the name of district
district = dicts[index][ind]
if district == "NRI": continue
########### Locating the row in DataFrame which we want to update
district = district.lower()
district = district.strip()
r = ricex.index[(ricex['ind_district'] == district) & (ricex['Crop_Year'] == int(year))].tolist()
if len(r) == 1:
########### The pixel value for that location
px,py = i,j
pix = ds.ReadAsArray(px,py,1,1)
pix = int(pix[0][0])
""" Found the row, so now .."""
""" Find Collumn index corresponding to Month, Band """
####### Band Number ########
band = filename.split("\\")[-1].split("_")[7:][0].split(".")[0][1]
bnd = band
if band == '6':
if filename.split("\\")[-1].split("_")[7:][2][0] == '1':
bnd = band
else:
bnd = '9'
elif band == 'Q':
bnd = '10'
if month[0] == '0':
month = month[1]
sm = month + "_B" + bnd +"_M"
cm = dictn[sm]
r = r[0]
# cm is the collumn indexe for mean
# r[0] is the row index
##### Checking if values are null ...
valm = ricex.iloc[r,cm]
if pd.isnull(valm):
ricex.iloc[r,cm] = int(pix)
ricex.iloc[r,cm+1] = int(pix*pix)
ricex.iloc[r,cm+240] = 1
continue
##### if the values are not null ...
valv = int(ricex.iloc[r,cm+1])
n = int(ricex.iloc[r,cm+240])
n = n+1
# Mean & Variance update
ricex.iloc[r,cm] = valm + (pix-valm)/n
ricex.iloc[r,cm+1] = ((n-2)/(n-1))*valv + (pix-valm)*(pix-valm)/n
ricex.iloc[r,cm+240] = n
elapsed = timeit.default_timer() - stx
print (elapsed)
print "Seconds"
In [89]:
ricex.to_csv("Ricex_prepared.csv")