In [1]:
import sys
sys.path.append('C:\Users\Chrisi\Desktop\Studium\iv\Project\DirtyDrive')
sys.path.append('C:\Users\Chrisi\Desktop\Studium\iv\Project\DirtyDrive\DirtyDrive')
sys.path
import pandas as pd
import numpy as np
import django, os
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'DirtyDrive.settings')
#settings.configure()
django.setup()
from django.contrib.gis import gdal
gdal.HAS_GDAL
from DirtyDrive import db_saver
import datetime
from DirtyDrive.models import CityDistrict, DriveNowCarType, DriveNowCar, DriveNowChargingStation, DriveNowPetrolStation
from django.db import connection
from django.db.models import Count, Avg
from django.contrib.gis.geos import Point, Polygon
from geopy.distance import vincenty
from django.db import connection
import json
import db_controller
c = db_controller.db_controller()
In [2]:
def dictfetchall(cursor):
"Return all rows from a cursor as a dict"
columns = [col[0] for col in cursor.description]
return [
dict(zip(columns, row))
for row in cursor.fetchall()
]
def getDrivesDF(fromDate,toDate):
with connection.cursor() as cursor:
cursor.execute(
'SELECT car.name as car_name, car."carId" as car_id, datetime as datetime0, car.lng as lng0, car.lat as lat0, car."innerCleanliness" as clean0, dis.id as dis_id0, dis.name as dis_name0 \
FROM public."DirtyDrive_citydistrict" as dis, public."DirtyDrive_drivenowcar" as car \
WHERE ST_Contains(dis.polygon, car.point) AND \
car.datetime BETWEEN to_date(\''+fromDate+'\',\'YYYY-MM-DD\') AND to_date(\''+toDate+'\',\'YYYY-MM-DD\') \
;'
)
#res = cursor.fetchall()
drives_df = pd.DataFrame(dictfetchall(cursor))
drives_df = drives_df.sort_values(['car_id', 'datetime0'])
drives_df['duration'] = drives_df['datetime0'].shift(-1) - drives_df['datetime0']
drives_df['datetime1'] = drives_df['datetime0'].shift(-1)
drives_df['dis_id1'] = drives_df['dis_id0'].shift(-1)
drives_df['dis_name1'] = drives_df['dis_name0'].shift(-1)
drives_df['clean1'] = drives_df['clean0'].shift(-1)
drives_df['lat1'] = drives_df['lat0'].shift(-1)
drives_df['lng1'] = drives_df['lng0'].shift(-1)
#filter1
drives_df = drives_df[drives_df['duration'] > '00:11:00']
drives_df['distance'] = drives_df.apply(lambda p: vincenty((p['lat0'], p['lng0']), (p['lat1'], p['lng1'])).km , axis=1)
return drives_df
def genCountMatrix(drives_df):
counts_df = drives_df.groupby(['dis_id0', 'dis_id1']).size()
counts_df = counts_df.reset_index()
counts_df.columns = ('dis_id0','dis_id1','count')
mat = np.zeros((25, 25), dtype=np.int)
for row in counts_df.iterrows():
#print row[1][1]
d = row[1]
mat[int(d[0])-1][int(d[1])-1] = int(d[2]);
return mat
def saveMatrixJson(filename, mat):
matlist = mat.tolist()
dump = json.dumps(matlist)
with open(filename, "w") as text_file:
text_file.write(dump)
In [3]:
#fromDate = '2016-12-01'
#toDate = '2016-12-09'
for a in range(1,9):
for b in range(a+1,9):
fromDate = '2016-12-0'+str(a)
toDate = '2016-12-0'+str(b)
filename = "gen/chord_mat_"+str(a)+"-"+str(b-1)+".json"
print filename
drives_df = getDrivesDF(fromDate, toDate)
mymatrix = genCountMatrix(drives_df)
saveMatrixJson(filename, mymatrix)