In [14]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# NOTE(review): %pylab star-imports numpy and matplotlib.pyplot names into the
# global namespace and, with the `inline` argument, selects the inline backend
# again. Prefer explicit imports if no later cell relies on the injected names
# -- confirm before removing.
%pylab inline
In [15]:
import warnings

# Install a blanket 'ignore' filter: with no message, category or module
# restriction it applies to every warning raised from here on.
warnings.filterwarnings(action='ignore')
In [5]:
# !pip install pyelasticsearch
In [16]:
from pyelasticsearch import ElasticSearch, bulk_chunks
import pandas as pd
In [17]:
# Connection / target settings used by the cells below.
ES_HOST = "http://localhost:9200/"  # URL handed to the ElasticSearch client
INDEX_NAME = 'expo2009'             # index that is deleted, re-created and mapped
DOC_TYPE = 'flight'                 # document type passed to put_mapping
In [18]:
# Uncomment to display the ElasticSearch client's help in IPython:
# ElasticSearch?
In [19]:
# Client object shared by every following cell.
es = ElasticSearch(urls=ES_HOST)
In [20]:
# Number of documents matching the query '*' (no index is specified),
# taken before the index is reset.
es.count(query='*')['count']
Out[20]:
In [21]:
# Init index: drop any existing copy so the following cells start from scratch.
try:
    es.delete_index(INDEX_NAME)
except Exception as exc:
    # Best effort only -- the notebook carries on so the index can be created
    # afterwards (presumably the usual failure is that the index does not
    # exist yet on a first run; verify against the reported reason).
    # Catching Exception rather than using a bare `except:` lets
    # KeyboardInterrupt / SystemExit propagate, and the reason is printed
    # instead of being thrown away.
    print('ERROR: Deleting %s failed! (%s)' % (INDEX_NAME, exc))
else:
    # Only reached when delete_index returned without raising.
    print('Deleting %s' % (INDEX_NAME))
In [22]:
# Create the index named by INDEX_NAME; the response is the cell's output.
es.create_index(INDEX_NAME)
Out[22]:
In [23]:
# Explicit field mapping for the 'flight' document type.
#
# References:
#   put_mapping: https://pyelasticsearch.readthedocs.io/en/latest/api/#pyelasticsearch.ElasticSearch.put_mapping
#   null_value:  https://www.elastic.co/guide/en/elasticsearch/reference/current/null-value.html
#
# Notes:
#   * The five delay-cause fields (Carrier/Weather/NAS/Security/LateAircraft)
#     declare null_value -1, so an explicit null is indexed as -1.
#   * Month and DayofMonth are 'keyword', matching Year and DayOfWeek. They
#     previously used the legacy 'string' type, which was replaced by
#     text/keyword and is rejected by Elasticsearch 6+.
mapping = {
    'flight': {
        'properties': {
            'SecurityDelay': {
                'type': 'integer',
                'null_value': -1
            },
            'FlightNum': {
                'type': 'text'
            },
            'Origin': {
                'type': 'keyword'
            },
            'LateAircraftDelay': {
                'type': 'integer',
                'null_value': -1
            },
            'NASDelay': {
                'type': 'integer',
                'null_value': -1
            },
            'ArrTime': {
                'type': 'integer'
            },
            'AirTime': {
                'type': 'integer'
            },
            'DepTime': {
                'type': 'integer'
            },
            # Was 'string'; aligned with the other calendar fields.
            'Month': {
                'type': 'keyword'
            },
            'CRSElapsedTime': {
                'type': 'integer'
            },
            # Was 'string'; aligned with the other calendar fields.
            'DayofMonth': {
                'type': 'keyword'
            },
            'Distance': {
                'type': 'integer'
            },
            'CRSDepTime': {
                'type': 'integer'
            },
            'DayOfWeek': {
                'type': 'keyword'
            },
            'CancellationCode': {
                'type': 'keyword'
            },
            'Dest': {
                'type': 'keyword'
            },
            'DepDelay': {
                'type': 'integer'
            },
            'TaxiIn': {
                'type': 'integer'
            },
            'UniqueCarrier': {
                'type': 'keyword'
            },
            'ArrDelay': {
                'type': 'integer'
            },
            'Cancelled': {
                'type': 'boolean'
            },
            'Diverted': {
                'type': 'boolean'
            },
            'message': {
                'type': 'text'
            },
            'TaxiOut': {
                'type': 'integer'
            },
            'ActualElapsedTime': {
                'type': 'integer'
            },
            'CarrierDelay': {
                'type': 'integer',
                'null_value': -1
            },
            # Accepts ISO-8601 style strings or epoch milliseconds.
            '@timestamp': {
                'format': 'strict_date_optional_time||epoch_millis',
                'type': 'date'
            },
            'Year': {
                'type': 'keyword'
            },
            'WeatherDelay': {
                'type': 'integer',
                'null_value': -1
            },
            'CRSArrTime': {
                'type': 'integer'
            },
            'TailNum': {
                'type': 'text'
            }
        }
    }
}
# Register the field mapping for DOC_TYPE on the index; the response is the
# cell's output.
es.put_mapping(
    index=INDEX_NAME,
    doc_type=DOC_TYPE,
    mapping=mapping,
)
Out[23]:
In [24]:
# Number of documents matching the query '*' (no index is specified),
# taken after the index has been re-created and mapped.
es.count(query='*')['count']
Out[24]:
In [26]:
# If the import fails, we can selectively remove entries: first preview the
# affected documents with a search, then delete them using the same query.
# GET expo2009/_search
# {
# "query": {
# "range": {
# "@timestamp" : { "gte" : "2002-01-01T00:00:00" }
# }
# }
# }
# # https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-delete-by-query.html
# POST expo2009/_delete_by_query
# {
# "query": {
# "range": {
# "@timestamp" : { "gte" : "2002-01-01T00:00:00" }
# }
# }
# }
In [ ]:
# curl -XPOST "http://localhost:9200/expo2009/_delete_by_query" -H 'Content-Type: application/json' -d'
# {
# "query": {
# "range": {
# "@timestamp" : { "gte" : "2002-01-01T00:00:00" }
# }
# }
# }'
In [ ]: