In [14]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# NOTE(review): %pylab pulls numpy and matplotlib names into the interactive
# namespace (see the "Populating the interactive namespace" output below).
# Explicit imports would be cleaner, but confirm no later cell relies on the
# star-imported names before replacing it.
%pylab inline


Populating the interactive namespace from numpy and matplotlib

In [15]:
import warnings
warnings.filterwarnings('ignore')

In [5]:
# !pip install pyelasticsearch==1.4  # one-time setup; kept commented out once installed


Collecting pyelasticsearch
  Downloading pyelasticsearch-1.4.tar.gz (53kB)
    100% |████████████████████████████████| 61kB 931kB/s ta 0:00:01
Requirement already satisfied: certifi in /home/olli/anaconda3/lib/python3.6/site-packages (from pyelasticsearch)
Collecting elasticsearch<2.0.0,>=1.3.0 (from pyelasticsearch)
  Downloading elasticsearch-1.9.0-py2.py3-none-any.whl (59kB)
    100% |████████████████████████████████| 61kB 2.4MB/s eta 0:00:01
Requirement already satisfied: urllib3<2.0,>=1.8 in /home/olli/anaconda3/lib/python3.6/site-packages (from pyelasticsearch)
Collecting simplejson>=3.0 (from pyelasticsearch)
  Downloading simplejson-3.13.2.tar.gz (79kB)
    100% |████████████████████████████████| 81kB 1.7MB/s ta 0:00:011
Requirement already satisfied: six<2.0,>=1.4.0 in /home/olli/anaconda3/lib/python3.6/site-packages (from pyelasticsearch)
Building wheels for collected packages: pyelasticsearch, simplejson
  Running setup.py bdist_wheel for pyelasticsearch ... done
  Stored in directory: /home/olli/.cache/pip/wheels/1b/0c/0e/65b564e99a54d8db71e27f3997e55a27a2ab74960f001dac01
  Running setup.py bdist_wheel for simplejson ... done
  Stored in directory: /home/olli/.cache/pip/wheels/c2/d0/42/5d1d1290c19d999277582c585f80426c61987aff01eb104ed6
Successfully built pyelasticsearch simplejson
Installing collected packages: elasticsearch, simplejson, pyelasticsearch
Successfully installed elasticsearch-1.9.0 pyelasticsearch-1.4 simplejson-3.13.2

In [16]:
from pyelasticsearch import ElasticSearch, bulk_chunks
import pandas as pd

In [17]:
# Connection and index settings shared by the cells below.
ES_HOST = "http://localhost:9200/"  # base URL handed to the ElasticSearch client
INDEX_NAME = 'expo2009'             # index that gets deleted, re-created and mapped
DOC_TYPE = 'flight'                 # document type used when putting the mapping

In [18]:
# ElasticSearch?  # uncomment to display the client's docstring

In [19]:
# Client handle used by every Elasticsearch call in the cells below.
es = ElasticSearch(urls=ES_HOST)

In [20]:
# Number of documents matching the '*' query (shown as the cell output).
es.count(query='*')['count']


Out[20]:
1

In [21]:
# Init index: drop any existing copy of INDEX_NAME so it can be re-created
# from scratch by the next cell.
try:
    es.delete_index(INDEX_NAME)
except Exception as err:
    # Best-effort delete: a failure here (e.g. HTTP 404 when the index does
    # not exist yet) must not stop the notebook. Catching Exception instead of
    # a bare `except:` lets KeyboardInterrupt / SystemExit propagate, and
    # printing `err` keeps the actual reason visible.
    print('ERROR: Deleting %s failed! (%s)' % (INDEX_NAME, err))
else:
    # Only reported when the delete call returned without raising.
    print('Deleting %s' % (INDEX_NAME))


DELETE /expo2009 [status:404 request:0.007s]
ERROR: Deleting expo2009 failed!

In [22]:
# Create the (now absent) index; the acknowledgement dict is the cell output.
es.create_index(INDEX_NAME)


Out[22]:
{'acknowledged': True, 'index': 'expo2009', 'shards_acknowledged': True}

In [23]:
# Field mapping for the flight documents.
# https://pyelasticsearch.readthedocs.io/en/latest/api/#pyelasticsearch.ElasticSearch.put_mapping
# https://www.elastic.co/guide/en/elasticsearch/reference/current/null-value.html
#
# Conventions used below:
#   * the five *Delay cause fields carry `null_value: -1`, i.e. an explicit
#     null is indexed as -1 (see the null-value link above);
#   * calendar parts (Year, Month, DayofMonth, DayOfWeek) and codes are
#     `keyword`. Month and DayofMonth previously used the legacy `string`
#     type, which Elasticsearch deprecated in 5.x and removed in 6.0; they now
#     match Year / DayOfWeek.
#   * NOTE(review): Cancelled / Diverted are mapped as `boolean` — confirm the
#     imported records supply true/false rather than 0/1.
mapping = {
    'flight': {
        'properties': {
            'SecurityDelay': {'type': 'integer', 'null_value': -1},
            'FlightNum': {'type': 'text'},
            'Origin': {'type': 'keyword'},
            'LateAircraftDelay': {'type': 'integer', 'null_value': -1},
            'NASDelay': {'type': 'integer', 'null_value': -1},
            'ArrTime': {'type': 'integer'},
            'AirTime': {'type': 'integer'},
            'DepTime': {'type': 'integer'},
            'Month': {'type': 'keyword'},
            'CRSElapsedTime': {'type': 'integer'},
            'DayofMonth': {'type': 'keyword'},
            'Distance': {'type': 'integer'},
            'CRSDepTime': {'type': 'integer'},
            'DayOfWeek': {'type': 'keyword'},
            'CancellationCode': {'type': 'keyword'},
            'Dest': {'type': 'keyword'},
            'DepDelay': {'type': 'integer'},
            'TaxiIn': {'type': 'integer'},
            'UniqueCarrier': {'type': 'keyword'},
            'ArrDelay': {'type': 'integer'},
            'Cancelled': {'type': 'boolean'},
            'Diverted': {'type': 'boolean'},
            'message': {'type': 'text'},
            'TaxiOut': {'type': 'integer'},
            'ActualElapsedTime': {'type': 'integer'},
            'CarrierDelay': {'type': 'integer', 'null_value': -1},
            '@timestamp': {
                'format': 'strict_date_optional_time||epoch_millis',
                'type': 'date',
            },
            'Year': {'type': 'keyword'},
            'WeatherDelay': {'type': 'integer', 'null_value': -1},
            'CRSArrTime': {'type': 'integer'},
            'TailNum': {'type': 'text'},
        }
    }
}
# Register the field mapping on the index; the acknowledgement dict is the
# cell output.
es.put_mapping(
    index=INDEX_NAME,
    doc_type=DOC_TYPE,
    mapping=mapping,
)


Out[23]:
{'acknowledged': True}

In [24]:
# Re-check the number of documents matching '*' after the index was set up.
es.count(query='*')['count']


Out[24]:
1

In [26]:
# If the import fails, we can selectively remove entries, e.g. inspect and then
# delete everything with an @timestamp on or after 2002-01-01:

# GET expo2009/_search
# {
#   "query": {
#     "range": {
#         "@timestamp" : { "gte" : "2002-01-01T00:00:00" }
#     }
#   }
# }

# # https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-delete-by-query.html

# POST expo2009/_delete_by_query
# {
#   "query": { 
#     "range": {
#         "@timestamp" : { "gte" : "2002-01-01T00:00:00" }
#     }
#   }
# }

In [ ]:
# curl -XPOST "http://localhost:9200/expo2009/_delete_by_query" -H 'Content-Type: application/json' -d'
# {
#   "query": { 
#     "range": {
#         "@timestamp" : { "gte" : "2002-01-01T00:00:00" }
#     }
#   }
# }'

In [ ]: