In [8]:
import elasticsearch;
import random;
import math;
import time;
# search the user_id based on the given name and the nameId list
def _searchUserIdByName(_name, _nameIdList):
    """Return the id paired with ``_name`` in a list of name/id records.

    Every record is read through the keys ``'name'`` and ``'id'``. The id of
    the first record whose ``'name'`` equals ``_name`` is returned. An empty
    string is returned when the list is empty/None or when no record matches.
    """
    # Guard clause: nothing to scan.
    if not _nameIdList:
        return ''
    # Lazily walk the records and stop at the first match; '' is the fallback.
    return next(
        (_entry['id'] for _entry in _nameIdList if _entry['name'] == _name),
        '',
    )
def _fetchJeymartItems(_e2, _hit, _docObj):
    """Copy brand name and suggested price from one randomly picked matching item.

    Searches the ``odt_jeymart_items`` index for documents that have an
    ``hf_price_suggested`` field and that match the description, category and
    photo of ``_hit``. One of the returned hits is picked at random and its
    ``s_brand_name`` / ``hf_price_suggested`` values are written into
    ``_docObj``.

    Args:
        _e2: client object exposing ``search(index=..., doc_type=..., body=...)``.
        _hit: mapping read for ``'t_description'``, ``'k_category'`` and
            ``'k_photo'``.
        _docObj: dict that is updated in place.

    Returns:
        ``_docObj`` (the same object). It is returned unchanged when the
        search produced no hits.

    Raises:
        KeyError: if ``_hit`` lacks one of the three keys above, or the chosen
            hit's ``_source`` lacks ``s_brand_name`` / ``hf_price_suggested``.
    """
    _query = {
        "query": {
            "function_score": {
                "query": {
                    "bool": {
                        "must": [
                            {"exists": {"field": "hf_price_suggested"}},
                            {"match": {"t_description": _hit['t_description']}},
                            # NOTE(review): a "terms" clause takes a list of
                            # values - confirm k_category is stored as an array.
                            {"terms": {"k_category": _hit['k_category']}},
                            {"match": {"k_photo": _hit['k_photo']}},
                        ]
                    }
                },
                "functions": [
                    {"random_score": {"field": "hf_price_suggested"}}
                ],
                "score_mode": "sum",
            }
        },
        "sort": [
            {"_score": {"order": "asc"}}
        ],
    }
    # No "size" is passed, so the candidate pool is whatever page the server
    # returns by default.
    _res = _e2.search(index='odt_jeymart_items', doc_type='doc', body=_query)

    _candidates = _res['hits']['hits']
    # random.choice() raises on an empty sequence, so test for emptiness
    # explicitly. This replaces the old floor()/clamp arithmetic, whose
    # "index too large" branch could never run for a non-empty list.
    if _candidates:
        _chosen = random.choice(_candidates)['_source']
        _docObj['s_brand_name'] = _chosen['s_brand_name']
        _docObj['hf_price_suggested'] = _chosen['hf_price_suggested']
    return _docObj
def _runBulkInBatches(_es, _bList, _threshold=50000, _sleepSeconds=60):
    """Send a bulk body to the cluster as consecutive slices of ``_bList``.

    Args:
        _es: client object exposing ``bulk(body=...)``.
        _bList: list of bulk entries, in the order they must be sent.
        _threshold: maximum number of entries per ``bulk`` call. When the
            list holds action/source pairs (two consecutive entries per
            operation), keep this value even so a pair is never split across
            two requests; the default is even.
        _sleepSeconds: pause inserted between two consecutive batches. No
            pause follows the last batch.

    Returns:
        A dict summarising every dict-shaped batch response: ``took`` summed,
        ``errors`` OR-ed together and ``items`` concatenated in send order.
        ``{}`` when ``_bList`` is empty or no response was a dict.

    Raises:
        ValueError: if ``_threshold`` is not positive.
    """
    if _threshold <= 0:
        raise ValueError("_threshold must be a positive integer")

    _res = {}
    # range() with a step yields each batch start exactly once; the slice end
    # is relative to the start (the earlier code used the absolute threshold,
    # which made every batch after the first empty).
    for _startIdx in range(0, len(_bList), _threshold):
        if _startIdx > 0 and _sleepSeconds > 0:
            # throttle only between batches
            time.sleep(_sleepSeconds)
        _partRes = _es.bulk(body=_bList[_startIdx:_startIdx + _threshold])
        if isinstance(_partRes, dict):
            # Fold this response into the summary so callers can see failures.
            _res['took'] = _res.get('took', 0) + _partRes.get('took', 0)
            _res['errors'] = bool(_res.get('errors', False) or _partRes.get('errors', False))
            _res.setdefault('items', []).extend(_partRes.get('items', []))
    return _res
# Driver: assign a random numeric user id to every distinct user name found in
# "odt_jeymart_user_trx", then scroll through that index and, for each
# document, stage a partial update carrying the user id plus a brand/price
# borrowed from a matching "odt_jeymart_items" document.
#_choice = input("do you want to run the fix?");
_choice = "yes"
if _choice.lower() == "yes":
    # NOTE(review): the Python client's per-request timeout keyword is
    # `timeout`, expressed in seconds. The earlier `requestTimeout="600000"`
    # is not a keyword that client documents; 600000 is assumed to have meant
    # milliseconds, hence 600 here - confirm.
    _es = elasticsearch.Elasticsearch(["http://localhost:9201"], timeout=600)

    # Distinct user names via a terms aggregation. Only the first 10000
    # buckets come back; a name outside them gets no id and ends up with ''
    # as its k_user_id further down.
    _res = _es.search(
        index="odt_jeymart_user_trx",
        doc_type="doc",
        body={
            "size": 0,
            "aggs": {
                "NAME": {
                    "terms": {
                        "field": "user.s_full_name.keyword",
                        "size": 10000
                    }
                }
            }
        }
    )

    # create user_id based on the names: one distinct random integer in
    # [0, 1000000] per name. random.sample() guarantees uniqueness without
    # re-scanning the ids drawn so far.
    _names = _res['aggregations']['NAME']['buckets']
    _uids = random.sample(range(1000001), len(_names))
    # associate the name and user_id together
    _nameId = [
        {'name': _bucket['key'], 'id': _uid}
        for _bucket, _uid in zip(_names, _uids)
    ]

    # ----------------------------------------
    # - scrolling of the odt_jeymart_user_trx
    # ----------------------------------------
    _e2 = elasticsearch.Elasticsearch(["http://localhost:9201"])
    _page = _es.search(
        index="odt_jeymart_user_trx",
        doc_type="doc",
        body={
            "size": 1000,
            "query": {
                "match_all": {}
            }
        },
        scroll="5m"
    )
    # Kept in its own variable so the cursor can still be released after
    # other responses have been stored.
    _scrollId = _page['_scroll_id']
    _acc_hits = 0
    _bulkLst = []
    try:
        while len(_page['hits']['hits']) > 0:
            # loop each hits
            for _hit in _page['hits']['hits']:
                _docCtx = {}
                _src = _hit['_source']
                # 1. (k_user_id) search and replace the value of this doc
                _docCtx['k_user_id'] = _searchUserIdByName(_src['user']['s_full_name'], _nameId)
                # 2. run another query against the "odt_jeymart_items"
                _docCtx = _fetchJeymartItems(_e2, _src, _docCtx)
                # 3. update the _bulkLst for _bulk operation (action entry, then doc entry)
                _bulkLst.append({"update": {"_index": "odt_jeymart_user_trx", "_type": "doc", "_id": _hit['_id']}})
                _bulkLst.append({"doc": _docCtx})
            # counter info: count the page that was just processed, before
            # fetching the next one, so the first page is included.
            _acc_hits += len(_page['hits']['hits'])
            print(_acc_hits)
            _page = _es.scroll(scroll_id=_scrollId, scroll="5m")
            _scrollId = _page.get('_scroll_id', _scrollId)
    finally:
        # close the cursor, even when the loop above fails part-way
        _es.clear_scroll(scroll_id=_scrollId)

    print(len(_bulkLst))
    # 4. run _bulk
    # NOTE(review): everything is still sent as one request; for a large index
    # the body may be too big for a single call - consider sending it in slices.
    if _bulkLst:
        _res = _es.bulk(body=_bulkLst)
        print(_res)
    del _es
    del _e2
else:
    print('** so you are supposed to run the following **')
In [10]:
# Slicing demo: taking a slice builds a new list and leaves the source list as it was.
arr = list(range(1, 6))
print(arr)
arr2 = arr[:3]
print(arr2)
print(arr)