In [ ]:
from collections import namedtuple

rawEventsRdd = sc.textFile("/home/mert/yahoo/events.txt")
EventDataRow = namedtuple("EventDataRow", ["userId", "itemId", "ts", "latitude", "longitude", "city", "day_of_week", "time_of_day", "event_type"])

def parseRawData(line):
    lineSplit = line.split("\t")
    return EventDataRow(userId=lineSplit[0],
                        itemId=lineSplit[1],
                        ts=int(lineSplit[2]),
                        latitude=float(lineSplit[3]),
                        longitude=float(lineSplit[4]),
                        city=lineSplit[5],
                        day_of_week=int(lineSplit[6]),
                        time_of_day=int(lineSplit[7]),
                        event_type=lineSplit[-1],
                        )

#eventsRdd = sc.parallelize(rawEventsRdd.map(parseRawData).take(10000000))
eventsRdd = rawEventsRdd.map(parseRawData).cache()

# Map the raw string ids to dense integer indices and broadcast the lookup
# tables so every executor can translate ids locally.
userIdConversionDictionary = eventsRdd.map(lambda x: x.userId).distinct().zipWithIndex().collectAsMap()
userIdConversionDictionaryBroadcast = sc.broadcast(userIdConversionDictionary)
itemIdConversionDictionary = eventsRdd.map(lambda x: x.itemId).distinct().zipWithIndex().collectAsMap()
itemIdConversionDictionaryBroadcast = sc.broadcast(itemIdConversionDictionary)
cityConversionDictionary = eventsRdd.map(lambda x: x.city).distinct().zipWithIndex().collectAsMap()
cityConversionDictionaryBroadcast = sc.broadcast(cityConversionDictionary)

eventsConvertedRdd = eventsRdd.map(lambda x: EventDataRow(
    userId=userIdConversionDictionaryBroadcast.value[x.userId],
    itemId=itemIdConversionDictionaryBroadcast.value[x.itemId],
    ts=x.ts,
    latitude=x.latitude,
    longitude=x.longitude,
    city=cityConversionDictionaryBroadcast.value[x.city],
    day_of_week=x.day_of_week,
    time_of_day=x.time_of_day,
    event_type=x.event_type
))
eventsConvertedRdd.take(2)
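
# A minimal sketch of the id-indexing pattern above, assuming an active
# SparkContext `sc`; the ids here are made up for illustration. The exact
# indices depend on partition order, hence the "e.g.".
toyIds = sc.parallelize(["u7", "u3", "u7"]).distinct().zipWithIndex().collectAsMap()
toyIdsBroadcast = sc.broadcast(toyIds)  # e.g. {'u7': 0, 'u3': 1}
print sc.parallelize(["u3", "u7"]).map(lambda u: toyIdsBroadcast.value[u]).collect()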
In [ ]:
# Re-key the events as (userId, (itemId, ts, latitude, longitude)) pairs.
finalRDD = eventsConvertedRdd.map(lambda x: (
    x.userId, (
        x.itemId,
        x.ts,
        x.latitude,
        x.longitude,)
))
finalRDD.take(3)
#groupData = map((lambda (x,y): (x, list(y))), sorted(finalRDD.groupByKey().collect()))
#groupData = map((lambda (x,y): (x, sorted(list(y),key=lambda a: a[1]))), sorted(finalRDD.groupByKey()))
# Group the events per user and sort each user's events by timestamp.
groupData = finalRDD.groupByKey().map(lambda (x,y): (x, sorted(list(y), key=lambda a: a[1])))
#groupData = sc.parallelize(groupData.take(2000))
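
# Sketch of the grouped shape on toy pairs (values made up); key order in the
# collected result is not guaranteed.
toyPairs = sc.parallelize([(1, ('a', 20)), (1, ('b', 10)), (2, ('c', 5))])
print toyPairs.groupByKey().map(lambda (k, v): (k, sorted(list(v), key=lambda a: a[1]))).collect()
# e.g. [(1, [('b', 10), ('a', 20)]), (2, [('c', 5)])]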
In [ ]:
from math import radians, cos, sin, asin, sqrt

def haversine(lon1, lat1, lon2, lat2):
    """
    Calculate the great circle distance between two points
    on the earth (specified in decimal degrees)
    """
    # convert decimal degrees to radians
    lon1, lat1, lon2, lat2 = map(radians, [lon1, lat1, lon2, lat2])
    # haversine formula
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    a = sin(dlat/2)**2 + cos(lat1) * cos(lat2) * sin(dlon/2)**2
    c = 2 * asin(sqrt(a))
    r = 6371  # Radius of earth in kilometers. Use 3956 for miles
    return c * r
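
# Quick sanity check: Paris (48.8566 N, 2.3522 E) to London (51.5074 N,
# 0.1278 W) is roughly 344 km along the great circle.
print haversine(2.3522, 48.8566, -0.1278, 51.5074)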
def detectMovement(x):
    """Label each event with a movement class derived from the velocity
    between consecutive events: 1 = not available, 2 = standing still,
    3 = walking speed, 4 = faster."""
    data = x[1]
    newData = [(data[0][0], data[0][1], data[0][2], data[0][3], 1)]
    for i in xrange(1, len(data)):
        event = data[i]
        distance = haversine(event[3], event[2], data[i-1][3], data[i-1][2]) * 1000  # in meters
        time_difference = event[1] - newData[i-1][1]  # in seconds
        moving = 1  # not available
        if time_difference <= 300:  # if 2 consecutive events are more than 300 seconds apart, the movement is not available
            velocity = distance / time_difference if time_difference > 0 else -1
            if velocity < 0:
                moving = 1  # not available
            elif velocity <= 1:
                moving = 2  # standing still
            elif velocity <= 2.4:
                moving = 3  # walking speed
            else:
                moving = 4  # faster
        newData.append((event[0], event[1], event[2], event[3], moving))
    return (x[0], newData)
    #return x
    #print haversine(elem[0][1][2][1],elem[0][1][1][1],elem[6][1][2][1],elem[6][1][1][1])

groupData = groupData.map(detectMovement).cache()
#groupData.take(1)
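
# Hypothetical usage sketch (ids and coordinates made up): two events 60 s and
# ~100 m apart, i.e. about 1.7 m/s, so the second should be labelled 3 (walking).
sample = (42, [(7, 1284101485, 40.0000, 29.0000),
               (7, 1284101545, 40.0009, 29.0000)])
print detectMovement(sample)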
In [ ]:
import datetime
from math import radians, cos, sin, asin, sqrt
from collections import Counter
def remove_duplicates(values):
    output = []
    seen = set()
    for value in values:
        # If value has not been encountered yet,
        # ... add it to both list and set.
        if value not in seen:
            output.append(value)
            seen.add(value)
    return output

def haversine(lon1, lat1, lon2, lat2):
    """
    Calculate the great circle distance between two points
    on the earth (specified in decimal degrees)
    """
    # convert decimal degrees to radians
    lon1, lat1, lon2, lat2 = map(radians, [lon1, lat1, lon2, lat2])
    # haversine formula
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    a = sin(dlat/2)**2 + cos(lat1) * cos(lat2) * sin(dlon/2)**2
    c = 2 * asin(sqrt(a))
    r = 6371  # Radius of earth in kilometers. Use 3956 for miles
    return c * r
def convertLocation(line):
    """Replace raw coordinates with discrete context features: a work/home/other
    location label, a time-of-day slot, and recency/frequency flags."""
    listGroup = line[1]
    # Daytime events (06:00-18:00): the event with the most neighbours within
    # 100 m anchors the "work" location.
    workGroup = [x for x in listGroup if datetime.datetime.fromtimestamp(int(x[1])).hour >= 6 and
                 datetime.datetime.fromtimestamp(int(x[1])).hour <= 18]
    numNearLocation = []
    i = 0
    for x in workGroup:
        numNearLocation.append(0)
        for y in workGroup:
            if haversine(x[3], x[2], y[3], y[2]) < 0.1:
                numNearLocation[i] = numNearLocation[i] + 1
        i = i + 1
    if len(numNearLocation) > 0:
        index_work = numNearLocation.index(max(numNearLocation))
    else:
        index_work = -1
    # workGroup = [(x[0],x[1],x[2],x[3],1) if haversine(x[3], x[2], workGroup[index_work][3], workGroup[index_work][2]) < 0.1
    #              else (x[0],x[1],x[2],x[3],0) for x in workGroup]
    #workGroup3 = [(x[0],x[1],0) for x in workGroup if haversine(x[3], x[2], workGroup[index][3], workGroup[index][2]) >= 0.1]
    # Night-time events: the same density argument anchors the "home" location.
    homeGroup = [x for x in listGroup if datetime.datetime.fromtimestamp(int(x[1])).hour < 6 or
                 datetime.datetime.fromtimestamp(int(x[1])).hour > 18]
    numNearLocation = []
    i = 0
    for x in homeGroup:
        numNearLocation.append(0)
        for y in homeGroup:
            if haversine(x[3], x[2], y[3], y[2]) < 0.1:
                numNearLocation[i] = numNearLocation[i] + 1
        i = i + 1
    if len(numNearLocation) > 0:
        index_home = numNearLocation.index(max(numNearLocation))
    else:
        index_home = -1
    # homeGroup = [(x[0],x[1],x[2],x[3],2) if haversine(x[3], x[2], homeGroup[index_home][3], homeGroup[index_home][2]) < 0.1
    #              else (x[0],x[1],x[2],x[3],0) for x in homeGroup]
    # Location label: 1 = within 10 m of the work anchor, 2 = within 10 m of the
    # home anchor, 3 = elsewhere. Tuples become (itemId, ts, moving, location).
    if index_home != -1 and index_work != -1:
        listGroup = [(x[0],x[1],x[4],1) if haversine(x[3], x[2], workGroup[index_work][3], workGroup[index_work][2]) < 0.01
                     else (
                         (x[0],x[1],x[4],2) if haversine(x[3], x[2], homeGroup[index_home][3], homeGroup[index_home][2]) < 0.01
                         else (x[0],x[1],x[4],3)
                     )
                     for x in listGroup]
    else:
        listGroup = [(x[0],x[1],x[4],3)
                     for x in listGroup]
    # Time-of-day slot: 1 = hours 06-13, 2 = hours 14-18 (hour 13 falls into the
    # first bucket), 3 = night. The raw timestamp is dropped here, so tuples
    # become (itemId, moving, location, timeslot).
    listGroup = [(x[0],x[2],x[3],1) if datetime.datetime.fromtimestamp(int(x[1])).hour >= 6 and
                 datetime.datetime.fromtimestamp(int(x[1])).hour <= 13
                 else (
                     (x[0],x[2],x[3],2) if datetime.datetime.fromtimestamp(int(x[1])).hour >= 13 and
                     datetime.datetime.fromtimestamp(int(x[1])).hour <= 18
                     else (x[0],x[2],x[3],3)
                 )
                 for x in listGroup]
    # context for last app used
    # newListGroup = []
    # for i in range(len(listGroup)):
    #     if i == 0:
    #         newListGroup.append((listGroup[i][0],listGroup[i][1],listGroup[i][2],listGroup[i][3],-1))
    #     else:
    #         newListGroup.append((listGroup[i][0],listGroup[i][1],listGroup[i][2],listGroup[i][3],listGroup[i-1][0]))
    # label with 1 and 0
    # newListGroup = []
    # for i in range(len(listGroup)):
    #     NumberOfLastApp = 50
    #     if i < NumberOfLastApp:
    #         lastApp = [x[0] for x in listGroup[:i]]
    #         if listGroup[i][0] not in lastApp:
    #             newListGroup.append((listGroup[i][0],listGroup[i][1],listGroup[i][2],listGroup[i][3],0))
    #         else:
    #             newListGroup.append((listGroup[i][0],listGroup[i][1],listGroup[i][2],listGroup[i][3],1))
    #     else:
    #         lastApp = [x[0] for x in listGroup[i-NumberOfLastApp:i]]
    #         if listGroup[i][0] not in lastApp:
    #             newListGroup.append((listGroup[i][0],listGroup[i][1],listGroup[i][2],listGroup[i][3],0))
    #         else:
    #             newListGroup.append((listGroup[i][0],listGroup[i][1],listGroup[i][2],listGroup[i][3],1))
    # listGroup = newListGroup
    # least recently used: flag whether the app is among the 6 most recently
    # used apps of the training portion.
    l = len(listGroup)
    numTrain = l * 8 / 10
    numTest = l - numTrain
    trainList = listGroup[:numTrain]  # 0.8 train set
    trainList = trainList[::-1]  # most recent first; the list is already in timestamp order and the raw ts was dropped above, so sorting on x[1] would sort by the movement label instead
    RecommenderDuplicate = [t[0] for t in trainList]  # take only the app id for the train set
    Recommender = remove_duplicates(RecommenderDuplicate)  # remove duplicates
    NumberOfLastApp = 6
    if len(Recommender) >= NumberOfLastApp:
        lastApp = Recommender[:NumberOfLastApp]
    else:
        lastApp = Recommender
    newListGroup = []
    for i in range(len(listGroup)):
        if listGroup[i][0] not in lastApp:
            newListGroup.append((listGroup[i][0],listGroup[i][1],listGroup[i][2],listGroup[i][3],0))
        else:
            newListGroup.append((listGroup[i][0],listGroup[i][1],listGroup[i][2],listGroup[i][3],1))
    listGroup = newListGroup
    #listGroup = [(x[0],x[1],x[2],x[3],1) for x in listGroup]
    # context for frequency
    # newListGroup = []
    # for i in range(len(listGroup)):
    #     if i == 0:
    #         newListGroup.append((listGroup[i][0],listGroup[i][1],listGroup[i][2],listGroup[i][3],-1))
    #     else:
    #         appGroup = [x[0] for x in listGroup[:i]]
    #         frequentApp = Counter(appGroup).most_common()[0][0]
    #         newListGroup.append((listGroup[i][0],listGroup[i][1],listGroup[i][2],listGroup[i][3],frequentApp))
    # label with 1 and 0
    # newListGroup = []
    # for i in range(len(listGroup)):
    #     if i == 0:
    #         newListGroup.append((listGroup[i][0],listGroup[i][1],listGroup[i][2],listGroup[i][3],listGroup[i][4],0))
    #     else:
    #         appGroup = [x[0] for x in listGroup[:i]]
    #         frequentApp = Counter(appGroup).most_common()
    #         frequentApp = [x[0] for x in frequentApp]
    #         numberOfFrequentApp = 4
    #         if len(frequentApp) >= numberOfFrequentApp:
    #             frequentApp = frequentApp[:numberOfFrequentApp]
    #         if listGroup[i][0] not in frequentApp:
    #             newListGroup.append((listGroup[i][0],listGroup[i][1],listGroup[i][2],listGroup[i][3],listGroup[i][4],1))
    #         else:
    #             newListGroup.append((listGroup[i][0],listGroup[i][1],listGroup[i][2],listGroup[i][3],listGroup[i][4],0))
    # first used: flag whether the app is among the first 6 distinct apps of
    # the training portion (the "first top 6" variant).
    l = len(listGroup)
    numTrain = l * 8 / 10
    numTest = l - numTrain
    trainList = listGroup[:numTrain]  # 0.8 train set
    RecommenderDuplicate = [t[0] for t in trainList]  # take only the app id for the train set
    Recommender = remove_duplicates(RecommenderDuplicate)  # remove duplicates
    numberOfFrequentApp = 6
    if len(Recommender) >= numberOfFrequentApp:
        firstApp = Recommender[:numberOfFrequentApp]
    else:
        firstApp = Recommender
    newListGroup = []
    for i in range(len(listGroup)):
        if listGroup[i][0] not in firstApp:
            newListGroup.append((listGroup[i][0],listGroup[i][1],listGroup[i][2],listGroup[i][3],listGroup[i][4],0))
        else:
            newListGroup.append((listGroup[i][0],listGroup[i][1],listGroup[i][2],listGroup[i][3],listGroup[i][4],1))
    listGroup = newListGroup
    # most frequently used: flag whether the app is among the 6 most frequent
    # apps of the training portion.
    l = len(listGroup)
    numTrain = l * 8 / 10
    numTest = l - numTrain
    trainList = listGroup[:numTrain]  # 0.8 train set
    RecommenderDuplicate = [t[0] for t in trainList]  # take only the app id for the train set
    Recommender = Counter(RecommenderDuplicate).most_common()
    Recommender = [t[0] for t in Recommender]
    numberOfFrequentApp = 6
    if len(Recommender) >= numberOfFrequentApp:
        frequentApp = Recommender[:numberOfFrequentApp]
    else:
        frequentApp = Recommender
    newListGroup = []
    for i in range(len(listGroup)):
        if listGroup[i][0] not in frequentApp:
            newListGroup.append((listGroup[i][0],listGroup[i][1],listGroup[i][2],listGroup[i][3],listGroup[i][4],listGroup[i][5],0))
        else:
            newListGroup.append((listGroup[i][0],listGroup[i][1],listGroup[i][2],listGroup[i][3],listGroup[i][4],listGroup[i][5],1))
    listGroup = newListGroup
    #time = datetime.datetime.fromtimestamp(int(line[1][0][1]))
    #line[1][1] = datetime.datetime.fromtimestamp(int("1284101485")).strftime('%Y-%m-%d %H:%M:%S')
    #return line[0],(workGroup+homeGroup)[:20],listGroup[:20]#,len(workGroup+homeGroup),len(workGroup),len(homeGroup)
    return line[0], newListGroup
final = groupData.map(convertLocation)
final.take(1)
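
# Hypothetical shape of one element of `final` (values made up): the key is the
# user id and each event tuple is
# (itemId, moving, location, timeslot, recentFlag, firstUsedFlag, frequentFlag),
# e.g. (42, [(7, 2, 1, 1, 1, 0, 1), (9, 3, 3, 2, 0, 0, 0)])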
In [ ]:
from random import shuffle
def topFiveSortedList(oldList, context, probability):
    """Insert (context, probability) into the five-slot list, which is kept in
    ascending order of probability; the current minimum is evicted."""
    for x in range(0, len(oldList) - 1):
        if x == 0 and oldList[x][1] < probability:
            oldList[x] = (context, probability)
            if oldList[x+1][1] < oldList[x][1]:
                temp = oldList[x+1]
                oldList[x+1] = oldList[x]
                oldList[x] = temp
            else:
                break
        elif oldList[x+1][1] < oldList[x][1]:
            temp = oldList[x+1]
            oldList[x+1] = oldList[x]
            oldList[x] = temp
        else:
            break
    #return sorted(oldList,key=lambda x: -x[1])
    return oldList
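
# Quick check with made-up values: a new candidate replaces the minimum slot
# and bubbles up to its ordered position (largest last).
print topFiveSortedList([(-1, 0)] * 5, 'appA', 0.3)
# -> [(-1, 0), (-1, 0), (-1, 0), (-1, 0), ('appA', 0.3)]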
def remove_duplicates(values):
    output = []
    seen = set()
    for value in values:
        # If value has not been encountered yet,
        # ... add it to both list and set.
        if value not in seen:
            output.append(value)
            seen.add(value)
    return output

def bayesian(line):
    listGroup = line[1]
    #shuffle(listGroup) #shuffle the list
    l = len(listGroup)
    numTrain = l * 8 / 10
    numTest = l - numTrain
    trainList = listGroup[:numTrain]  # 0.8 train set
    testList = listGroup[numTrain:]  # 0.2 test set
    #trainRDD = sc.parallelize(trainList).count()
    newTestList = []
    for t in testList:
        context = [x for x in trainList if x[1]==t[1] and x[2]==t[2] and x[3]==t[3] and x[4]==t[4] and x[5]==t[5] and x[6]==t[6]]
        #context = [x for x in trainList if x[1]==t[1] and x[3]==t[3]]
        numContext = float(len(context))
        if numTrain != 0:
            p_context = numContext / numTrain  # P(C1i, C2j, C3k)
        else:
            p_context = 0
        p_app = [(-1,0),(-1,0),(-1,0),(-1,0),(-1,0)]
        context_no_duplicate = remove_duplicates(context)
        for c in context_no_duplicate:
            appi = [x for x in trainList if x[0]==c[0]]
            numAppi = float(len(appi))
            if numTrain != 0:
                p_appi = numAppi / numTrain
            else:
                p_appi = 0
            contextAppi = [x for x in trainList if x[0]==c[0] and x[1]==c[1] and x[2]==c[2] and x[3]==c[3] and x[4]==c[4] and x[5]==c[5] and x[6]==c[6]]
            #contextAppi = [x for x in trainList if x[0]==c[0] and x[1]==c[1] and x[3]==c[3]]
            if numAppi != 0:  # P(C1i, C2j, C3k | APPid)
                p_contextAppi = len(contextAppi) / numAppi
            else:
                p_contextAppi = 0
            if p_context != 0:  # P(APPid | C1i, C2j, C3k) = P(C1i, C2j, C3k | APPid) * P(APPid) / P(C1i, C2j, C3k)
                p = p_contextAppi * p_appi / p_context
            else:
                p = 0
            p_app = topFiveSortedList(p_app, c[0], p)
        p_app = sorted(p_app, key=lambda x: -x[1])
        app_rec = map(lambda x: x[0], p_app[:5])
        newTestList.append((t[0], app_rec))
    # Discounted score: 1.0 if the used app is ranked first, down to 0.2 for
    # rank five; misses also increment numHit here, so the score is averaged
    # over all test events.
    scores = 0
    numHit = 0
    for t in newTestList:
        if t[0] == t[1][0]:
            scores = scores + 1.0
            numHit = numHit + 1
        elif t[0] == t[1][1]:
            scores = scores + 0.8
            numHit = numHit + 1
        elif t[0] == t[1][2]:
            scores = scores + 0.6
            numHit = numHit + 1
        elif t[0] == t[1][3]:
            scores = scores + 0.4
            numHit = numHit + 1
        elif t[0] == t[1][4]:
            scores = scores + 0.2
            numHit = numHit + 1
        else:
            numHit = numHit + 1
    #scores = scores / numTest
    if numHit != 0:
        scores = scores / numHit
    else:
        scores = 0
    #return newTestList[:20]
    return scores
result = final.map(bayesian)
#result.mean()
f = open('asdf2.txt','a')
f.write(str(result.mean()) + ' last 6 app, first top 6, frequency 6 added according to whole train data with all data')
f.write('\n')
f.close()
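
# Worked example of the estimate above with made-up counts: out of 10 training
# events, app A occurs 5 times, the current context 4 times, and (A and context)
# occur together 3 times. Then P(context|A) = 3/5, P(A) = 5/10 and
# P(context) = 4/10, so P(A|context) = (3/5)*(5/10)/(4/10) = 0.75, which matches
# the direct count 3/4.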
In [ ]:
from random import shuffle
def topFiveSortedList(oldList, context, probability):
    """Insert (context, probability) into the five-slot list, which is kept in
    ascending order of probability; the current minimum is evicted."""
    for x in range(0, len(oldList) - 1):
        if x == 0 and oldList[x][1] < probability:
            oldList[x] = (context, probability)
            if oldList[x+1][1] < oldList[x][1]:
                temp = oldList[x+1]
                oldList[x+1] = oldList[x]
                oldList[x] = temp
            else:
                break
        elif oldList[x+1][1] < oldList[x][1]:
            temp = oldList[x+1]
            oldList[x+1] = oldList[x]
            oldList[x] = temp
        else:
            break
    #return sorted(oldList,key=lambda x: -x[1])
    return oldList

def remove_duplicates(values):
    output = []
    seen = set()
    for value in values:
        # If value has not been encountered yet,
        # ... add it to both list and set.
        if value not in seen:
            output.append(value)
            seen.add(value)
    return output

def bayesian(line):
    listGroup = line[1]
    #shuffle(listGroup) #shuffle the list
    l = len(listGroup)
    numTrain = l * 8 / 10
    numTest = l - numTrain
    trainList = listGroup[:numTrain]  # 0.8 train set
    testList = listGroup[numTrain:]  # 0.2 test set
    #trainRDD = sc.parallelize(trainList).count()
    newTestList = []
    for t in testList:
        context = [x for x in trainList if x[1]==t[1] and x[2]==t[2] and x[3]==t[3] and x[4]==t[4] and x[5]==t[5] and x[6]==t[6]]
        #context = [x for x in trainList if x[1]==t[1] and x[3]==t[3]]
        numContext = float(len(context))
        if numTrain != 0:
            p_context = numContext / numTrain  # P(C1i, C2j, C3k)
        else:
            p_context = 0
        p_app = [(-1,0),(-1,0),(-1,0),(-1,0),(-1,0)]
        context_no_duplicate = remove_duplicates(context)
        for c in context_no_duplicate:
            appi = [x for x in trainList if x[0]==c[0]]
            numAppi = float(len(appi))
            if numTrain != 0:
                p_appi = numAppi / numTrain
            else:
                p_appi = 0
            contextAppi = [x for x in trainList if x[0]==c[0] and x[1]==c[1] and x[2]==c[2] and x[3]==c[3] and x[4]==c[4] and x[5]==c[5] and x[6]==c[6]]
            #contextAppi = [x for x in trainList if x[0]==c[0] and x[1]==c[1] and x[3]==c[3]]
            if numAppi != 0:  # P(C1i, C2j, C3k | APPid)
                p_contextAppi = len(contextAppi) / numAppi
            else:
                p_contextAppi = 0
            if p_context != 0:  # P(APPid | C1i, C2j, C3k) = P(C1i, C2j, C3k | APPid) * P(APPid) / P(C1i, C2j, C3k)
                p = p_contextAppi * p_appi / p_context
            else:
                p = 0
            p_app = topFiveSortedList(p_app, c[0], p)
        p_app = sorted(p_app, key=lambda x: -x[1])
        app_rec = map(lambda x: x[0], p_app[:5])
        newTestList.append((t[0], app_rec))
    # Discounted score as in the previous cell, but misses do not increment
    # numHit, so the score is averaged over hits only.
    scores = 0
    numHit = 0
    for t in newTestList:
        if t[0] == t[1][0]:
            scores = scores + 1.0
            numHit = numHit + 1
        elif t[0] == t[1][1]:
            scores = scores + 0.8
            numHit = numHit + 1
        elif t[0] == t[1][2]:
            scores = scores + 0.6
            numHit = numHit + 1
        elif t[0] == t[1][3]:
            scores = scores + 0.4
            numHit = numHit + 1
        elif t[0] == t[1][4]:
            scores = scores + 0.2
            numHit = numHit + 1
        # else:
        #     numHit = numHit + 1
    #scores = scores / numTest
    if numHit != 0:
        scores = scores / numHit
    else:
        scores = 0
    #return newTestList[:20]
    return scores
result = final.map(bayesian)
#result.mean()
f = open('asdf2.txt','a')
f.write(str(result.mean()) + ' last 6 app, first top 6, frequency 6 added according to whole train data with all data with quality hits')
f.write('\n')
f.close()
In [ ]:
from random import shuffle
def topFiveSortedList(oldList, context, probability):
    """Insert (context, probability) into the five-slot list, which is kept in
    ascending order of probability; the current minimum is evicted."""
    for x in range(0, len(oldList) - 1):
        if x == 0 and oldList[x][1] < probability:
            oldList[x] = (context, probability)
            if oldList[x+1][1] < oldList[x][1]:
                temp = oldList[x+1]
                oldList[x+1] = oldList[x]
                oldList[x] = temp
            else:
                break
        elif oldList[x+1][1] < oldList[x][1]:
            temp = oldList[x+1]
            oldList[x+1] = oldList[x]
            oldList[x] = temp
        else:
            break
    #return sorted(oldList,key=lambda x: -x[1])
    return oldList

def remove_duplicates(values):
    output = []
    seen = set()
    for value in values:
        # If value has not been encountered yet,
        # ... add it to both list and set.
        if value not in seen:
            output.append(value)
            seen.add(value)
    return output

def bayesian(line):
    listGroup = line[1]
    #shuffle(listGroup) #shuffle the list
    l = len(listGroup)
    numTrain = l * 8 / 10
    numTest = l - numTrain
    trainList = listGroup[:numTrain]  # 0.8 train set
    testList = listGroup[numTrain:]  # 0.2 test set
    #trainRDD = sc.parallelize(trainList).count()
    newTestList = []
    for t in testList:
        context = [x for x in trainList if x[1]==t[1] and x[2]==t[2] and x[3]==t[3] and x[4]==t[4] and x[5]==t[5] and x[6]==t[6]]
        #context = [x for x in trainList if x[1]==t[1] and x[3]==t[3]]
        numContext = float(len(context))
        if numTrain != 0:
            p_context = numContext / numTrain  # P(C1i, C2j, C3k)
        else:
            p_context = 0
        p_app = [(-1,0),(-1,0),(-1,0),(-1,0),(-1,0)]
        context_no_duplicate = remove_duplicates(context)
        for c in context_no_duplicate:
            appi = [x for x in trainList if x[0]==c[0]]
            numAppi = float(len(appi))
            if numTrain != 0:
                p_appi = numAppi / numTrain
            else:
                p_appi = 0
            contextAppi = [x for x in trainList if x[0]==c[0] and x[1]==c[1] and x[2]==c[2] and x[3]==c[3] and x[4]==c[4] and x[5]==c[5] and x[6]==c[6]]
            #contextAppi = [x for x in trainList if x[0]==c[0] and x[1]==c[1] and x[3]==c[3]]
            if numAppi != 0:  # P(C1i, C2j, C3k | APPid)
                p_contextAppi = len(contextAppi) / numAppi
            else:
                p_contextAppi = 0
            if p_context != 0:  # P(APPid | C1i, C2j, C3k) = P(C1i, C2j, C3k | APPid) * P(APPid) / P(C1i, C2j, C3k)
                p = p_contextAppi * p_appi / p_context
            else:
                p = 0
            p_app = topFiveSortedList(p_app, c[0], p)
        p_app = sorted(p_app, key=lambda x: -x[1])
        app_rec = map(lambda x: x[0], p_app[:5])
        newTestList.append((t[0], app_rec))
    # Discounted score averaged over hits only, as in the previous cell.
    scores = 0
    numHit = 0
    for t in newTestList:
        if t[0] == t[1][0]:
            scores = scores + 1.0
            numHit = numHit + 1
        elif t[0] == t[1][1]:
            scores = scores + 0.8
            numHit = numHit + 1
        elif t[0] == t[1][2]:
            scores = scores + 0.6
            numHit = numHit + 1
        elif t[0] == t[1][3]:
            scores = scores + 0.4
            numHit = numHit + 1
        elif t[0] == t[1][4]:
            scores = scores + 0.2
            numHit = numHit + 1
        # else:
        #     numHit = numHit + 1
    #scores = scores / numTest
    if numHit != 0:
        scores = scores / numHit
    else:
        scores = 0
    #return newTestList[:20]
    return scores
result = final.map(bayesian)
#result.mean()
f = open('asdf2.txt','a')
f.write(str(result.mean()) + ' last 6 app, first top 6, frequency 6 added according to whole train data with all data for hit rate')
f.write('\n')
f.close()
In [11]:
from collections import Counter
a = ((1,1),(1,2),(2,3))
b = [x[0] for x in a]
count = Counter(b).most_common()
count = [x[0] for x in count]
1 not in count
listGroup = [1,1,1,1,1,1]
for i in range(len(listGroup)):
    print listGroup[:i]
In [ ]: