In [1]:
import pymongo
conn = pymongo.MongoClient('selnpcgwnx1004', 8888)
db = conn.stdata
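Before pulling any data, it can be worth confirming the connection; a minimal sketch, assuming the MongoDB host above is reachable:

# Optional sanity check on the connection opened above
print(conn.server_info()["version"])   # fails if the server is unreachable
print(db.hourly.count())               # documents in the collection queried below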
In [2]:
CP_names = [u'Node__Check_Config_Activation_Time',
u'Dallas__No_DPFcrashes',
u'Dallas__No_Crashes',
u'TC00__Check_New_Sessions_Created_After_Kill_Gtpcd_on_PSC',
u'TC00__Check_Alarm_After_PPB_Failure',
u'TC00__No_Emerg_Errors_Warnings_During_Stability',
u'Dallas__No_Critical_Errors',
u'TC00__Check_Board_Allocation_After_Kill_Gtpcd_on_GSC',
u'TC00__Check_Alarm_After_Kill_Sgwcd_on_PSC',
u'TC00__Check_Active_Notification_At_BH',
u'TC00__Increase_In_Number_Of_System_Alarms',
u'TC00__Check_Board_Allocation_After_Kill_Gtpcd_on_PSC',
u'Node__Check_No_Pending_PGW_Ftp_CDRs',
u'Robustness__Check_ISPlog',
u'Robustness__Negative_Node_Counter_Check',
u'Robustness__Board_Status_Check_ervlog',
u'TC00__Check_Board_Allocation_After_PPB_Failure',
u'Node__Check_No_Pending_PGW_Gtpp_CDRs',
u'TC00__Dallas_Signaling_Check_During_Stability',
u'TC00__Check_New_Sessions_Created_After_PPB_Failure',
u'TC00__Increase_In_Number_Of_EPG_Alarms',
u'TC00__total_execution_time',
u'Node__Check_No_CDR_Failures',
u'Node__No_Coredumps',
u'TC00__Check_Board_Allocation_After_Kill_Sgwcd_on_PSC',
u'TC00__Check_Active_Notification_In_The_End',
u'TC00__Check_Node_Log_During_Robustness',
u'TC00__Check_Alarm_After_Kill_Gtpcd_on_GSC',
u'TC00__check_flooding_messages',
u'TC00__Check_New_Sessions_Created_After_Kill_Gtpcd_on_GSC',
u'Node__Check_No_Pending_SGW_Bp_CDRs',
u'TC00__Check_Unexpected_Alarms',
u'TC00__Threshold_Value_PGW_Contexts',
u'TC00__Check_New_Sessions_Created_After_Kill_Sgwcd_on_PSC',
u'Robustness__Check_Requirement_PSC_Recovery_Time_of_Kill_Gtpcd_on_PSC',
u'Robustness__Too_Big_Node_Counter_Check',
u'Robustness__All_UE_Detached_In_The_End',
u'TC00__No_Coredump_During_Stability',
u'Robustness__Board_Status_Check_ervshelllog',
u'TC00__Robustness__Check_Requirement_Recovery_Time_of_Activate_GSC_on_Spare_After_Kill_Gtpcd_on_GSC',
u'Common__No_Testcase_Errors',
u'Robustness__Missing_Route_Check',
u'TC00__Check_Alarm_After_Kill_Gtpcd_on_PSC',
u'Node__Check_Config_Save_Time',
u'Robustness__Check_Requirement_Recovery_Time_of_PGWU_resilience_For_PPB_Failure',
u'TC00__Threshold_Value_SGW_Contexts',
u'Node__Check_Commit_Time',
u'Robustness__Check_Requirement_Payload_Interruption_Time_For_PPB_Failure',
u'Node__Check_No_Pending_SGW_Rf_CDRs',
u'TC00__Dallas_Payload_Check_During_Stability',
u'Robustness__Reached_BH_With_No_Failure',
u'Robustness__No_New_Active_Alarm_In_The_End',
u'TC00__Robustness__Check_Active_Notification_At_BH',
u'Common__Testcase_Valid',
u'Node__SGW_CPB_Process_Start_Error',
u'Node__PGW_PPB_Process_Start_Error',
u'Node__No_Configuration_Errors',
u'Robustness__Check_Requirement_Recovery_Time_of_Activate_GSC_on_Spare_After_Kill_Gtpcd_on_GSC',
u'Robustness__Check_Active_Notification_At_BH',
u'Robustness__No_Active_Alarm_In_The_End',
u'Robustness__Payload_Interruption_Time_For_PPB_Failure',
u'Robustness__Recovery_Time_of_Activate_GSC_on_Spare_After_Kill_Gtpcd_on_GSC',
u'Robustness__Recovery_Time_of_PGWU_resilience_For_PPB_Failure',
u'Robustness__PSC_Recovery_Time_of_Kill_Gtpcd_on_PSC',
u'Demo__Threshold_value_pgw_contexts',
u'Node__Check_Config_File_Size',
u'Node__Check_Config_Nr_MO_Classes',
u'Node__Check_Config_Nr_Attributes',
u'CP_Error',
u'Robustness__Board_Status_Check']
# Verbose labels: a distinct integer id per failure-reason category
failure_reason_verbose_dict = {
    "EPG-platform"   : 0,
    "EPG-application": 1,
    "EPGCATS"        : 2,
    "Dallas"         : 3,
    "ITTE"           : 4,
    "AutoTT"         : 5,
    "Portal"         : 6,
    "Verdict"        : 7,
    "SSR-sim"        : 8,
    "EQDB"           : 9,
    "Outline"        : 10
}
# Binary labels: 1 = bad delivery (EPG-application, EPGCATS),
#                0 = environment problem (everything else)
failure_reason_dict = {
    "EPG-platform"   : 0,
    "EPG-application": 1,
    "EPGCATS"        : 1,
    "Dallas"         : 0,
    "ITTE"           : 0,
    "AutoTT"         : 0,
    "Portal"         : 0,
    "Verdict"        : 0,
    "SSR-sim"        : 0,
    "EQDB"           : 0,
    "Outline"        : 0
}
failure_reason = failure_reason_dict.keys()
In [3]:
import pandas as pd
import numpy as np
import re
df_CP = pd.DataFrame(columns=CP_names)
df_failure_reason = pd.DataFrame(columns=failure_reason)
cursor = db.hourly.find({"comment": {"$regex": "Failure reasons"}},
                        {"CP_results": 1, "comment": 1, "job_id": 1, "_id": 0})
#doc = cursor.next()
for doc in cursor:
    job_id = doc["job_id"]
    ### CP results: 1 if the check point FAILed, else 0
    df_job_cp = pd.DataFrame(np.zeros((1, len(CP_names))), index=[job_id], columns=CP_names)
    for CP in doc["CP_results"]:
        if doc["CP_results"][CP] == "FAIL":
            df_job_cp[CP] += 1
    ### Failure reason: parsed out of the job comment
    df_job_fr = pd.DataFrame(np.zeros((1, len(failure_reason))), index=[job_id], columns=failure_reason)
    comment = doc["comment"]
    for fr in failure_reason:
        if re.search(fr, comment):
            df_job_fr[fr] += 1
            break
    else:
        print("no match! Job id: %s" % job_id)
    df_CP = df_CP.append(df_job_cp)
    df_failure_reason = df_failure_reason.append(df_job_fr)
# Convert the one-hot failure-reason frame into integer label arrays
array_failure_reason = np.zeros(df_failure_reason.shape[0])
array_failure_reason_verbose = np.zeros(df_failure_reason.shape[0])
for i in range(df_failure_reason.shape[0]):
    a = df_failure_reason.iloc[i]
    for key in failure_reason_dict:
        if key in a[a > 0].index:
            array_failure_reason[i] = failure_reason_dict[key]
            break
for i in range(df_failure_reason.shape[0]):
    a = df_failure_reason.iloc[i]
    for key in failure_reason_verbose_dict:
        if key in a[a > 0].index:
            array_failure_reason_verbose[i] = failure_reason_verbose_dict[key]
            break
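Since df.append copies the whole frame on every iteration, the extraction loop above is quadratic in the number of jobs. A sketch of an equivalent buffered variant (df_CP2/df_failure_reason2 are illustrative names; the fill logic is the same as above):

# Buffer one Series per job, then build each DataFrame in a single step
cp_rows, fr_rows = [], []
for doc in db.hourly.find({"comment": {"$regex": "Failure reasons"}},
                          {"CP_results": 1, "comment": 1, "job_id": 1, "_id": 0}):
    cp_row = pd.Series(0.0, index=CP_names, name=doc["job_id"])
    for CP in doc["CP_results"]:
        if doc["CP_results"][CP] == "FAIL":
            cp_row[CP] = 1.0
    cp_rows.append(cp_row)
    fr_row = pd.Series(0.0, index=failure_reason, name=doc["job_id"])
    for fr in failure_reason:
        if re.search(fr, doc["comment"]):
            fr_row[fr] = 1.0
            break
    fr_rows.append(fr_row)
df_CP2 = pd.DataFrame(cp_rows)             # one row per job, indexed by job_id
df_failure_reason2 = pd.DataFrame(fr_rows)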
In [4]:
### Merge minor CPs
def cp_merge(cp_list, df, src_cp, dst_cp):
    cp_list.remove(src_cp)
    df[dst_cp] = df[dst_cp] + df[src_cp]
    return cp_list, df.reindex(columns=cp_list)

CP_names, df_CP = cp_merge(CP_names, df_CP, "TC00__Check_Active_Notification_In_The_End", "Robustness__No_Active_Alarm_In_The_End")
CP_names, df_CP = cp_merge(CP_names, df_CP, "Robustness__No_New_Active_Alarm_In_The_End", "Robustness__No_Active_Alarm_In_The_End")
CP_names, df_CP = cp_merge(CP_names, df_CP, "TC00__Robustness__Check_Requirement_Recovery_Time_of_Activate_GSC_on_Spare_After_Kill_Gtpcd_on_GSC", "Robustness__Check_Requirement_Recovery_Time_of_Activate_GSC_on_Spare_After_Kill_Gtpcd_on_GSC")
CP_names, df_CP = cp_merge(CP_names, df_CP, "TC00__Robustness__Check_Active_Notification_At_BH", "Robustness__Check_Active_Notification_At_BH")
CP_names, df_CP = cp_merge(CP_names, df_CP, "TC00__Check_Active_Notification_At_BH", "Robustness__Check_Active_Notification_At_BH")
### Remove duplicate or all-zero CP columns
dup_cp = ['Node__Check_Commit_Time',
'Node__Check_Config_File_Size',
'Node__Check_Config_Save_Time',
'CP_Error',
'Dallas__No_Critical_Errors',
'Node__Check_Config_Nr_MO_Classes',
'Dallas__No_DPFcrashes',
'Node__Check_Config_Nr_Attributes',
'Node__Check_Config_Activation_Time']
df_CP = df_CP.drop(dup_cp, axis=1)
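As a quick sanity check (a sketch; the exact row count depends on the jobs pulled above), the 75 original CPs minus 5 merged and 9 dropped should leave 61 columns:

print(df_CP.shape)                                  # expect (n_jobs, 61)
print(sorted(set(CP_names) - set(df_CP.columns)))   # should list exactly the dup_cp names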
In [5]:
fr_distribution = df_failure_reason.sum()
print("Failure Reason Distribution:")
print fr_distribution
print("\nTotal number of failed CI Hourly test jobs: %s" %df_CP.shape[0])
#plt.bar([i + 0.1 for i in range(len(a.index))], a.values, 1)
#plt.xticks([i + 0.5 for i in range(len(a.index))],a.index)
print("Bad Delivery(EPG-application + EPGCATS): %s"
%(fr_distribution["EPG-application"] + fr_distribution["EPGCATS"]))
print("Environment Problem(platform, Dallas,ITTE,AutoTT,etc): %s"
%(df_CP.shape[0] - fr_distribution["EPG-application"] - fr_distribution["EPGCATS"]))
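The commented-out plt.bar/plt.xticks lines above sketch a chart of this distribution; a working version of that idea (assuming the inline matplotlib backend enabled in the next cell):

from matplotlib import pyplot as plt
plt.bar([i + 0.1 for i in range(len(fr_distribution))], fr_distribution.values, 0.8)
plt.xticks([i + 0.5 for i in range(len(fr_distribution))], fr_distribution.index, rotation=45)
plt.ylabel("Number of failed jobs")
plt.title("Failure Reason Distribution")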
In [6]:
%matplotlib inline
from matplotlib import pyplot as plt
# Environment problem jobs (label 0)
list_job_id_0 = [index[0] for index, x in np.ndenumerate(array_failure_reason) if x == 0]
# Bad delivery jobs (label 1)
list_job_id_1 = [index[0] for index, x in np.ndenumerate(array_failure_reason) if x == 1]
# Plot the first 56 CP columns, 8 panels (4x2) per figure
for m in range(7):
    f, axarr = plt.subplots(4, 2, sharex='col', sharey='row')
    plt.subplots_adjust(top=2, right=2)
    for n in range(8):
        i = n // 2
        j = n % 2
        # Bad Delivery = blue, Environment = red
        axarr[i, j].plot(list_job_id_1, df_CP.iloc[list_job_id_1, n + m * 8], 'bo',
                         list_job_id_0, df_CP.iloc[list_job_id_0, n + m * 8], 'ro')
        axarr[i, j].set_title(df_CP.columns[n + m * 8])
        axarr[i, j].axis([0, 910, -0.1, 1.5])
        axarr[i, j].set_yticks([0, 1])
In [7]:
from sklearn import svm
def training_predict(df_CP, array_failure_reason, **kargs):
    random_index = list(range(df_CP.shape[0]))
    np.random.shuffle(random_index)
    ### Select 800 samples as the training set
    ### and the remaining 106 as the test set
    training_index = random_index[:800]
    testing_index = random_index[800:]
    training_CP = df_CP.iloc[training_index]
    training_fr = array_failure_reason[training_index]
    test_CP = df_CP.iloc[testing_index]
    test_fr = array_failure_reason[testing_index]
    ## Training
    clf = svm.LinearSVC(**kargs)
    clf.fit(training_CP, training_fr)
    ## Test
    predictions = [int(a) for a in clf.predict(test_CP)]
    num_correct = sum(int(a == y) for a, y in zip(predictions, test_fr))
    print("%s of %s (%.02f%%) values correct." % (num_correct, len(predictions), num_correct * 100.0 / len(predictions)))
    return num_correct * 100.0 / len(predictions)

for C in [1]:
    #print("C=%s" % C)
    correct_ratio = []
    for i in range(10):
        correct_ratio.append(training_predict(df_CP, array_failure_reason, C=C, class_weight={1: 2}))
    print("max:%.2f%%, mean:%.2f%%, min:%.2f%%" % (max(correct_ratio), sum(correct_ratio) / len(correct_ratio), min(correct_ratio)))
In [ ]:
### Training (manual variant; training_CP/training_fr come from the split cell below)
from sklearn import svm
#clf = svm.SVC()
#sample_weight_new_100 = np.ones(training_CP.shape[0])
#sample_weight_new_100[:50] *= 5
#clf.fit(training_CP, training_fr, sample_weight=sample_weight_new_100)
clf = svm.LinearSVC()
clf.fit(training_CP, training_fr)
In [ ]:
### Test
predictions = [int(a) for a in clf.predict(test_CP)]
num_correct = sum(int(a == y) for a, y in zip(predictions, test_fr))
print("%s of %s (%.02f%%) values correct." % (num_correct, len(predictions), num_correct * 100.0 / len(predictions)))
In [ ]:
# Display the misclassified (prediction, truth) pairs
[(a, y) for a, y in zip(predictions, test_fr) if a != y]
In [ ]:
### Create a random permutation of the sample indices
random_index = list(range(df_CP.shape[0]))
np.random.shuffle(random_index)
### Select 800 samples as the training set
### and the remaining 106 as the test set
training_index = random_index[:800]
testing_index = random_index[800:]
training_CP = df_CP.iloc[training_index]
#training_fr = df_failure_reason.iloc[training_index]
training_fr = array_failure_reason[training_index]
test_CP = df_CP.iloc[testing_index]
#test_fr = df_failure_reason.iloc[testing_index]
test_fr = array_failure_reason[testing_index]
### Alternative split (overrides the random one above):
### first 100 samples as the test set, the rest for training
test_CP = df_CP.iloc[:100]
test_fr = array_failure_reason[:100]
training_CP = df_CP[100:]
training_fr = array_failure_reason[100:]
In [ ]:
# Dump (the trailing True selects binary pickle protocol 1)
import pickle
pickle.dump(df_CP, open("dump_df_CP", "wb"), True)
pickle.dump(df_failure_reason, open("dump_df_failure_reason", "wb"), True)
In [ ]:
# Load (Python 2)
import pickle
testjob_list = pickle.load(open("/lab/epg_team_builds/LSV/tmp/dump_testjob_hourly", 'rb'))
# Python 3: pickles written under Python 2 need encoding='latin1'
with open("/Users/Kimi/Downloads/dump_df_CP", 'rb') as f:
    df_CP = pickle.load(f, encoding='latin1')
In [ ]:
# Same labelling loop as In [3], with the number of jobs (906) hard-coded
for i in range(906):
    a = df_failure_reason.iloc[i]
    for key in failure_reason_dict:
        if key in a[a > 0].index:
            array_failure_reason[i] = failure_reason_dict[key]
            break
In [ ]:
def remove_dots(data):
    # MongoDB field names cannot contain '.', so rename such keys recursively
    for key in list(data.keys()):
        if type(data[key]) is dict:
            data[key] = remove_dots(data[key])
        if '.' in key:
            data[key.replace('.', '_')] = data[key]
            del data[key]
    return data
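MongoDB does not accept '.' in field names, which is presumably what this helper works around before documents are stored; a usage sketch on a made-up document:

doc = {"CP_results": {"Node.Check_Commit_Time": "PASS"}, "sw.version": "2.1"}
print(remove_dots(doc))
# -> {'CP_results': {'Node_Check_Commit_Time': 'PASS'}, 'sw_version': '2.1'}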