

# Author: Baozi
# -*- coding: utf-8 -*-
"""Two-stage random-forest flow classification with an 'ambiguous' class.

Every record of the pickled flow lists is unpacked as ``(package, time,
flow)``; ``flow`` is used as the feature vector and ``package`` as the label.

For each confidence threshold 0.0, 0.1, ..., 0.9 the script runs several
trials.  One trial:

1. shuffles the training examples and splits them in half;
2. fits a first classifier on the first half;
3. predicts the second half with it and relabels every misclassified example
   as ``'ambiguous'``;
4. fits a second classifier on the relabelled second half;
5. classifies the test flows with the second classifier, drops those
   predicted ``'ambiguous'`` and those whose top class probability is below
   the threshold, and scores the remaining flows.

The mean precision, recall, F1, accuracy (all macro-averaged where
applicable, in percent) and the share of non-ambiguous flows that passed the
threshold are printed per threshold.
"""
import _pickle as pickle
import random

import numpy as np

########## CONFIG
# TRAINING_PICKLE = 'motog-old-65-withnoise-statistical.p'  # 1a
TRAINING_PICKLE = 'trunc-dataset1a-noisefree-statistical.p'  # 1a
# TESTING_PICKLE = 'motog-new-65-withnoise-statistical.p'  # 2
TESTING_PICKLE = 'trunc-dataset2-noisefree-statistical.p'  # 2

AMBIGUOUS = 'ambiguous'    # label given to examples the first classifier got wrong
THRESHOLD_STEPS = 10       # thresholds 0.0, 0.1, ..., 0.9
TRIALS_PER_THRESHOLD = 5   # results are averaged over this many trials


def load_flows(path):
    """Return the flow list unpickled from the file at *path*.

    The file is closed as soon as it has been read.

    NOTE(security): unpickling can execute arbitrary code.  Only load
    pickle files that come from a trusted source.
    """
    with open(path, 'rb') as handle:
        return pickle.load(handle, encoding='iso-8859-1')


def relabel_ambiguous(labels, predictions):
    """Return *labels* with every misclassified entry replaced by AMBIGUOUS.

    ``labels[i]`` is kept when ``predictions[i]`` equals it; otherwise the
    entry becomes ``'ambiguous'``.
    """
    return [
        label if predicted == label else AMBIGUOUS
        for label, predicted in zip(labels, predictions)
    ]


def select_confident_flows(flows, labels, predictions, probabilities, threshold):
    """Pick the test flows that were classified confidently enough.

    All four sequences are parallel: ``predictions[i]`` and
    ``probabilities[i]`` (one probability per class) belong to ``flows[i]``.

    Returns ``(kept_flows, kept_labels, total)`` where ``total`` is the
    number of flows not predicted ``'ambiguous'`` and the kept lists hold
    those of them whose highest class probability is ``>= threshold``.
    """
    kept_flows = []
    kept_labels = []
    total = 0
    for flow, label, predicted, class_probs in zip(
            flows, labels, predictions, probabilities):
        if predicted == AMBIGUOUS:
            continue
        total += 1
        # Use the probability row of *this* flow, not the first sample's row.
        if max(class_probs) >= threshold:
            kept_flows.append(flow)
            kept_labels.append(label)
    return kept_flows, kept_labels, total


def run_trial(training_flows, testing_flows, threshold):
    """Train both classifiers once and score the confident test flows.

    Returns ``(scores, coverage)``.  ``coverage`` is the percentage of
    non-ambiguous test flows that passed *threshold* (0.0 when there were
    none).  ``scores`` is ``(precision, recall, f1, accuracy)`` in percent,
    or ``None`` when no test flow passed the threshold.
    """
    # scikit-learn is imported here rather than at module level so that the
    # pure helpers above stay importable without it installed.
    from sklearn import ensemble
    from sklearn.metrics import (
        accuracy_score,
        f1_score,
        precision_score,
        recall_score,
    )

    ########## GET FLOWS
    examples = [(flow, package) for package, _time, flow in training_flows]

    ########## SHUFFLE DATA to ensure classes are "evenly" distributed
    random.shuffle(examples)
    half = len(examples) // 2

    ########## TRAINING PART 1
    first_half = examples[:half]
    classifier = ensemble.RandomForestClassifier()
    classifier.fit(
        [flow for flow, _package in first_half],
        [package for _flow, package in first_half],
    )

    ########## TRAINING PART 2 (REINFORCEMENT)
    second_half = examples[half:]
    X2_train = [flow for flow, _package in second_half]
    y2_train = relabel_ambiguous(
        [package for _flow, package in second_half],
        classifier.predict(X2_train),
    )
    print("Step Finished!!!!!!!!!!!")

    # The second classifier learns 'ambiguous' as an extra class, so from
    # here on it can predict that label for unseen samples.
    classifier2 = ensemble.RandomForestClassifier()
    classifier2.fit(X2_train, y2_train)

    ########## TESTING
    all_flows = [flow for _package, _time, flow in testing_flows]
    all_labels = [package for package, _time, _flow in testing_flows]
    predictions = classifier2.predict(all_flows)
    prediction_proba = classifier2.predict_proba(all_flows)
    print(prediction_proba[0])

    X_test, y_test, totalflows = select_confident_flows(
        all_flows, all_labels, predictions, prediction_proba, threshold)
    coverage = float(len(X_test)) * 100 / totalflows if totalflows else 0.0
    if not X_test:
        # Nothing passed the threshold: there is nothing to score.
        return None, coverage

    y_pred = classifier2.predict(X_test)
    scores = (
        precision_score(y_test, y_pred, average="macro") * 100,
        recall_score(y_test, y_pred, average="macro") * 100,
        f1_score(y_test, y_pred, average="macro") * 100,
        accuracy_score(y_test, y_pred) * 100,
    )
    return scores, coverage


def _mean(values):
    """Return the mean of *values*, or NaN for an empty list."""
    return np.mean(values) if values else float('nan')


def main():
    """Load both data sets and print averaged scores for every threshold."""
    print('Loading pickles...')
    trainingflowlist = load_flows(TRAINING_PICKLE)
    testingflowlist = load_flows(TESTING_PICKLE)
    print('Done...')
    print('')

    print('Training with ' + TRAINING_PICKLE + ': ' + str(len(trainingflowlist)))
    print('Testing with ' + TESTING_PICKLE + ': ' + str(len(testingflowlist)))
    print('')

    if not trainingflowlist or not testingflowlist:
        raise SystemExit('Both the training and the testing flow list must be non-empty.')

    for step in range(THRESHOLD_STEPS):
        threshold = float(step) / THRESHOLD_STEPS
        p, r, f, a, c = [], [], [], [], []

        for trial in range(TRIALS_PER_THRESHOLD):
            print(trial)
            scores, coverage = run_trial(trainingflowlist, testingflowlist, threshold)
            c.append(coverage)
            if scores is None:
                continue
            precision, recall, f1, accuracy = scores
            p.append(precision)
            r.append(recall)
            f.append(f1)
            a.append(accuracy)

        print('Threshold: ' + str(threshold))
        print(_mean(p))
        print(_mean(r))
        print(_mean(f))
        print(_mean(a))
        print(_mean(c))
        print('')


if __name__ == '__main__':
    main()

