Commit b2e42931 authored by Antoine PIGEAU's avatar Antoine PIGEAU
Browse files

New return type for results (namedtuples instead of plain tuples) - not finished

parent cc7733e9
......@@ -24,6 +24,8 @@ import os
import hashlib
from collections import namedtuple
import numpy as np
import operator
......@@ -40,10 +42,14 @@ from exportManager import exportLatex
from featureManager.featureMultiGroup import FeatureMultiGroup
from classifierManager.constant import Constant
class ScriptClassifier:
'''
'''
def __init__(self,
classifier,
fileName):
......@@ -178,8 +184,8 @@ class ScriptClassifier:
'''
ResultSingleIteration = namedtuple('ResultSingleIteration', ['accuracy', 'auc', 'confusionMatrix', 'dictWeight'])
featureMultiGroup = self.classifier.getFeatureMultiGroup()
(train_x, train_y,
......@@ -202,7 +208,7 @@ class ScriptClassifier:
accuracy = accuracy_score(test_y, pred_y)
resultWeight = self.getBestFeatures(model, self.classifier.features, featureMultiGroup)
dictWeight = self.getBestFeatures(model, self.classifier.features, featureMultiGroup)
print("--------------------------------------------------")
print("input dimension:"+str(train_x.shape))
......@@ -214,7 +220,8 @@ class ScriptClassifier:
assert confusionMatrix.shape == (2,2)
return (accuracy, confusionMatrix, resultWeight, auc)
return ResultSingleIteration(accuracy, auc, confusionMatrix, dictWeight) #(accuracy, auc, confusionMatrix, resultWeight)
def predictionTaskNTimes(self,
course,
......@@ -235,13 +242,16 @@ class ScriptClassifier:
print("---------------------------------- Call predictionTaskNTimes of ScriptClassifier")
accuracys = []
ResultCourse = namedtuple('ResultCourse', ['accuracies', 'aucs', 'avgConfusionMatrix', 'dictWeight'])
resultCourse = None
accuracies = []
aucScores = []
size = len(self.classifier.nameGroups)
confusionMatrice = np.zeros((size, size))
avgConfusionMatrix = np.zeros((size, size))
dictResultWeight = {}
dictWeight = {}
name = str(self.classifier.features).encode('utf-8')
hashFeature = "CourseMergedHash["+hashlib.sha1(name).hexdigest()+"]"
......@@ -264,48 +274,53 @@ class ScriptClassifier:
if os.path.exists(fileNameSavedResult) and cache:
with open(fileNameSavedResult, "rb") as fileResult:
(accuracys, aucScores, confusionMatrice, dictResultWeight) = pickle.load(fileResult)
#(accuracies, aucScores, avgConfusionMatrix, dictResultWeight) = pickle.load(fileResult)
resultCourse = ResultCourse(*pickle.load(fileResult))
print(str(fileNameSavedResult)+" is already done")
return (accuracys, aucScores, confusionMatrice, dictResultWeight)
#return (accuracies, aucScores, avgConfusionMatrix, dictResultWeight)
return resultCourse
#self.classifier.testIdCourse = testIdCourse
for _ in range(0, ntime):
(accuracy, confusionMatrix, resultWeight, auc) = self.predictionTask(course,
whereToCut=whereToCut)
# (accuracy, auc, confusionMatrix, resultWeight)
result = self.predictionTask(course,
whereToCut=whereToCut)
accuracys.append(accuracy)
aucScores.append(auc)
accuracies.append(result.accuracy)
aucScores.append(result.auc)
confusionMatrice = confusionMatrice + confusionMatrix
avgConfusionMatrix = avgConfusionMatrix + result.confusionMatrix
for (feature, weight) in resultWeight:
if feature not in dictResultWeight:
dictResultWeight[feature] = [weight]
for (feature, weight) in result.dictWeight:
if feature not in dictWeight:
dictWeight[feature] = [weight]
else:
dictResultWeight[feature].append(weight)
dictWeight[feature].append(weight)
for feature in dictResultWeight:
dictResultWeight[feature] = np.average(dictResultWeight[feature])
for feature in dictWeight:
dictWeight[feature] = np.average(dictWeight[feature])
confusionMatrice = confusionMatrice / float(ntime)
s = np.sum(confusionMatrice, axis=0)
confusionMatrice = confusionMatrice / s
avgConfusionMatrix = avgConfusionMatrix / float(ntime)
s = np.sum(avgConfusionMatrix, axis=0)
avgConfusionMatrix = avgConfusionMatrix / s
if cache == False:
with open(fileNameSavedResult, "wb") as fileResult:
pickle.dump((accuracys, aucScores, confusionMatrice, dictResultWeight), fileResult)
return (accuracys, aucScores, confusionMatrice, dictResultWeight)
resultCourse = ResultCourse(accuracies, aucScores, avgConfusionMatrix, dictWeight)
with open(fileNameSavedResult, "wb") as fileResult:
#pickle.dump((accuracies, aucScores, confusionMatrice, dictResultWeight), fileResult)
pickle.dump(tuple(resultCourse), fileResult)
return resultCourse #(accuracies, aucScores, confusionMatrice, dictResultWeight)
# Does nothing in this class: the body is a bare `pass`.
# In the per-course code shown in this diff it is called as
# self.setFeatures(course, whereToCut) right before predictionTaskNTimes.
# NOTE(review): presumably a hook for subclasses to choose the feature set
# per course / cut point — confirm against the subclasses.
def setFeatures(self, course=None, whereToCut=None):
pass
def predictionTaskForAllCourses(self, whereToCut, ntime = 10):
def predictionTaskForAllCourses(self, whereToCut, ntime = 10, cache=True):
'''
Do the prediction task for all the courses (call the previous function predictionNTimes on all the courses).
......@@ -313,12 +328,15 @@ class ScriptClassifier:
@param whereToCut: the cut-off point used to truncate the sequence when it is loaded. The unit is the number of days
@param features: list of feature to use for the input data. Each element is feature defined in the FeatureSequence class
@param algorithms: list of SVM algorithm to test. Possible values are in the class Svm
@param cache : use the cache obtained previously for all the courses
@return: ([accuracyCourse1, accuracyCourse2, ...],
[ [accuracyClass1Course1, class2....], [Course2, ...],... ],
[rocCourse1, rocCourse2, ...] ), a triplet list of accuracies, list of accuracy per class, list of
roc score
'''
ResultAllCourses = namedtuple('ResultAllCourses', ['avgAccuracy', 'stdAccurracy', 'avgAuc', 'stdAuc', 'accuracyPerClass', 'avgConfusionMatrix', 'dictAllCourses'])
fileNameResult = os.path.join(self.directoryExperiment,
self.fileName+
......@@ -337,24 +355,25 @@ class ScriptClassifier:
self.setFeatures(course, whereToCut)
(accuracysCourse, aucScoresCourse, confusionMatrix, dictWeight) = self.predictionTaskNTimes(
course,
whereToCut=whereToCut,
ntime=ntime,
cache=True)
#(accuracysCourse, aucScoresCourse, confusionMatrix, dictWeight)
# (accuracies, aucScores, avgConfusionMatrix, dictResultWeight)
resultCourse = self.predictionTaskNTimes(course,
whereToCut=whereToCut,
ntime=ntime,
cache=cache)
dictResults[course.getCourseId()] = (accuracysCourse, aucScoresCourse, confusionMatrix, dictWeight)
dictResults[course.getCourseId()] = resultCourse
accuracy = np.nanmean(accuracysCourse)
aucScore = np.nanmean(aucScoresCourse)
accuracy = np.nanmean(resultCourse.accuracies)
aucScore = np.nanmean(resultCourse.aucs)
accuracies.append(accuracy)
aucScores.append(aucScore)
accuracysPerClass.append(confusionMatrix.diagonal())
accuracysPerClass.append(resultCourse.avgConfusionMatrix.diagonal())
if dictWeight:
dictWeightAllCourses[course.getCourseId()] = dictWeight
if resultCourse.dictWeight:
dictWeightAllCourses[course.getCourseId()] = resultCourse.dictWeight
courseName = course.getName()
fileResult.write("--------------------------\n")
......@@ -364,11 +383,11 @@ class ScriptClassifier:
fileResult.write("RocScore: "+str(aucScore)+" \n")
fileResult.write("confusion matrix\n")
fileResult.write(str(confusionMatrix))
fileResult.write(str(resultCourse.avgConfusionMatrix))
if dictWeight:
if resultCourse.dictWeight:
fileResult.write("\nbest feature:\n")
sortedDictWeigth = sorted(dictWeight.items(), key=operator.itemgetter(1), reverse=True)
sortedDictWeigth = sorted(resultCourse.dictWeight.items(), key=operator.itemgetter(1), reverse=True)
fileResult.write(str(sortedDictWeigth))
fileResult.write("\n--------------------------\n")
......@@ -403,8 +422,9 @@ class ScriptClassifier:
fileResult.write(" best feature for all courses:\n")
resultWeigth = sorted(resultWeight.items(), key=operator.itemgetter(1), reverse=True)
fileResult.write(str(resultWeigth))
# 'avgAccuracy', 'stdAccurracy', 'avgAuc', 'stdAuc', 'accuracyPerClass', 'avgConfusionMatrix', 'dictAllCourses'
resultAllCourses = ResultAllCourses(scoreFinal, stdAccuracy, aucScoreFinal, stdAuc, )
return (accuracies, accuracysPerClass, aucScores, stdAccuracy, stdAuc, dictResults)
def predictionTaskForAllPeriods(self, ntime = 10):
......
......@@ -80,17 +80,19 @@ if __name__ == "__main__":
''' classification N times '''
# classifier.features = ConstantFeature.BEST_LR_FEATURE_ALL_COURSES[course.getCourseId()][25]
#
# (accuracys, aucScores, confusionMatrix, dictResultWeight) = classifier.predictionTaskNTimes(course,
# whereToCut=25,
# ntime=10,
# cache = False)
# print("accuracy: ", np.nanmean(accuracys))
# print("std accuracy: ", np.std(accuracys))
# print("confusion Matrix:\n", confusionMatrix)
# print("AUC score: ", np.nanmean(aucScores))
# print("std AUC:", np.std(aucScores))
classifier.features = ConstantFeature.BEST_LR_FEATURE_ALL_COURSES[course.getCourseId()][25]
result = classifier.predictionTaskNTimes(course,
whereToCut=25,
ntime=10,
cache = False)
print("accuracy: ", np.nanmean(result.accuracies))
print("std accuracy: ", np.std(result.accuracies))
print("confusion Matrix:\n", result.avgConfusionMatrix)
print("AUC score: ", np.nanmean(result.aucs))
print("std AUC:", np.std(result.aucs))
print("dictWeight:", result.dictWeight)
''' for all courses '''
......@@ -103,7 +105,7 @@ if __name__ == "__main__":
''' for all periods '''
classifier.predictionTaskForAllPeriods(ntime=10)
# classifier.predictionTaskForAllPeriods(ntime=10)
''' NN : all courses with hidden layers'''
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment