Commit bf498454 authored by Bo's avatar Bo
Browse files

adds the functions:

averageDuration -- computes the average duration of a specific part
in a course.
splitSequence -- splits and restructures the data so that individual
arguments can be extracted and analysed more easily.
averageLengthForPart -- gets the average number of parts seen in a
course, taken over all users.
parent bb1ce837
# -*- coding: UTF-8 -*-
Created on 2016-06-03
@author: E15E888T
from dataManager.scriptDatabaseManager2 import ScriptDatabaseManager
from dataManager.databaseManagerData2 import DatabaseManager
from dataManager.scriptDatabaseStatistic import ScriptDatabaseStatistic
import math
import os
import pickle
from asyncore import read
from lib2to3.pgen2.tokenize import Special
from pattern.metrics import duration
# Manager instances built once, when this module is imported.
# NOTE(review): no DataProcessing method visible in this file reads either
# name (averageLengthForPart assigns its own local scriptDatabaseManager) --
# confirm whether other modules import them before removing.
databaseManager = DatabaseManager()
scriptDatabaseManager = ScriptDatabaseManager()
class DataProcessing:
def averageDuration(self, idPart, sequence):
    """Return the mean duration recorded for one part of a course.

    ``sequence`` is passed to ``self.splitSequence`` with the
    ``'idPart+duration'`` argument. The result is walked as a collection
    of groups, each group being a collection of records whose element 0
    is a part id and whose element 1 is a duration.

    Args:
        idPart: identifier of the part whose durations are averaged.
        sequence: course data in whatever form ``splitSequence`` accepts.

    Returns:
        The average duration as a float, or ``0.0`` when ``idPart`` does
        not occur in the data (so no division by zero can happen).
    """
    groups = self.splitSequence(sequence, 'idPart+duration', False)

    total = 0
    count = 0
    for group in groups:
        for record in group:
            # Index rather than unpack so records longer than two
            # elements are still accepted.
            if record[0] == idPart:
                total += record[1]
                count += 1

    if count == 0:
        # Nothing matched: report 0.0 instead of raising ZeroDivisionError.
        return 0.0
    # Explicit float() keeps true division on Python 2 as well.
    return float(total) / float(count)
def splitSequence(self, sequence, argument, needToSave):
# NOTE(review): this listing has lost its indentation and the docstring's
# triple quotes, and some statements appear to be missing: nothing visible
# reads `sequence`, and userid/partid/sessionid/duration/secondSequence/
# fourthSequence are never filled. Recover the original file before relying
# on this method.
# The next three lines are the method's docstring text.
get the different arguments of the users for a specific course
return: a list [idPart, idPart, ... ,idPart], [idSession, idSession, ... ,idSession]
[duration, duration, ... ,duration] or [((idPart, duration),(idPart, duration)...)...((idPart, duration),(idPart, duration)...)]
# NOTE(review): `list` shadows the builtin, and indexing the empty list on
# the following line raises IndexError as written -- presumably `list` was
# populated from `sequence` on a line that is not shown; confirm.
list = []
(idCourse, firstSequence) = list[0]
# Accumulators, one per selectable output form (see the dispatch below).
userid = []; partid = []; sessionid = []; duration = [];
secondSequence = [] ; thirdSequence = []; fourthSequence = []; finalSequence = []
# i indexes the entries of firstSequence; j indexes the records held in
# element 1 of the i-th entry.
for i in range(0,len(firstSequence)):
for j in range(0,len(firstSequence[i][1])):
# Pair element 0 with element 2 of the j-th record (idPart and duration,
# going by the docstring text above).
a = tuple((firstSequence[i][1][j][0],firstSequence[i][1][j][2]))
# For every entry after the first, drop as many leading items from
# secondSequence as the previous entry had records.
if i != 0:
del secondSequence[0:len(firstSequence[i-1][1])]
b = tuple(secondSequence)
if argument == 'idUser+idPart+duration':
# Insert element 0 of the i-th entry (presumably the user id -- confirm)
# at position 2*i, then trim the two leading items left from the
# previous entry.
thirdSequence.insert(2*i, firstSequence[i][0])
if i != 0:
del thirdSequence[0:2]
c = tuple(thirdSequence)
# Choose which accumulator is returned for the requested `argument`.
if argument == 'idUser':
finalSequence = userid
elif argument == 'idPart':
finalSequence = partid
elif argument == 'idSession':
finalSequence = sessionid
elif argument == 'duration':
finalSequence = duration
elif argument == 'idPart+duration':
finalSequence = thirdSequence
elif argument == 'idUser+idPart+duration':
finalSequence = fourthSequence
# NOTE(review): presumably guarded by an `else:` that is not visible here.
print ("Wrong input type")
# When requested, pickle the result to
# filtersequences/filterSequences<idCourse>.p
if needToSave:
fileName = "filterSequences"+str(idCourse)+".p"
# NOTE(review): the file object handed to pickle.dump is never explicitly
# closed.
pickle.dump(finalSequence, open(os.path.join('filtersequences', fileName), "wb"))
return finalSequence
def averageLengthForPart(self, idCourse):
    """Return the average number of part entries per user for a course.

    The course data is fetched with
    ``ScriptDatabaseManager.getSequencesSucceedPremium(idCourse)`` and
    split twice through ``self.splitSequence``: once with ``'idPart'`` and
    once with ``'idUser'``. The result is the length of the first list
    divided by the length of the second.

    Args:
        idCourse: identifier of the course to analyse.

    Returns:
        The ratio as a float, or ``0.0`` when the user list is empty
        (so no division by zero can happen).
    """
    # A fresh manager is created on every call, as before. The local name
    # is deliberately distinct so it no longer shadows the module-level
    # ``scriptDatabaseManager`` instance.
    manager = ScriptDatabaseManager()
    sequences = manager.getSequencesSucceedPremium(idCourse)

    partIds = self.splitSequence(sequences, 'idPart', False)
    userIds = self.splitSequence(sequences, 'idUser', False)

    userCount = len(userIds)
    if userCount == 0:
        # No users for this course: report 0.0 instead of raising
        # ZeroDivisionError.
        return 0.0
    # Explicit float() keeps true division on Python 2 as well.
    return float(len(partIds)) / float(userCount)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment