Commit 88e8da36 authored by Yingyin LIU
Browse files

fixed:XXXBeforeExercise() & testUnitComputeFeature

parent 1eafb722
......@@ -250,7 +250,7 @@ class computeFeature:
'''
groups = group.data.groupby(Constant_model.COLUMN_ID_USER)
exercises = group.course.data[(group.course.data[Constant_model.COLUMN_TYPE_RESOURCE].isin(Constant_model.TYPE_EXERCISE))][Constant_model.COLUMN_ID_RESOURCE].tolist()
exercises = group.course.getExercisesId()
data = []
for user in groups:
......@@ -295,18 +295,20 @@ class computeFeature:
'''
groups = group.data.groupby(Constant_model.COLUMN_ID_USER)
# FIXME : course has already a method to retrieve the exercises...
exercises = group.course.data[(group.course.data[Constant_model.COLUMN_TYPE_RESOURCE].isin(Constant_model.TYPE_EXERCISE))][Constant_model.COLUMN_ID_RESOURCE].tolist()
exercises = group.course.getExercisesId()
data = []
for user in groups:
# after function groupby('$userID'), user[0] is user ID, user[1] is user dataframe
subdata = [user[0]]
date_list = []
for exer in exercises:
date = user[1][(user[1][Constant_model.COLUMN_ID_RESOURCE] == exer)][Constant_model.COLUMN_START].tolist()
# if this user finished exercise
if len(date):
# date[0] is exercise date
date_list.append(date[0])
# if this user didn't finish the exercise, append 0
else:
date_list.append(0)
for index in range(0, len(date_list)):
......@@ -341,9 +343,10 @@ class computeFeature:
'''
group.setSession()
if not Constant_model.COLUMN_SESSION in group.data.columns:
group.setSession()
groups = group.data.groupby(Constant_model.COLUMN_ID_USER)
exercises = group.course.data[(group.course.data[Constant_model.COLUMN_TYPE_RESOURCE].isin(Constant_model.TYPE_EXERCISE))][Constant_model.COLUMN_ID_RESOURCE].tolist()
exercises = group.course.getExercisesId()
data = []
for user in groups:
......@@ -408,8 +411,8 @@ class computeFeature:
@return: average intensity of the sessions
'''
group.setSession()
if not Constant_model.COLUMN_SESSION in group.data.columns:
group.setSession()
data = []
for sequence in group.getSequences():
data.append([sequence.idUser, sequence.events.groupby([Constant_model.COLUMN_SESSION]).sum()[Constant_model.COLUMN_INTENSITY].mean()])
......@@ -425,7 +428,8 @@ class computeFeature:
@return: a dataframe, one column for the average number of sessions and one row per user
'''
group.setSession()
if not Constant_model.COLUMN_SESSION in group.data.columns:
group.setSession()
data = []
for sequence in group.getSequences():
data.append([sequence.idUser, len(sequence.events)/len(sequence.events.groupby([Constant_model.COLUMN_SESSION]))])
......@@ -593,12 +597,13 @@ class computeFeature:
def getMaxRestDays(group):
# FEATURE_MAX_REST_DAY = 34
group.setSession()
if not Constant_model.COLUMN_SESSION in group.data.columns:
group.setSession()
data = []
for sequence in group.getSequences():
#remove registration and unregistration data
seq_events = sequence.events[(sequence.events[Constant_model.COLUMN_TYPE_RESOURCE] != Constant_model.TYPE_REGISTRATION) & (sequence.events[Constant_model.COLUMN_TYPE_RESOURCE] != Constant_model.TYPE_UNREGISTRATION)]
seq_events = sequence.events[(~sequence.events[Constant_model.COLUMN_TYPE_RESOURCE].isin(Constant_model.TYPE_REGISTRATION))]
list_date_max = seq_events.groupby(Constant_model.COLUMN_SESSION).max()[Constant_model.COLUMN_START].tolist()
list_date_min = seq_events.groupby(Constant_model.COLUMN_SESSION).min()[Constant_model.COLUMN_START].tolist()
max_rest_days = 0
......@@ -615,10 +620,11 @@ class computeFeature:
def getTotalRestDays(group):
# FEATURE_DURATION_REST = 36
group.setSession()
if not Constant_model.COLUMN_SESSION in group.data.columns:
group.setSession()
data = []
for sequence in group.getSequences():
seq_events = sequence.events[(sequence.events[Constant_model.COLUMN_TYPE_RESOURCE] != Constant_model.TYPE_REGISTRATION) & (sequence.events[Constant_model.COLUMN_TYPE_RESOURCE] != Constant_model.TYPE_UNREGISTRATION)]
seq_events = sequence.events[(~sequence.events[Constant_model.COLUMN_TYPE_RESOURCE].isin(Constant_model.TYPE_REGISTRATION))]
list_date_max = seq_events.groupby(Constant_model.COLUMN_SESSION).max()[Constant_model.COLUMN_START].tolist()
list_date_min = seq_events.groupby(Constant_model.COLUMN_SESSION).min()[Constant_model.COLUMN_START].tolist()
sum_rest_days = 0
......@@ -632,10 +638,11 @@ class computeFeature:
def getAverageRestDays(group):
# FEATURE_AVERAGE_REST_DAY = 37
group.setSession()
if not Constant_model.COLUMN_SESSION in group.data.columns:
group.setSession()
data = []
for sequence in group.getSequences():
seq_events = sequence.events[(sequence.events[Constant_model.COLUMN_TYPE_RESOURCE] != Constant_model.TYPE_REGISTRATION) & (sequence.events[Constant_model.COLUMN_TYPE_RESOURCE] != Constant_model.TYPE_UNREGISTRATION)]
seq_events = sequence.events[(~sequence.events[Constant_model.COLUMN_TYPE_RESOURCE].isin(Constant_model.TYPE_REGISTRATION))]
list_date_max = seq_events.groupby(Constant_model.COLUMN_SESSION).max()[Constant_model.COLUMN_START].tolist()
list_date_min = seq_events.groupby(Constant_model.COLUMN_SESSION).min()[Constant_model.COLUMN_START].tolist()
sum_rest_days = 0
......
......@@ -127,7 +127,7 @@ class Test(unittest.TestCase):
# 7 getAllMarks
res_fea_7 = new_computeFeature.getAllMarks(group)
self.assertTrue(isinstance(res_fea_7, pd.DataFrame))
self.assertTrue(res_fea_7[res_fea_7['idUser'] == idUser]['all marks'] == 82.4)
self.assertTrue(res_fea_7[res_fea_7['idUser'] == idUser]['all marks'].values[0] == 82.4)
# 8 duration before assessment
res_fea_8 = new_computeFeature.getDurationBeforeExercise(group)
......@@ -147,29 +147,29 @@ class Test(unittest.TestCase):
# 13 getAverageIntensitySession
res_fea_13 = new_computeFeature.getAverageIntensitySession(group)
self.assertTrue(isinstance(res_fea_13, pd.DataFrame))
self.assertTrue(round(res_fea_13[res_fea_13['idUser'] == idUser]['average duration in session'],4) == 26.8857)
self.assertTrue(round(res_fea_13[res_fea_13['idUser'] == idUser]['average duration in session'].values[0],4) == 26.8857)
# 14 getAverageNumberOfEventInSession
res_fea_14 = new_computeFeature.getAverageNumberOfEventInSession(group)
self.assertTrue(isinstance(res_fea_14, pd.DataFrame))
self.assertTrue(res_fea_14[res_fea_14['idUser'] == idUser]['nbr event in session'] == 5.8)
self.assertTrue(res_fea_14[res_fea_14['idUser'] == idUser]['nbr event in session'].values[0] == 5.8)
# getTimeSinceLastEvent
# 34 the max duration of the rest (day)
res_fea_34 = new_computeFeature.getMaxRestDays(group)
self.assertTrue(isinstance(res_fea_34, pd.DataFrame))
self.assertTrue(res_fea_34[res_fea_34['idUser'] == idUser]['max rest day'] == 24)
self.assertTrue(res_fea_34[res_fea_34['idUser'] == idUser]['max rest day'].values[0] == 24)
# 36 Duration of the rest
res_fea_36 = new_computeFeature.getTotalRestDays(group)
self.assertTrue(isinstance(res_fea_36, pd.DataFrame))
self.assertTrue(res_fea_36[res_fea_36['idUser'] == idUser]['duration rest'] == 249)
self.assertTrue(res_fea_36[res_fea_36['idUser'] == idUser]['duration rest'].values[0] == 249)
# 37 Average rest days
res_fea_37 = new_computeFeature.getAverageRestDays(group)
self.assertTrue(isinstance(res_fea_37, pd.DataFrame))
self.assertTrue(round(res_fea_37[res_fea_37['idUser'] == idUser]['average rest day'], 2) == 7.32)
self.assertTrue(round(res_fea_37[res_fea_37['idUser'] == idUser]['average rest day'].values[0], 2) == 7.32)
if __name__ == "__main__":
#import sys;sys.argv = ['', 'Test.testLoadFeature']
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment