Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
Antoine PIGEAU
2015-Hubble-UserProfiles
Commits
88e8da36
Commit
88e8da36
authored
Jan 25, 2019
by
Yingyin LIU
Browse files
fixed:XXXBeforeExercise() & testUnitComputeFeature
parent
1eafb722
Changes
2
Hide whitespace changes
Inline
Side-by-side
source/featureManager/oulad/computeFeature.py
View file @
88e8da36
...
...
@@ -250,7 +250,7 @@ class computeFeature:
'''
groups
=
group
.
data
.
groupby
(
Constant_model
.
COLUMN_ID_USER
)
exercises
=
group
.
course
.
data
[(
group
.
course
.
data
[
Constant_model
.
COLUMN_TYPE_RESOURCE
].
isin
(
Constant_model
.
TYPE_EXERCISE
))][
Constant_model
.
COLUMN_ID_RESOURCE
].
tolist
()
exercises
=
group
.
course
.
getExercisesId
()
data
=
[]
for
user
in
groups
:
...
...
@@ -295,18 +295,20 @@ class computeFeature:
'''
groups
=
group
.
data
.
groupby
(
Constant_model
.
COLUMN_ID_USER
)
# FIXME : course has already a method to retrieve the exercises...
exercises
=
group
.
course
.
data
[(
group
.
course
.
data
[
Constant_model
.
COLUMN_TYPE_RESOURCE
].
isin
(
Constant_model
.
TYPE_EXERCISE
))][
Constant_model
.
COLUMN_ID_RESOURCE
].
tolist
()
exercises
=
group
.
course
.
getExercisesId
()
data
=
[]
for
user
in
groups
:
# after function groupby('$userID'), user[0] is user ID, user[1] is user dataframe
subdata
=
[
user
[
0
]]
date_list
=
[]
for
exer
in
exercises
:
date
=
user
[
1
][(
user
[
1
][
Constant_model
.
COLUMN_ID_RESOURCE
]
==
exer
)][
Constant_model
.
COLUMN_START
].
tolist
()
# if this user finished exercise
if
len
(
date
):
# date[0] is exercise date
date_list
.
append
(
date
[
0
])
# if this user didn't finish the exercise, append '0'
else
:
date_list
.
append
(
0
)
for
index
in
range
(
0
,
len
(
date_list
)):
...
...
@@ -341,9 +343,10 @@ class computeFeature:
'''
group
.
setSession
()
if
not
Constant_model
.
COLUMN_SESSION
in
group
.
data
.
columns
:
group
.
setSession
()
groups
=
group
.
data
.
groupby
(
Constant_model
.
COLUMN_ID_USER
)
exercises
=
group
.
course
.
data
[(
group
.
course
.
data
[
Constant_model
.
COLUMN_TYPE_RESOURCE
].
isin
(
Constant_model
.
TYPE_EXERCISE
))][
Constant_model
.
COLUMN_ID_RESOURCE
].
tolist
()
exercises
=
group
.
course
.
getExercisesId
()
data
=
[]
for
user
in
groups
:
...
...
@@ -408,8 +411,8 @@ class computeFeature:
@return: average intensity of the sessions
'''
group
.
setSession
()
if
not
Constant_model
.
COLUMN_SESSION
in
group
.
data
.
columns
:
group
.
setSession
()
data
=
[]
for
sequence
in
group
.
getSequences
():
data
.
append
([
sequence
.
idUser
,
sequence
.
events
.
groupby
([
Constant_model
.
COLUMN_SESSION
]).
sum
()[
Constant_model
.
COLUMN_INTENSITY
].
mean
()])
...
...
@@ -425,7 +428,8 @@ class computeFeature:
@return: a dataframe, one column for the average number of sessions and one row per user
'''
group
.
setSession
()
if
not
Constant_model
.
COLUMN_SESSION
in
group
.
data
.
columns
:
group
.
setSession
()
data
=
[]
for
sequence
in
group
.
getSequences
():
data
.
append
([
sequence
.
idUser
,
len
(
sequence
.
events
)
/
len
(
sequence
.
events
.
groupby
([
Constant_model
.
COLUMN_SESSION
]))])
...
...
@@ -593,12 +597,13 @@ class computeFeature:
def
getMaxRestDays
(
group
):
# FEATURE_MAX_REST_DAY = 34
group
.
setSession
()
if
not
Constant_model
.
COLUMN_SESSION
in
group
.
data
.
columns
:
group
.
setSession
()
data
=
[]
for
sequence
in
group
.
getSequences
():
#remove registration and unregistration data
seq_events
=
sequence
.
events
[(
sequence
.
events
[
Constant_model
.
COLUMN_TYPE_RESOURCE
]
!=
Constant_model
.
TYPE_REGISTRATION
)
&
(
sequence
.
events
[
Constant_model
.
COLUMN_TYPE_RESOURCE
]
!=
Constant_model
.
TYPE_UN
REGISTRATION
)]
seq_events
=
sequence
.
events
[(
~
sequence
.
events
[
Constant_model
.
COLUMN_TYPE_RESOURCE
]
.
isin
(
Constant_model
.
TYPE_
REGISTRATION
)
)
]
list_date_max
=
seq_events
.
groupby
(
Constant_model
.
COLUMN_SESSION
).
max
()[
Constant_model
.
COLUMN_START
].
tolist
()
list_date_min
=
seq_events
.
groupby
(
Constant_model
.
COLUMN_SESSION
).
min
()[
Constant_model
.
COLUMN_START
].
tolist
()
max_rest_days
=
0
...
...
@@ -615,10 +620,11 @@ class computeFeature:
def
getTotalRestDays
(
group
):
# FEATURE_DURATION_REST = 36
group
.
setSession
()
if
not
Constant_model
.
COLUMN_SESSION
in
group
.
data
.
columns
:
group
.
setSession
()
data
=
[]
for
sequence
in
group
.
getSequences
():
seq_events
=
sequence
.
events
[(
sequence
.
events
[
Constant_model
.
COLUMN_TYPE_RESOURCE
]
!=
Constant_model
.
TYPE_REGISTRATION
)
&
(
sequence
.
events
[
Constant_model
.
COLUMN_TYPE_RESOURCE
]
!=
Constant_model
.
TYPE_UN
REGISTRATION
)]
seq_events
=
sequence
.
events
[(
~
sequence
.
events
[
Constant_model
.
COLUMN_TYPE_RESOURCE
]
.
isin
(
Constant_model
.
TYPE_
REGISTRATION
)
)
]
list_date_max
=
seq_events
.
groupby
(
Constant_model
.
COLUMN_SESSION
).
max
()[
Constant_model
.
COLUMN_START
].
tolist
()
list_date_min
=
seq_events
.
groupby
(
Constant_model
.
COLUMN_SESSION
).
min
()[
Constant_model
.
COLUMN_START
].
tolist
()
sum_rest_days
=
0
...
...
@@ -632,10 +638,11 @@ class computeFeature:
def
getAverageRestDays
(
group
):
# FEATURE_AVERAGE_REST_DAY = 37
group
.
setSession
()
if
not
Constant_model
.
COLUMN_SESSION
in
group
.
data
.
columns
:
group
.
setSession
()
data
=
[]
for
sequence
in
group
.
getSequences
():
seq_events
=
sequence
.
events
[(
sequence
.
events
[
Constant_model
.
COLUMN_TYPE_RESOURCE
]
!=
Constant_model
.
TYPE_REGISTRATION
)
&
(
sequence
.
events
[
Constant_model
.
COLUMN_TYPE_RESOURCE
]
!=
Constant_model
.
TYPE_UN
REGISTRATION
)]
seq_events
=
sequence
.
events
[(
~
sequence
.
events
[
Constant_model
.
COLUMN_TYPE_RESOURCE
]
.
isin
(
Constant_model
.
TYPE_
REGISTRATION
)
)
]
list_date_max
=
seq_events
.
groupby
(
Constant_model
.
COLUMN_SESSION
).
max
()[
Constant_model
.
COLUMN_START
].
tolist
()
list_date_min
=
seq_events
.
groupby
(
Constant_model
.
COLUMN_SESSION
).
min
()[
Constant_model
.
COLUMN_START
].
tolist
()
sum_rest_days
=
0
...
...
source/featureManager/oulad/testUnitComputeFeature.py
View file @
88e8da36
...
...
@@ -127,7 +127,7 @@ class Test(unittest.TestCase):
# 7 getAllMarks
res_fea_7
=
new_computeFeature
.
getAllMarks
(
group
)
self
.
assertTrue
(
isinstance
(
res_fea_7
,
pd
.
DataFrame
))
self
.
assertTrue
(
res_fea_7
[
res_fea_7
[
'idUser'
]
==
idUser
][
'all marks'
]
==
82.4
)
self
.
assertTrue
(
res_fea_7
[
res_fea_7
[
'idUser'
]
==
idUser
][
'all marks'
]
.
values
[
0
]
==
82.4
)
# 8 duration before assessment
res_fea_8
=
new_computeFeature
.
getDurationBeforeExercise
(
group
)
...
...
@@ -147,29 +147,29 @@ class Test(unittest.TestCase):
# 13 getAverageIntensitySession
res_fea_13
=
new_computeFeature
.
getAverageIntensitySession
(
group
)
self
.
assertTrue
(
isinstance
(
res_fea_13
,
pd
.
DataFrame
))
self
.
assertTrue
(
round
(
res_fea_13
[
res_fea_13
[
'idUser'
]
==
idUser
][
'average duration in session'
],
4
)
==
26.8857
)
self
.
assertTrue
(
round
(
res_fea_13
[
res_fea_13
[
'idUser'
]
==
idUser
][
'average duration in session'
]
.
values
[
0
]
,
4
)
==
26.8857
)
# 14 getAverageNumberOfEventInSession
res_fea_14
=
new_computeFeature
.
getAverageNumberOfEventInSession
(
group
)
self
.
assertTrue
(
isinstance
(
res_fea_14
,
pd
.
DataFrame
))
self
.
assertTrue
(
res_fea_14
[
res_fea_14
[
'idUser'
]
==
idUser
][
'nbr event in session'
]
==
5.8
)
self
.
assertTrue
(
res_fea_14
[
res_fea_14
[
'idUser'
]
==
idUser
][
'nbr event in session'
]
.
values
[
0
]
==
5.8
)
# getTimeSinceLastEvent
# 34 the max duration of the rest (day)
res_fea_34
=
new_computeFeature
.
getMaxRestDays
(
group
)
self
.
assertTrue
(
isinstance
(
res_fea_34
,
pd
.
DataFrame
))
self
.
assertTrue
(
res_fea_34
[
res_fea_34
[
'idUser'
]
==
idUser
][
'max rest day'
]
==
24
)
self
.
assertTrue
(
res_fea_34
[
res_fea_34
[
'idUser'
]
==
idUser
][
'max rest day'
]
.
values
[
0
]
==
24
)
# 36 Duration of the rest
res_fea_36
=
new_computeFeature
.
getTotalRestDays
(
group
)
self
.
assertTrue
(
isinstance
(
res_fea_36
,
pd
.
DataFrame
))
self
.
assertTrue
(
res_fea_36
[
res_fea_36
[
'idUser'
]
==
idUser
][
'duration rest'
]
==
249
)
self
.
assertTrue
(
res_fea_36
[
res_fea_36
[
'idUser'
]
==
idUser
][
'duration rest'
]
.
values
[
0
]
==
249
)
# 37 Average rest days
res_fea_37
=
new_computeFeature
.
getAverageRestDays
(
group
)
self
.
assertTrue
(
isinstance
(
res_fea_37
,
pd
.
DataFrame
))
self
.
assertTrue
(
round
(
res_fea_37
[
res_fea_37
[
'idUser'
]
==
idUser
][
'average rest day'
],
2
)
==
7.32
)
self
.
assertTrue
(
round
(
res_fea_37
[
res_fea_37
[
'idUser'
]
==
idUser
][
'average rest day'
]
.
values
[
0
]
,
2
)
==
7.32
)
if
__name__
==
"__main__"
:
#import sys;sys.argv = ['', 'Test.testLoadFeature']
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment