Last active
November 15, 2017 10:20
-
-
Save SamuelMarks/b284c5fc4c6699cdfd603f6b9065c513 to your computer and use it in GitHub Desktop.
OpenEdX datasets attempts to: SELECT grade, student_id, minutes_taken FROM questions_answered GROUP BY exam
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Now ignore everyone else; mongodump to CSV; `LOAD DATA FROM CSV` in MySQL | |
db.modulestore.structures.aggregate([ | |
{ $unwind: '$blocks' }, | |
{ $replaceRoot: { newRoot: "$blocks" } }, | |
{ $out: 'modulestore.blocks' } | |
]); |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Resolve all children so each block document contains the entire hierarchy of blocks | |
// (not just one level deep of descendent IDs) | |
db.modulestore.structures.aggregate([ | |
{ | |
$unwind: '$blocks' // flatten "blocks" array | |
}, | |
{ | |
$replaceRoot: { // move "blocks" field to top level | |
newRoot: "$blocks" | |
} | |
}, | |
{ | |
$unwind: { // flatten "fields.children" array | |
path: "$fields.children", | |
preserveNullAndEmptyArrays: true | |
} | |
}, | |
{ | |
// this step is technically not needed but it might speed up things - try running with and without that | |
$addFields: { // we only keep the second (last, really) entry of all your arrays since this is the only valid join key for the graphLookup | |
"fields.children": { | |
$slice: [ "$fields.children", -1 ] | |
} | |
} | |
}, | |
{ | |
$unwind: { // flatten "fields.children" array one more time because it was nested before | |
path: "$fields.children", | |
preserveNullAndEmptyArrays: true | |
} | |
}, { | |
$group: { // reduce the number of lookups required later by eliminating duplicate parent-child paths | |
"_id": "$block_id", | |
"block_type": { $first: "$block_type" }, | |
"definition": { $first: "$definition" }, | |
"fieldsFormat": { $first: "$fields.format" }, | |
"fieldsChildren": { $addToSet: "$fields.children" } | |
} | |
}, { | |
$project: { // restore original structure | |
"block_id": "$_id", | |
"block_type": "$block_type", | |
"definition": "$definition", | |
"fields": { | |
"format": "$fieldsFormat", | |
"children": "$fieldsChildren" | |
} | |
} | |
}, { // spit out the result into "modulestore.mapped0" collection, overwriting all existing content | |
$out: 'modulestore.mapped0' | |
}]); | |
db.modulestore.mapped0.aggregate([ | |
{ | |
$graphLookup: { | |
from: 'modulestore.mapped0', | |
startWith: '$block_id', | |
connectToField: 'fields.children', | |
connectFromField: 'block_id', | |
as: 'block_ids', | |
maxDepth: 0 | |
} | |
}, | |
{ | |
$lookup: { | |
from: 'modulestore.mapped0', | |
localField: 'block_ids.fields.children', | |
foreignField: '_id', | |
as: 'block_ids.fields.children' | |
} | |
} | |
]); |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
mongoexport --fields blocks --collection 'modulestore.structures' -d edxapp --assertExists --query '{"blocks.fields.format": {$in: ["Midterm Exam", "Final Exam"]}}' | node -e 'require("readline").createInterface({input: process.stdin,output: process.stdout,terminal: false}).on("line", l => console.info(JSON.parse(l).blocks.filter(e => ["Midterm Exam", "Final Exam"].indexOf(e.fields.format) > -1).map(e => `${e.block_id},${e.fields.format},${e.fields.display_name}`).join("\n")))' | { echo 'block_id,exam,display_name'; sort -u; } > /tmp/blockid_exam_displayname.csv |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Deps: mysql_data_load0.sql | |
WITH | |
Q0 AS ( | |
SELECT T0.id, T0.module_type, T0.course_id, T0.module_id, SUBSTRING_INDEX(T0.module_id, '@', -1) block_id, TIMESTAMPDIFF(MINUTE, T0.created, T0.modified) minutes_taken | |
FROM edxapp.courseware_studentmodule T0 | |
WHERE T0.course_id = '<my_course_id>' /* AND T0.module_type = 'problem' */ | |
), | |
Q1 AS ( | |
SELECT T1.block_id, T1.exam, T1.children | |
FROM blockid_exam_displayname T1 | |
) | |
SELECT Q0.*, Q1.block_id q1_block_id, Q1.exam, Q1.children | |
FROM Q0 | |
LEFT JOIN Q1 ON ( | |
LOCATE(Q0.block_id, Q1.children) | |
); | |
-- Also tried: | |
WITH big AS | |
(SELECT | |
T0.module_type, | |
T0.course_id, | |
T0.module_id, | |
SUBSTRING_INDEX(T0.module_id, '@', -1) block_id, | |
T2.display_name, | |
T2.parent_display_name, | |
T2.exam, | |
T2.children, | |
TIMESTAMPDIFF(MINUTE, T0.created, T0.modified) minutes_taken | |
FROM edxapp.courseware_studentmodule T0 | |
JOIN blockid_exam_displayname T2 | |
ON SUBSTRING_INDEX(T0.module_id, '@', -1) = T2.block_id | |
) SELECT * FROM big | |
WHERE course_id = '<course_id>' AND module_type = 'course'; |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Deps: mysql_data_load1.sql | |
-- Attempts to associate exam with all rows | |
WITH RECURSIVE exam_paths (block_id, display_name, `type`, parent, exam) AS ( | |
SELECT block_id, display_name, `type`, parent, exam | |
FROM blockid_exam_displayname | |
WHERE parent IS NULL | |
UNION ALL | |
SELECT e.block_id, e.display_name, e.`type`, e.parent, ep.exam | |
FROM exam_paths AS ep | |
JOIN blockid_exam_displayname AS e | |
ON ep.block_id = e.parent | |
) SELECT distinct(block_id), display_name, `type`, parent, exam | |
FROM exam_paths | |
ORDER BY exam; |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Deps: mysql_data_load1.sql | |
WITH RECURSIVE Q0 AS ( | |
SELECT T0.block_id, T0.display_name, | |
T0.child, T0.parent, | |
IFNULL(T0.exam, '') AS exam | |
FROM blockid_exam_displayname T0 | |
-- WHERE exam IS NULL | |
UNION ALL | |
SELECT T1.block_id, T1.display_name, | |
T1.child, T1.parent, | |
-- T1.exam | |
CONCAT( | |
IFNULL(Q0.exam, ''), | |
':', | |
IFNULL(T1.exam, '') | |
) AS exam0 | |
-- FROM blockid_exam_displayname T1 | |
FROM Q0 | |
JOIN blockid_exam_displayname T1 | |
ON T1.block_id=Q0.parent | |
) SELECT distinct(Q0.block_id), Q0.child, Q0.parent, Q0.exam | |
FROM Q0 | |
WHERE Q0.exam IS NOT NULL | |
ORDER BY WEIGHT_STRING(Q0.exam) DESC; | |
-- Results in all having 'Midterm Exam'; none have 'Final Exam' :( |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Deps: mysql_data_load2.sql | |
WITH | |
Q0 AS | |
(SELECT *, SUBSTRING_INDEX(T0.module_id, '@', -1) block_id | |
FROM edxapp.courseware_studentmodule T0) | |
SELECT Q0.* | |
FROM Q0 | |
INNER JOIN blockid_exam_displayname_child T1 | |
ON Q0.block_id = T1.block_id AND Q0.module_type = T1.block_type; |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Deps: mysql_data_load2.sql | |
WITH RECURSIVE Q0 AS ( | |
SELECT T0.block_id, T0.block_type, T0.display_name, | |
T0.child_id, T0.child_type, T0.parent, | |
IFNULL(T0.exam, '') AS exam | |
FROM blockid_exam_displayname_child T0 | |
-- WHERE exam IS NULL | |
UNION ALL | |
SELECT T1.block_id, T1.block_type, T1.display_name, | |
T1.child_id, T1.child_type, T1.parent, | |
-- T1.exam | |
CONCAT( | |
IFNULL(Q0.exam, ''), | |
':', | |
IFNULL(T1.exam, '') | |
) AS exam0 | |
-- FROM blockid_exam_displayname T1 | |
FROM Q0 | |
JOIN blockid_exam_displayname_child T1 | |
ON T1.block_id=Q0.parent | |
) SELECT distinct(Q0.block_id), Q0.block_type, Q0.child_id, Q0.child_type, Q0.parent, Q0.exam | |
FROM Q0 | |
WHERE Q0.exam IS NOT NULL AND Q0.exam LIKE '%Exam%' | |
ORDER BY WEIGHT_STRING(Q0.exam) DESC; | |
-- ^This one almost works; only get 239 rows back though; none with the block_type of 'problem'. | |
-- Only block_types were: course; chapter; sequential |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Deps: mongo_export0.bash | |
CREATE TEMPORARY TABLE blockid_exam_displayname ( | |
block_id VARCHAR(33) PRIMARY KEY, | |
exam VARCHAR(33), | |
display_name VARCHAR(33), | |
parent_display_name VARCHAR(33), | |
children TEXT | |
); | |
LOAD DATA LOCAL INFILE '/tmp/blockid_exam_displayname_children.csv' | |
INTO TABLE blockid_exam_displayname | |
FIELDS TERMINATED BY ',' | |
ENCLOSED BY '"' | |
IGNORE 1 LINES; |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Deps: mysql_load_data1.bash | |
CREATE TEMPORARY TABLE blockid_exam_displayname ( | |
block_id VARCHAR(33) NOT NULL, | |
exam VARCHAR(33), | |
display_name VARCHAR(33), | |
child VARCHAR(33), | |
parent VARCHAR(33) NULL, | |
INDEX(parent), | |
FOREIGN KEY (parent) REFERENCES blockid_exam_displayname (block_id), | |
PRIMARY KEY (block_id, child) | |
); | |
LOAD DATA LOCAL INFILE '/tmp/blockid_exam_displayname_child.csv' | |
INTO TABLE blockid_exam_displayname | |
FIELDS TERMINATED BY ',' | |
ENCLOSED BY '"'; | |
-- Set `parent` field | |
UPDATE blockid_exam_displayname T0 | |
INNER JOIN blockid_exam_displayname T1 | |
ON T1.child = T0.block_id | |
SET T0.parent = T1.block_id | |
WHERE T0.parent IS NULL; |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Deps: mysql_load_data2.bash | |
DROP TABLE IF EXISTS blockid_exam_displayname_child; | |
CREATE TEMPORARY TABLE blockid_exam_displayname_child ( | |
id MEDIUMINT NOT NULL AUTO_INCREMENT, | |
block_id VARCHAR(33) NOT NULL, | |
block_type VARCHAR(33) NOT NULL, | |
exam VARCHAR(33), | |
display_name VARCHAR(33), | |
child_id VARCHAR(33), | |
child_type VARCHAR(33), | |
parent VARCHAR(33) NULL, | |
INDEX(parent), | |
INDEX(exam), | |
PRIMARY KEY(id) | |
-- FOREIGN KEY (parent) REFERENCES blockid_exam_displayname_child (block_id), | |
-- PRIMARY KEY (block_id, block_type, child) | |
); | |
LOAD DATA LOCAL INFILE '/tmp/blockid_exam_displayname_child.csv' | |
INTO TABLE blockid_exam_displayname_child | |
FIELDS TERMINATED BY ',' | |
ENCLOSED BY '"' | |
IGNORE 1 LINES | |
(`block_id`, `block_type`, `exam`, `child_id`, `child_type`); | |
-- Set `parent` field | |
UPDATE blockid_exam_displayname_child T0 | |
INNER JOIN blockid_exam_displayname_child T1 | |
ON T1.child_id = T0.block_id AND T1.child_type = T0.block_type | |
SET T0.parent = T1.block_id | |
WHERE T0.parent IS NULL; |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Deps: post_mongo0.js in directory above | |
mongoexport --fields blocks --collection 'modulestore.structures' -d edxapp --assertExists --query '{"blocks.fields.format": {$in: ["Midterm Exam", "Final Exam"]}}' | node ../post_mongo0.js | { echo 'block_id,exam,display_name,parent_display_name,children'; sort -u; } > blockid_exam_displayname.csv |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Deps: post_mongo1.js in directory above | |
mongoexport --fields block_id,block_type,fields --collection 'modulestore.blocks' -d edxapp --assertExists | node post_mongo.js | { echo 'id,block_id,block_type,exam,child_id,child_type'; sort -u; } > blockid_exam_displayname_child.csv |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env node | |
// Parses such that one child per row; rather than an array of children | |
require("readline").createInterface({input: process.stdin,output: process.stdout,terminal: false}).on("line", l => { | |
l = JSON.parse(l); | |
process.stdout.write(l.blocks.map(e => { | |
const r = `"${e.block_id}",${e.fields.hasOwnProperty("format") ? "\"" + e.fields.format + "\"" : "\\N"},${e.fields.hasOwnProperty("display_name") ? "\"" + e.fields.display_name + "\"" : "\\N"},${l.blocks[0].fields.hasOwnProperty("display_name")? "\""+l.blocks[0].fields.display_name+"\"" : "\\N"}`; | |
return e.fields.children && e.fields.children[0] ? e.fields.children.map(child => `${r},"${child[0]}","${child[1]}"`).join("\n"): `${r},\\N`; | |
}).join("\n")) | |
}) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment