Skip to content

Instantly share code, notes, and snippets.

@netoisc
Created September 25, 2020 19:26
Show Gist options
  • Select an option

  • Save netoisc/7f4995751341f5bcb7657e241fc2aa7d to your computer and use it in GitHub Desktop.

Select an option

Save netoisc/7f4995751341f5bcb7657e241fc2aa7d to your computer and use it in GitHub Desktop.
Migrate NBgrader sqlite database to postgres
from nbgrader.api import Gradebook
import petl as etl
import psycopg2
from psycopg2.extensions import ISOLATION_LEVEL_AUTOCOMMIT
import sqlite3
db_name= 'nb_database'
db_user= 'illumidesk'
db_pwd= 'illumidesk.'
db_host= 'illumidesk_db_instance'
db_port = 5432
sqlite_gradebook_path = '/Users/ecruz/Downloads/gradebook.db'
def drop_create_pg_database() -> None:
with psycopg2.connect(f'user={db_user} password={db_pwd} host={db_host}') as conn:
conn.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT)
try:
with conn.cursor() as cur:
cur.execute(f'DROP DATABASE {db_name};')
except Exception as e:
# database not exists
print('An error occurred by dropping the database:', e)
else:
with conn.cursor() as cur:
cur.execute(f'CREATE DATABASE {db_name};')
def create_nb_grader_schema():
postgres_url = f'postgresql://{db_user}:{db_pwd}@{db_host}:{db_port}/{db_name}'
# initialize the nbgrader database in postgres
with Gradebook(postgres_url) as gb:
pass
def migrate_data():
nb_sqlite_conn = sqlite3.connect(sqlite_gradebook_path)
nb_postgres_conn = psycopg2.connect(f'dbname={db_name} user={db_user} password={db_pwd} host={db_host}')
course_table = etl.fromdb(nb_sqlite_conn, 'SELECT * FROM course')
print('Course table existing items:', len(course_table))
etl.todb(course_table, nb_postgres_conn, 'course')
assignment_table = etl.fromdb(nb_sqlite_conn, 'SELECT * FROM assignment')
print('Assignment table existing items:', len(assignment_table))
etl.todb(assignment_table, nb_postgres_conn, 'assignment')
student_table = etl.fromdb(nb_sqlite_conn, 'SELECT * FROM student')
print('Student table existing items:', len(student_table))
etl.todb(student_table, nb_postgres_conn, 'student')
notebook_table = etl.fromdb(nb_sqlite_conn, 'SELECT * FROM notebook')
print('Notebook table existing items:', len(notebook_table))
etl.todb(notebook_table, nb_postgres_conn, 'notebook')
# tables linked to notebook table >>
sub_assig_table = etl.fromdb(nb_sqlite_conn, 'SELECT * FROM submitted_assignment')
print('Submitted_assignment table existing items:', len(sub_assig_table))
etl.todb(sub_assig_table, nb_postgres_conn, 'submitted_assignment')
sub_notebook_table = etl.fromdb(nb_sqlite_conn, 'SELECT id, assignment_id, notebook_id, case flagged when 1 then "True" else "False" end flagged, late_submission_penalty FROM submitted_notebook')
print('Submitted_notebook table existing items:', len(sub_notebook_table))
etl.todb(sub_notebook_table, nb_postgres_conn, 'submitted_notebook')
basec_table = etl.fromdb(nb_sqlite_conn, 'SELECT * FROM base_cell')
print('base_cell table existing items:', len(basec_table))
etl.todb(basec_table, nb_postgres_conn, 'base_cell')
sourcec_table = etl.fromdb(nb_sqlite_conn, 'SELECT id, name, cell_type, case locked when 1 then "True" else "False" end locked, source, checksum FROM source_cell')
print('source_cell table existing items:', len(sourcec_table))
etl.todb(sourcec_table, nb_postgres_conn, 'source_cell')
# tables linked to base_cell >>
solutionc_table = etl.fromdb(nb_sqlite_conn, 'SELECT * FROM solution_cells')
print('solution_cells table existing items:', len(solutionc_table))
etl.todb(solutionc_table, nb_postgres_conn, 'solution_cells')
taskc_table = etl.fromdb(nb_sqlite_conn, 'SELECT * FROM task_cells')
print('task_cells table existing items:', len(taskc_table))
etl.todb(taskc_table, nb_postgres_conn, 'task_cells')
gradec_table = etl.fromdb(nb_sqlite_conn, 'SELECT * FROM grade_cells')
print('grade_cells table existing items:', len(taskc_table))
etl.todb(gradec_table, nb_postgres_conn, 'grade_cells')
# tables linked to submitted_notebook and base_cell >>
comment_table = etl.fromdb(nb_sqlite_conn, 'SELECT * FROM comment')
print('comment table existing items:', len(comment_table))
etl.todb(comment_table, nb_postgres_conn, 'comment')
grade_table = etl.fromdb(nb_sqlite_conn, 'SELECT id, notebook_id, cell_id, auto_score, manual_score, extra_credit, case needs_manual_grade when 1 then "True" else "False" end needs_manual_grade FROM grade')
print('grade table existing items:', len(grade_table))
etl.todb(grade_table, nb_postgres_conn, 'grade')
# Validate source table sizes Vs target table sizes
course_table_pg = etl.fromdb(nb_postgres_conn, 'SELECT * FROM course')
assert len(course_table) == len(course_table_pg)
assignment_table_pg = etl.fromdb(nb_postgres_conn, 'SELECT * FROM assignment')
assert len(assignment_table) == len(assignment_table_pg)
student_table_pg = etl.fromdb(nb_postgres_conn, 'SELECT * FROM student')
assert len(student_table) == len(student_table_pg)
notebook_table_pg = etl.fromdb(nb_postgres_conn, 'SELECT * FROM notebook')
assert len(notebook_table) == len(notebook_table_pg)
sub_assig_table_pg = etl.fromdb(nb_postgres_conn, 'SELECT * FROM submitted_assignment')
assert len(sub_assig_table) == len(sub_assig_table_pg)
sub_notebook_table_pg = etl.fromdb(nb_postgres_conn, 'SELECT * FROM submitted_notebook')
assert len(sub_notebook_table) == len(sub_notebook_table_pg)
basec_table_pg = etl.fromdb(nb_postgres_conn, 'SELECT * FROM base_cell')
assert len(basec_table) == len(basec_table_pg)
sourcec_table_pg = etl.fromdb(nb_postgres_conn, 'SELECT * FROM source_cell')
assert len(sourcec_table) == len(sourcec_table_pg)
solutionc_table_pg = etl.fromdb(nb_postgres_conn, 'SELECT * FROM solution_cells')
assert len(solutionc_table) == len(solutionc_table_pg)
taskc_table_pg = etl.fromdb(nb_postgres_conn, 'SELECT * FROM task_cells')
assert len(taskc_table) == len(taskc_table_pg), f'{len(taskc_table)} Vs {len(taskc_table_pg)}'
gradec_table_pg = etl.fromdb(nb_postgres_conn, 'SELECT * FROM grade_cells')
assert len(gradec_table) == len(gradec_table_pg), f'{len(gradec_table)} Vs {len(gradec_table_pg)}'
comment_table_pg = etl.fromdb(nb_postgres_conn, 'SELECT * FROM comment')
assert len(comment_table) == len(comment_table_pg), f'{len(comment_table)} Vs {len(comment_table_pg)}'
grade_table_pg = etl.fromdb(nb_postgres_conn, 'SELECT * FROM grade')
assert len(grade_table) == len(grade_table_pg), f'{len(grade_table)} Vs {len(grade_table_pg)}'
if __name__ == '__main__':
# drop_create_pg_database():
create_nb_grader_schema()
migrate_data()
@netoisc
Copy link
Copy Markdown
Author

netoisc commented Sep 25, 2020

This is the complete schema:
nbgrader_schema

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment