(pipeline)[analytics@edx-analytics-devstack pipeline]$ launch-task InsertToMysqlUserProfilesTask --local-scheduler
DEBUG:stevedore.extension:found extension EntryPoint.parse('sqoop-import = edx.analytics.tasks.sqoop:SqoopImportFromMysql')
DEBUG:stevedore.extension:found extension EntryPoint.parse('last-country = edx.analytics.tasks.user_location:LastCountryForEachUser')
DEBUG:stevedore.extension:found extension EntryPoint.parse('enrollment_validation = edx.analytics.tasks.enrollment_validation:CourseEnrollmentValidationTask')
DEBUG:stevedore.extension:found extension EntryPoint.parse('inc-enrollments-report = edx.analytics.tasks.reports.incremental_enrollments:WeeklyIncrementalUsersAndEnrollments')
DEBUG:stevedore.extension:found extension EntryPoint.parse('total-enrollments-report = edx.analytics.tasks.reports.total_enrollments:WeeklyAllUsersAndEnrollments')
DEBUG:stevedore.extension:found extension EntryPoint.parse('video = edx.analytics.tasks.video:InsertToMysqlAllVideoTask')
DEBUG:stevedore.extension:found extension EntryPoint.parse('export-student-module = edx.analytics.tasks.database_exports:StudentModulePerCourseAfterImportWorkflow')
DEBUG:stevedore.extension:found extension EntryPoint.parse('calendar = edx.analytics.tasks.calendar_task:CalendarTableTask')
DEBUG:stevedore.extension:found extension EntryPoint.parse('location-per-course = edx.analytics.tasks.location_per_course:LastCountryOfUser')
DEBUG:stevedore.extension:found extension EntryPoint.parse('enrollments-report = edx.analytics.tasks.reports.enrollments:EnrollmentsByWeek')
DEBUG:stevedore.extension:found extension EntryPoint.parse('dump-student-module = edx.analytics.tasks.database_exports:StudentModulePerCourseTask')
DEBUG:stevedore.extension:found extension EntryPoint.parse('noop = edx.analytics.tasks.performance:ParseEventLogPerformanceTask')
DEBUG:stevedore.extension:found extension EntryPoint.parse('user-profiles = edx.analytics.tasks.user_profiles:InsertToMysqlUserProfilesTask')
DEBUG:stevedore.extension:found extension EntryPoint.parse('user-activity = edx.analytics.tasks.user_activity:CourseActivityWeeklyTask')
DEBUG:stevedore.extension:found extension EntryPoint.parse('grade-dist = edx.analytics.tasks.studentmodule_dist:GradeDistFromSqoopToMySQLWorkflow')
DEBUG:stevedore.extension:found extension EntryPoint.parse('enrollments_and_registrations_workflow-manifest = edx.analytics.tasks.reports.enrollments_and_registrations_workflow_manifest:EnrollmentsandRegistrationsWorkflow')
DEBUG:stevedore.extension:found extension EntryPoint.parse('database-import = edx.analytics.tasks.database_imports:ImportAllDatabaseTablesTask')
DEBUG:stevedore.extension:found extension EntryPoint.parse('catalog = edx.analytics.tasks.course_catalog:CourseCatalogWorkflow')
DEBUG:stevedore.extension:found extension EntryPoint.parse('enrollments = edx.analytics.tasks.enrollments:ImportEnrollmentsIntoMysql')
DEBUG:stevedore.extension:found extension EntryPoint.parse('course-enroll = edx.analytics.tasks.course_enroll:CourseEnrollmentChangesPerDay')
DEBUG:stevedore.extension:found extension EntryPoint.parse('export-events = edx.analytics.tasks.event_exports:EventExportTask')
DEBUG:stevedore.extension:found extension EntryPoint.parse('overall_events = edx.analytics.tasks.overall_events:TotalEventsDailyTask')
DEBUG:stevedore.extension:found extension EntryPoint.parse('student_engagement = edx.analytics.tasks.student_engagement:StudentEngagementTask')
DEBUG:stevedore.extension:found extension EntryPoint.parse('answer-dist = edx.analytics.tasks.answer_dist:AnswerDistributionPerCourse')
DEBUG:stevedore.extension:found extension EntryPoint.parse('insert-into-table = edx.analytics.tasks.mysql_load:MysqlInsertTask')
DEBUG:stevedore.extension:found extension EntryPoint.parse('all_events_report = edx.analytics.tasks.reports.total_events_report:TotalEventsReportWorkflow')
DEBUG:edx.analytics.tasks.launchers.local:Using override.cfg
2015-08-18 09:04:51,648 INFO 11904 [luigi-interface] worker.py:267 - Scheduled InsertToMysqlUserProfilesTask(database=analytics, credentials=/home/analytics/.mysql_creds, warehouse_path=hdfs://localhost:9000/edx-analytics-pipeline/warehouse/, overwrite=True, import_date=None) (PENDING)
2015-08-18 09:05:06,189 INFO 11904 [luigi-interface] worker.py:267 - Scheduled ImportAuthUserProfileTask(overwrite=False, destination=hdfs://localhost:9000/edx-analytics-pipeline/warehouse/, credentials=/home/analytics/.mysql_lms_creds, import_date=2015-08-18) (DONE)
2015-08-18 09:05:20,648 INFO 11904 [luigi-interface] worker.py:267 - Scheduled ImportAuthUserTask(overwrite=False, destination=hdfs://localhost:9000/edx-analytics-pipeline/warehouse/, credentials=/home/analytics/.mysql_lms_creds, import_date=2015-08-18) (DONE)
2015-08-18 09:05:20,649 INFO 11904 [luigi-interface] worker.py:267 - Scheduled HiveTableFromParameterQueryTask(overwrite=True, warehouse_path=hdfs://localhost:9000/edx-analytics-pipeline/warehouse/, insert_query=
SELECT
au.id,
au.username,
au.last_login,
au.date_joined,
au.is_staff,
au.email,
p.name,
IF(p.gender != '', p.gender, NULL),
p.year_of_birth,
IF(p.level_of_education != '', p.level_of_education, NULL)
FROM auth_user au
LEFT OUTER JOIN auth_userprofile p ON au.id = p.user_id
WHERE au.is_active
, table=user_profile, columns=(('id', 'INT'), ('username', 'STRING'), ('last_login', 'TIMESTAMP'), ('date_joined', 'TIMESTAMP'), ('is_staff', 'TINYINT'), ('email', 'STRING'), ('name', 'STRING'), ('gender', 'STRING'), ('year_of_birth', 'INT'), ('level_of_education', 'STRING')), partition=dt=2015-08-18) (PENDING)
2015-08-18 09:05:20,653 INFO 11904 [luigi-interface] worker.py:267 - Scheduled ExternalURL(url=/home/analytics/.mysql_creds) (DONE)
2015-08-18 09:05:20,654 INFO 11904 [luigi-interface] interface.py:193 - Done scheduling tasks
2015-08-18 09:05:20,654 INFO 11904 [luigi-interface] worker.py:282 - [pid 11904] Worker Worker(salt=342919858, host=edx-analytics-devstack, username=analytics, pid=11904) running HiveTableFromParameterQueryTask(overwrite=True, warehouse_path=hdfs://localhost:9000/edx-analytics-pipeline/warehouse/, insert_query=
SELECT
au.id,
au.username,
au.last_login,
au.date_joined,
au.is_staff,
au.email,
p.name,
IF(p.gender != '', p.gender, NULL),
p.year_of_birth,
IF(p.level_of_education != '', p.level_of_education, NULL)
FROM auth_user au
LEFT OUTER JOIN auth_userprofile p ON au.id = p.user_id
WHERE au.is_active
, table=user_profile, columns=(('id', 'INT'), ('username', 'STRING'), ('last_login', 'TIMESTAMP'), ('date_joined', 'TIMESTAMP'), ('is_staff', 'TINYINT'), ('email', 'STRING'), ('name', 'STRING'), ('gender', 'STRING'), ('year_of_birth', 'INT'), ('level_of_education', 'STRING')), partition=dt=2015-08-18)
2015-08-18 09:05:23,327 INFO 11904 [edx.analytics.tasks.util.overwrite] overwrite.py:55 - Removing existing output for task HiveTableFromParameterQueryTask(overwrite=True, warehouse_path=hdfs://localhost:9000/edx-analytics-pipeline/warehouse/, insert_query=
SELECT
au.id,
au.username,
au.last_login,
au.date_joined,
au.is_staff,
au.email,
p.name,
IF(p.gender != '', p.gender, NULL),
p.year_of_birth,
IF(p.level_of_education != '', p.level_of_education, NULL)
FROM auth_user au
LEFT OUTER JOIN auth_userprofile p ON au.id = p.user_id
WHERE au.is_active
, table=user_profile, columns=(('id', 'INT'), ('username', 'STRING'), ('last_login', 'TIMESTAMP'), ('date_joined', 'TIMESTAMP'), ('is_staff', 'TINYINT'), ('email', 'STRING'), ('name', 'STRING'), ('gender', 'STRING'), ('year_of_birth', 'INT'), ('level_of_education', 'STRING')), partition=dt=2015-08-18)
2015-08-18 09:05:28,643 INFO 11904 [luigi-interface] hive.py:358 - ['hive', '-f', '/tmp/tmpF9h4tG', '-i', '/home/analytics/.hiverc', '--hiveconf', "mapred.job.name=HiveTableFromParameterQueryTask(overwrite=True, warehouse_path=hdfs://localhost:9000/edx-analytics-pipeline/warehouse/, insert_query=\n SELECT\n au.id,\n au.username,\n au.last_login,\n au.date_joined,\n au.is_staff,\n au.email,\n p.name,\n IF(p.gender != '', p.gender, NULL),\n p.year_of_birth,\n IF(p.level_of_education != '', p.level_of_education, NULL)\n FROM auth_user au\n LEFT OUTER JOIN auth_userprofile p ON au.id = p.user_id\n WHERE au.is_active\n , table=user_profile, columns=(('id', 'INT'), ('username', 'STRING'), ('last_login', 'TIMESTAMP'), ('date_joined', 'TIMESTAMP'), ('is_staff', 'TINYINT'), ('email', 'STRING'), ('name', 'STRING'), ('gender', 'STRING'), ('year_of_birth', 'INT'), ('level_of_education', 'STRING')), partition=dt=2015-08-18)"]
2015-08-18 09:05:28,645 INFO 11904 [luigi-interface] hadoop.py:203 - hive -f /tmp/tmpF9h4tG -i /home/analytics/.hiverc --hiveconf mapred.job.name=HiveTableFromParameterQueryTask(overwrite=True, warehouse_path=hdfs://localhost:9000/edx-analytics-pipeline/warehouse/, insert_query=
SELECT
au.id,
au.username,
au.last_login,
au.date_joined,
au.is_staff,
au.email,
p.name,
IF(p.gender != '', p.gender, NULL),
p.year_of_birth,
IF(p.level_of_education != '', p.level_of_education, NULL)
FROM auth_user au
LEFT OUTER JOIN auth_userprofile p ON au.id = p.user_id
WHERE au.is_active
, table=user_profile, columns=(('id', 'INT'), ('username', 'STRING'), ('last_login', 'TIMESTAMP'), ('date_joined', 'TIMESTAMP'), ('is_staff', 'TINYINT'), ('email', 'STRING'), ('name', 'STRING'), ('gender', 'STRING'), ('year_of_birth', 'INT'), ('level_of_education', 'STRING')), partition=dt=2015-08-18)
2015-08-18 09:05:31,544 INFO 11904 [luigi-interface] hadoop.py:234 - Logging initialized using configuration in jar:file:/usr/local/hive/lib/hive-common-1.2.1.jar!/hive-log4j.properties
2015-08-18 09:05:42,044 INFO 11904 [luigi-interface] hadoop.py:234 - OK
2015-08-18 09:05:42,046 INFO 11904 [luigi-interface] hadoop.py:234 - Time taken: 2.862 seconds
2015-08-18 09:05:42,691 INFO 11904 [luigi-interface] hadoop.py:234 - OK
2015-08-18 09:05:42,692 INFO 11904 [luigi-interface] hadoop.py:234 - Time taken: 0.645 seconds
2015-08-18 09:05:42,999 INFO 11904 [luigi-interface] hadoop.py:234 - OK
2015-08-18 09:05:43,000 INFO 11904 [luigi-interface] hadoop.py:234 - Time taken: 0.307 seconds
2015-08-18 09:05:43,296 INFO 11904 [luigi-interface] hadoop.py:234 - OK
2015-08-18 09:05:43,300 INFO 11904 [luigi-interface] hadoop.py:234 - Time taken: 0.296 seconds
2015-08-18 09:05:44,432 INFO 11904 [luigi-interface] hadoop.py:234 - Query ID = analytics_20150818090543_5fcf5bf8-0713-495a-9b98-a6ada1c622cd
2015-08-18 09:05:44,433 INFO 11904 [luigi-interface] hadoop.py:234 - Total jobs = 1
2015-08-18 09:05:48,418 INFO 11904 [luigi-interface] hadoop.py:234 - 15/08/18 09:05:48 WARN conf.Configuration: file:/tmp/analytics/ec3de7ac-590d-421e-abf1-a591d652e109/hive_2015-08-18_09-05-43_302_7757311696724941498-1/-local-10005/jobconf.xml:an attempt to override final parameter: mapreduce.job.end-notification.max.retry.interval; Ignoring.
2015-08-18 09:05:48,502 INFO 11904 [luigi-interface] hadoop.py:234 - 15/08/18 09:05:48 WARN conf.Configuration: file:/tmp/analytics/ec3de7ac-590d-421e-abf1-a591d652e109/hive_2015-08-18_09-05-43_302_7757311696724941498-1/-local-10005/jobconf.xml:an attempt to override final parameter: mapreduce.job.end-notification.max.attempts; Ignoring.
2015-08-18 09:05:49,113 INFO 11904 [luigi-interface] hadoop.py:234 - Execution log at: /tmp/analytics/analytics_20150818090543_5fcf5bf8-0713-495a-9b98-a6ada1c622cd.log
2015-08-18 09:05:50,016 INFO 11904 [luigi-interface] hadoop.py:234 - 2015-08-18 09:05:50 Starting to launch local task to process map join; maximum memory = 518979584
2015-08-18 09:05:51,169 INFO 11904 [luigi-interface] hadoop.py:234 - 2015-08-18 09:05:51 Dump the side-table for tag: 1 with group count: 5 into file: file:/tmp/analytics/ec3de7ac-590d-421e-abf1-a591d652e109/hive_2015-08-18_09-05-43_302_7757311696724941498-1/-local-10002/HashTable-Stage-4/MapJoin-mapfile01--.hashtable
2015-08-18 09:05:51,281 INFO 11904 [luigi-interface] hadoop.py:234 - 2015-08-18 09:05:51 Uploaded 1 File to: file:/tmp/analytics/ec3de7ac-590d-421e-abf1-a591d652e109/hive_2015-08-18_09-05-43_302_7757311696724941498-1/-local-10002/HashTable-Stage-4/MapJoin-mapfile01--.hashtable (404 bytes)
2015-08-18 09:05:51,283 INFO 11904 [luigi-interface] hadoop.py:234 - 2015-08-18 09:05:51 End of local task; Time Taken: 1.271 sec.
2015-08-18 09:05:51,996 INFO 11904 [luigi-interface] hadoop.py:234 - Execution completed successfully
2015-08-18 09:05:51,997 INFO 11904 [luigi-interface] hadoop.py:234 - MapredLocal task succeeded
2015-08-18 09:05:52,011 INFO 11904 [luigi-interface] hadoop.py:234 - Launching Job 1 out of 1
2015-08-18 09:05:52,013 INFO 11904 [luigi-interface] hadoop.py:234 - Number of reduce tasks is set to 0 since there's no reduce operator
2015-08-18 09:05:52,854 INFO 11904 [luigi-interface] hadoop.py:234 - Job running in-process (local Hadoop)
2015-08-18 09:05:53,869 INFO 11904 [luigi-interface] hadoop.py:234 - 2015-08-18 09:05:53,864 Stage-4 map = 0%, reduce = 0%
2015-08-18 09:05:54,880 INFO 11904 [luigi-interface] hadoop.py:234 - 2015-08-18 09:05:54,880 Stage-4 map = 100%, reduce = 0%
2015-08-18 09:05:54,886 INFO 11904 [luigi-interface] hadoop.py:234 - Ended Job = job_local1358817778_0001
2015-08-18 09:05:54,901 INFO 11904 [luigi-interface] hadoop.py:234 - Loading data to table default.user_profile partition (dt=2015-08-18)
2015-08-18 09:05:55,200 INFO 11904 [luigi-interface] hadoop.py:234 - Partition default.user_profile{dt=2015-08-18} stats: [numFiles=1, numRows=5, totalSize=433, rawDataSize=428]
2015-08-18 09:05:55,351 INFO 11904 [luigi-interface] hadoop.py:234 - MapReduce Jobs Launched:
2015-08-18 09:05:55,352 INFO 11904 [luigi-interface] hadoop.py:234 - Stage-Stage-4: HDFS Read: 437 HDFS Write: 524 SUCCESS
2015-08-18 09:05:55,353 INFO 11904 [luigi-interface] hadoop.py:234 - Total MapReduce CPU Time Spent: 0 msec
2015-08-18 09:05:55,353 INFO 11904 [luigi-interface] hadoop.py:234 - OK
2015-08-18 09:05:55,356 INFO 11904 [luigi-interface] hadoop.py:234 - Time taken: 12.052 seconds
2015-08-18 09:05:55,817 INFO 11904 [luigi-interface] worker.py:296 - [pid 11904] Worker Worker(salt=342919858, host=edx-analytics-devstack, username=analytics, pid=11904) done HiveTableFromParameterQueryTask(overwrite=True, warehouse_path=hdfs://localhost:9000/edx-analytics-pipeline/warehouse/, insert_query=
SELECT
au.id,
au.username,
au.last_login,
au.date_joined,
au.is_staff,
au.email,
p.name,
IF(p.gender != '', p.gender, NULL),
p.year_of_birth,
IF(p.level_of_education != '', p.level_of_education, NULL)
FROM auth_user au
LEFT OUTER JOIN auth_userprofile p ON au.id = p.user_id
WHERE au.is_active
, table=user_profile, columns=(('id', 'INT'), ('username', 'STRING'), ('last_login', 'TIMESTAMP'), ('date_joined', 'TIMESTAMP'), ('is_staff', 'TINYINT'), ('email', 'STRING'), ('name', 'STRING'), ('gender', 'STRING'), ('year_of_birth', 'INT'), ('level_of_education', 'STRING')), partition=dt=2015-08-18)
2015-08-18 09:05:55,822 INFO 11904 [luigi-interface] worker.py:282 - [pid 11904] Worker Worker(salt=342919858, host=edx-analytics-devstack, username=analytics, pid=11904) running InsertToMysqlUserProfilesTask(database=analytics, credentials=/home/analytics/.mysql_creds, warehouse_path=hdfs://localhost:9000/edx-analytics-pipeline/warehouse/, overwrite=True, import_date=None)
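
A quick way to sanity-check the Hive stage above (a minimal sketch, not part of the logged run; it assumes the default Hive database and the dt=2015-08-18 partition named in the log):

-- The partition stats above report numRows=5, so this should return 5.
SELECT COUNT(*) FROM default.user_profile WHERE dt = '2015-08-18';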