Last active
June 18, 2019 10:25
-
-
Save camallen/8b82382df0363d424013b24813d8f803 to your computer and use it in GitHub Desktop.
Manual classification csv exports for a panoptes project
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Manual CSV classifications dump for a panoptes project.
# Ensure config/database.yml is configured to use the read replica database and not the production db.
#
# Run via rails runner from the panoptes command line:
#   rails r project_classifications_csv_dump_export.rb
require 'csv'

# ID of the project whose classifications are exported; edit before running.
PROJECT_ID = 1

# Project record looked up once at load time; completed_resource_classifications
# reads it through this instance variable.
@resource = Project.find PROJECT_ID
# Builds the query for the classifications to export: @resource's complete
# classifications, joined to their workflow and restricted to workflows whose
# activated_state is "active", with user and workflow included for loading.
#
# @return the chained query object (presumably an ActiveRecord::Relation — confirm)
def completed_resource_classifications
  @resource
    .classifications
    .complete
    .joins(:workflow)
    .where(workflows: { activated_state: "active" })
    .includes(:user, :workflow)
end
# Primes @cache with the subject data for one batch of classifications.
#
# Fetches the (classification_id, subject_id) rows for the batch with a raw
# SQL query, hands them to the cache, then loads the matching Subject records
# (unscoped) and hands those to the cache as well.
#
# @param classifications [Enumerable] records responding to #id
# @return [Array] the subject ids linked to the batch, one entry per link row
def setup_subjects_cache(classifications)
  classification_ids = classifications.map(&:id)

  link_rows =
    if classification_ids.empty?
      # An empty batch would render as "IN ()", which most databases reject
      # as a syntax error, so skip the query and continue with no rows.
      []
    else
      # NOTE(review): ids are interpolated directly; they come from loaded
      # records rather than user input — presumably integers, confirm before
      # reusing this query elsewhere.
      sql = "SELECT classification_id, subject_id FROM classification_subjects where classification_id IN (#{classification_ids.join(',')})"
      ActiveRecord::Base.connection.select_rows(sql)
    end

  @cache.reset_classification_subjects(link_rows)
  subject_ids = link_rows.map { |_, subject_id| subject_id }
  @cache.reset_subjects(Subject.unscoped.where(id: subject_ids).load)
  subject_ids
end
# Primes @cache with the retired SubjectWorkflowStatus records for one batch.
#
# The lookup is restricted to the given subject ids and to the distinct
# workflow ids found on the batch's classifications.
#
# @param classifications [Enumerable] records responding to #workflow_id
# @param subject_ids [Array] subject ids to restrict the lookup to
# @return whatever @cache.reset_subject_workflow_statuses returns
def setup_retirement_cache(classifications, subject_ids)
  workflow_ids = classifications.map(&:workflow_id).uniq
  retired_statuses = SubjectWorkflowStatus
                     .retired
                     .where(subject_id: subject_ids, workflow_id: workflow_ids)
                     .load
  @cache.reset_subject_workflow_statuses(retired_statuses)
end
# Destination of the export; a relative path, so it resolves against the
# directory the runner is started from.
csv_file_path = "tmp/classifications_#{PROJECT_ID}_export.csv"

# Lookup cache shared with the formatter; the setup_* helpers refill it for
# every batch.
@cache ||= ClassificationDumpCache.new

CSV.open(csv_file_path, 'wb') do |csv|
  formatter = Formatter::Csv::Classification.new(@cache)
  csv << formatter.headers

  # Work through the classifications batch by batch rather than all at once.
  completed_resource_classifications.find_in_batches do |batch|
    # Prime the caches for this batch before formatting any of its rows.
    subject_ids = setup_subjects_cache(batch)
    setup_retirement_cache(batch, subject_ids)

    batch.each { |classification| csv << formatter.to_array(classification) }
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment