Importing a large CSV file

A Rails pattern for importing large CSV files: the controller re-encodes and stores the raw CSV on a DataImport record, then a Sidekiq job processes it in chunks and tracks per-row results.
class DataImport < ApplicationRecord
  belongs_to :company, optional: true # org in this case
  belongs_to :user                    # employee in this case

  enum status: %i[pending processing completed failed]
end
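# Rails generates predicate, bang, and scope methods from the enum above;
# a quick illustration (standard Rails enum behavior, nothing gist-specific):
import = DataImport.new
import.pending?      # => true once status defaults to 0 via the migration below
import.processing!   # saves the record with status: :processing
DataImport.completed # scope returning all completed imports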
# Next, a controller action that accepts the upload and kicks off the data import job:
def upload
  file = params[:reviews][:csv_file]
  return not_found if file.blank?

  start = Time.now
  @import = CreateDataImportService.call(
    company: current_company,
    user: current_user,
    file: file,
    reference_point_id: params[:reviews][:reference_point_id],
    type: session[:review_type]
  )
  ProcessDataImportJob.perform_async(@import.id, session[:review_type].to_s)
  logger.warn "Time to Process: #{((Time.now - start) * 1000).round}ms"
  redirect_to upload_status_requests_path
end
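# The gist doesn't define `not_found`; a common Rails pattern it likely stands
# in for is raising RecordNotFound, which Rails rescues and renders as a 404:
def not_found
  raise ActiveRecord::RecordNotFound
end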
class CreateDataImportService < BaseService
  required_attr :file, :user
  optional_attr :company, :type, :reference_point_id

  def call
    DataImport.create(data_import_attrs)
  end

  private

  def contents
    File.read(file.path)
        .force_encoding('ISO-8859-1')
        .encode(Encoding.find('UTF-8'),
                invalid: :replace,        # Replace invalid byte sequences
                undef: :replace,          # Replace anything non-UTF-8
                replace: '',              # Use a blank for those replacements
                universal_newline: true)  # Always break lines with \n
  end

  def data_import_attrs
    {
      company: company,
      csv_contents: contents,
      user: user,
      reference_point: reference_point
    }
  end

  def reference_point
    return nil if reference_point_id.blank?
    return ReferencePoint.defaults.find_by(reference_type: review_type, id: reference_point_id) if company.blank?

    company.reference_points.find_by(reference_type: review_type, id: reference_point_id)
  end

  def review_type
    FetchReviewTypeService.call(user: user, type: type)
  end
end
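# BaseService isn't included in the gist. A minimal sketch of what it might
# look like, assuming `call` is a class-level entry point and that
# `required_attr`/`optional_attr` declare keyword inputs exposed as readers:
class BaseService
  def self.required_attr(*names)
    required_attrs.concat(names)
    attr_reader(*names)
  end

  def self.optional_attr(*names)
    optional_attrs.concat(names)
    attr_reader(*names)
  end

  def self.required_attrs
    @required_attrs ||= []
  end

  def self.optional_attrs
    @optional_attrs ||= []
  end

  def self.call(**attrs)
    new(**attrs).call
  end

  def initialize(**attrs)
    missing = self.class.required_attrs - attrs.keys
    raise ArgumentError, "missing required attrs: #{missing.join(', ')}" if missing.any?

    (self.class.required_attrs + self.class.optional_attrs).each do |name|
      instance_variable_set("@#{name}", attrs[name])
    end
  end
end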
class CreateDataImports < ActiveRecord::Migration[5.0]
  def change
    create_table :data_imports do |t|
      t.text :csv_contents
      t.references :company, foreign_key: true # Organization who owns it
      t.references :user, foreign_key: true    # User who uploaded
      t.integer :rows
      t.integer :rows_processed
      t.integer :rows_failed
      t.integer :status, null: false, default: 0
      t.text :error_message
      t.string :successful_rows, array: true, default: []
      t.string :failed_rows, array: true, default: []

      t.timestamps
    end
  end
end
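# Two details worth noting: `array: true` columns require PostgreSQL, and the
# integer `status` column backs the model's enum, which maps values by position:
DataImport.statuses
# => {"pending"=>0, "processing"=>1, "completed"=>2, "failed"=>3}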
class ProcessDataImportJob
  include Sidekiq::Worker
  sidekiq_options queue: 'import'

  def perform(data_import_id, type)
    import = DataImport.find(data_import_id)
    import.update!(status: :processing, rows_processed: 0, rows_failed: 0, successful_rows: [], failed_rows: [])

    # Recent SmarterCSV versions accept an IO as well as a filename, so the
    # stored contents can be wrapped in a StringIO rather than written to disk.
    SmarterCSV.process(StringIO.new(import.csv_contents), chunk_size: 100).each do |chunk|
      chunk.each do |row|
        # do the work... (`object`, `first_name`, `last_name`, and `review`
        # come from this elided per-row processing)
        if object.persisted?
          import.successful_rows << "#{first_name} #{last_name}"
        else
          report_review_failure(review)
          import.failed_rows << "#{first_name} #{last_name}|#{object.errors.full_messages.join(', ')}"
        end
      end
    end

    import.update!(
      status: :completed,
      rows: import.successful_rows.size + import.failed_rows.size,
      rows_processed: import.successful_rows.size,
      rows_failed: import.failed_rows.size
    )
  end
end
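# The controller redirects to upload_status_requests_path, which isn't shown
# in the gist. A hypothetical sketch of an action that page could poll,
# assuming the user has_many :data_imports:
def upload_status
  import = current_user.data_imports.order(created_at: :desc).first
  render json: import.slice(:status, :rows, :rows_processed, :rows_failed, :error_message)
end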