Skip to content

Instantly share code, notes, and snippets.

@zaiste
Last active December 30, 2017 15:26
Show Gist options
  • Select an option

  • Save zaiste/b488ae5a8a710e9f8775b0efefd50fb5 to your computer and use it in GitHub Desktop.

Select an option

Save zaiste/b488ae5a8a710e9f8775b0efefd50fb5 to your computer and use it in GitHub Desktop.
CSV Parsing in Python, Ruby and Golang
name email title image phone imei color company date ip url state country lat long card
Ava Lesch [email protected] Principal Functionality Designer https://robohash.org/faciliscorruptisimilique.png?size=300x300&set=set1 (536) 688-4097 018292903140690 purple Lemke LLC 2013-10-02 43.114.193.207 http://schummkoch.net/vinnie_ratke Rhode Island Andorra 46.67984738286131 -159.3262343750659 1234-2121-1221-1211
Mrs. Rosendo Lehner [email protected] Human Marketing Architect https://robohash.org/estdolornon.png?size=300x300&set=set1 (716) 028-2631 526496591408898 cyan Wiegand LLC 2012-03-23 119.231.118.111 http://gislason.net/salma Maryland Saint Martin 11.694752422653067 11.649002515793683 1228-1221-1221-1431
Idell Kovacek [email protected] Dynamic Metrics Developer https://robohash.org/quaerataliquidinventore.png?size=300x300&set=set1 1-233-370-1534 547042012987782 maroon Rowe LLC 2013-10-25 26.200.8.115 http://windler.biz/freddie_ruel Missouri Canada -3.4259574115661593 -5.001283636719506 1211-1221-1234-2201
Collin Dibbert Jr. [email protected] Dynamic Data Producer https://robohash.org/molestiasenimesse.png?size=300x300&set=set1 187.798.3992 544081866256087 maroon Nitzsche-Harber 2010-12-28 5.252.71.252 http://mcdermott.io/duncan.stanton Oklahoma Brazil 50.983408853851415 134.91429514616516 1234-2121-1221-1211
Salma Streich [email protected] Senior Metrics Specialist https://robohash.org/erroretneque.png?size=300x300&set=set1 (360) 209-0897 102756703110266 pink Feil, Zemlak and Hoeger 2012-02-21 128.153.221.232 http://gibson.com/ayana.reynolds New Jersey Philippines 30.355276518459632 -108.27469486667013 1211-1221-1234-2201
Margarita Dach [email protected] Chief Accountability Director https://robohash.org/harumdolordoloribus.png?size=300x300&set=set1 1-612-375-0857 532031224097354 red Becker-Ward 2014-09-11 51.160.90.168 http://daugherty.co/ryann_legros Oregon Malaysia -66.28751208157121 -98.21742565364885 1228-1221-1221-1431
Harvey Crist [email protected] Direct Infrastructure Developer https://robohash.org/adcorruptilaboriosam.png?size=300x300&set=set1 974-742-4375 986819467833563 grey Gleichner-Hilpert 2013-02-11 143.252.21.3 http://miller.com/hershel.anderson West Virginia Germany 43.30105643251309 -139.95845614848002 1234-2121-1221-1211
Angelina Crona [email protected] Investor Directives Director https://robohash.org/autabquis.png?size=300x300&set=set1 594-496-3286 356200201930845 lavender Ortiz Inc 2016-02-24 111.222.195.179 http://witting.com/justice Utah Venezuela -23.86534386047373 140.55948090923948 1211-1221-1234-2201
Danial Quigley [email protected] Customer Group Engineer https://robohash.org/cumperferendismolestiae.png?size=300x300&set=set1 (151) 081-2308 101128385003105 orchid Kovacek LLC 2016-05-19 132.174.43.119 http://leannon.co/muhammad Utah Mongolia 54.19917959313804 23.759608904644352 1212-1221-1121-1234
require 'csv'
require 'faker'
# Column names written as the CSV header row, in output order.
headers = %w(name email title image phone imei color company date ip url state country lat long card)

# Number of distinct fake records; the output file cycles through them.
REPETITION = 10

# One value generator per column, listed in the same order as `headers`.
generators = [
  -> { Faker::Name.name },
  -> { Faker::Internet.email },
  -> { Faker::Name.title },
  -> { Faker::Avatar.image },
  -> { Faker::PhoneNumber.cell_phone },
  -> { Faker::Code.imei },
  -> { Faker::Color.color_name },
  -> { Faker::Company.name },
  -> { Faker::Date.between(Date.new(2010, 11, 1), Date.today) },
  -> { Faker::Internet.ip_v4_address },
  -> { Faker::Internet.url },
  -> { Faker::Address.state },
  -> { Faker::Address.country },
  -> { Faker::Address.latitude },
  -> { Faker::Address.longitude },
  -> { Faker::Business.credit_card_number }
]

# Values are produced column by column (all names, then all emails, ...),
# then transposed so that each inner array is one complete record.
columns = generators.map { |generate| Array.new(REPETITION) { generate.call } }
records = columns.transpose

# Write one million rows to data.csv by cycling through the prepared records.
CSV.open('data.csv', 'w', write_headers: true, headers: headers) do |csv|
  1_000_000.times { |n| csv << records[n % REPETITION] }
end
package main
import (
"encoding/csv"
"fmt"
"os"
"io"
)
// main opens data-7M.csv and reads it record by record with encoding/csv,
// discarding every parsed record. An open or read failure is printed as
// "Error: ..." and ends the program; reaching end of file ends it silently.
func main() {
	input, openErr := os.Open("data-7M.csv")
	if openErr != nil {
		fmt.Println("Error:", openErr)
		return
	}
	defer input.Close()

	records := csv.NewReader(input)
	for {
		if _, readErr := records.Read(); readErr != nil {
			// io.EOF is the normal end of input; anything else is reported.
			if readErr != io.EOF {
				fmt.Println("Error:", readErr)
			}
			return
		}
	}
}
import time
import functools
import os
import csv
import progressbar
import psutil
# Path of the CSV file read by the benchmarks in this script.
csv_path = 'data.csv'
# Registry of instrumented benchmarks, in the order they were decorated.
examples = []


def example(fn):
    """Register ``fn`` as a benchmark and return its instrumented wrapper.

    The wrapper prints the function name, runs it, then prints the elapsed
    time in seconds and the current process RSS in MB (via ``psutil``).
    A ``KeyboardInterrupt`` raised while the wrapper runs is caught and
    reported as ``Skipping...``.

    Args:
        fn: a zero-argument callable to benchmark.

    Returns:
        The zero-argument wrapper, which is also appended to ``examples``.
    """
    @functools.wraps(fn)
    def wrapped():
        try:
            print('Running: {}'.format(fn.__name__))
            # perf_counter() is monotonic and high resolution; time.time() is
            # wall-clock time and can jump if the system clock is adjusted.
            start = time.perf_counter()
            fn()
            elapsed = time.perf_counter() - start
            print('{:.2f}'.format(elapsed))
            process = psutil.Process(os.getpid())
            # rss is in bytes; convert to MB for display.
            print('{:.2f} MB\n'.format(process.memory_info().rss / 1024 / 1024))
        except KeyboardInterrupt:
            print('Skipping...')
    examples.append(wrapped)
    return wrapped
@example
def basic():
    """Read every row of ``csv_path`` with ``csv.reader`` and discard it."""
    with open(csv_path, newline='') as source:
        for _record in csv.reader(source):
            continue
@example
def with_progressbar():
    """Read every row of ``csv_path`` while driving a progress bar.

    The file is read twice: once to count physical lines (used as the bar's
    ``max_value``) and once to parse the rows with ``csv.reader``.
    """
    # Count lines inside a ``with`` block so the handle is closed promptly
    # instead of being left for the garbage collector.
    with open(csv_path) as f:
        num_lines = sum(1 for _ in f)
    with progressbar.ProgressBar(max_value=num_lines) as bar:
        with open(csv_path, newline='') as f:
            reader = csv.reader(f)
            for _row in bar(reader):
                pass
@example
def with_headers():
    """Read every row of ``csv_path`` as a dict via ``csv.DictReader`` and discard it."""
    with open(csv_path, newline='') as source:
        for _record in csv.DictReader(source):
            continue
#import resource
#print('{:.2f} MB'.format(resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024 / 1024))
# Run every registered benchmark in registration order. The loop variable is
# not named ``example`` so that the decorator of that name is not rebound.
for run_example in examples:
    run_example()
require 'csv'
require 'benchmark'
require 'ruby-progressbar'
# Path of the CSV file to benchmark; frozen so the constant cannot be mutated in place.
FILENAME = 'data.csv'.freeze
# Runs the given block and prints how much this process's resident set size
# changed across it, as reported by `ps`. The `ps` value is divided by 1024
# and labelled MB.
def memory_usage
  sample_rss = -> { `ps -o rss= -p #{Process.pid}`.to_i }

  baseline = sample_rss.call
  yield
  delta = sample_rss.call - baseline

  puts "Memory: #{(delta / 1024.0).round(2)} MB"
end
# Runs the given block and prints the elapsed real time in seconds,
# rounded to two decimal places.
def time_spent
  elapsed = Benchmark.realtime { yield }
  puts "Time: #{elapsed.round(2)}"
end
# Benchmark: stream FILENAME through CSV.foreach while advancing a progress
# bar, reporting elapsed time and the change in resident memory.
memory_usage do
  time_spent do
    line_count = File.foreach(FILENAME).count
    # With `headers: true` the first line is consumed as the header row and is
    # never yielded, so the bar's total is one less than the line count
    # (clamped at zero for an empty file). Otherwise the bar cannot finish.
    bar = ProgressBar.create(throttle_rate: 0.2, total: [line_count - 1, 0].max)
    CSV.foreach(FILENAME, headers: true) do |_row|
      bar.increment
    end
  end
end
import csv
# Write two sample rows to eggs.csv; the second contains a field with a comma.
sample_rows = [
    ['Spam'] * 5 + ['Baked Beans'],
    ['Spam', 'Lovely, Spam', 'Wonderful Spam'],
]
with open('eggs.csv', 'w', newline='') as out:
    csv.writer(out).writerows(sample_rows)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment