Created
September 29, 2014 15:24
-
-
Save mathyourlife/01af1751477aa115e1f3 to your computer and use it in GitHub Desktop.
generate chunks of csv files
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import csv
import itertools
# cat First500000.lst | |
# 0,a | |
# 1,a | |
# 2,a | |
# 3,a | |
# 4,a | |
# 5,a | |
# 6,a | |
# 7,a | |
# 8,a | |
# 9,a | |
class EndOfCSV(Exception):
    """Signal that the CSV reader had no rows left to hand out."""
def pull_lines(reader, N):
    """Yield up to ``N`` rows from ``reader``, consuming exactly the rows yielded.

    Args:
        reader: An iterator of rows (for example a ``csv.reader``). It is
            advanced in place, so successive calls continue where the
            previous call stopped.
        N: Maximum number of rows to yield; must be at least 1.

    Yields:
        The next rows of ``reader`` in order, at most ``N`` of them.

    Raises:
        ValueError: If ``N`` is smaller than 1.
        EndOfCSV: If ``reader`` was already exhausted, so no row could be
            yielded.

    Because this is a generator, both exceptions surface when the result is
    iterated, not when ``pull_lines`` is called.
    """
    if N < 1:
        raise ValueError("N must be at least 1, got %r" % (N,))

    pulled = 0
    # islice stops after N rows without fetching an (N+1)-th one, so the row
    # that follows a full chunk stays in the reader for the next call.
    for line in itertools.islice(reader, N):
        pulled += 1
        yield line

    # An explicit counter tells "reader was empty" apart from "chunk served".
    if not pulled:
        raise EndOfCSV()
def read_csv_chunk(filename, chunk_size):
    """Lazily read a CSV file as a sequence of dict chunks.

    Args:
        filename: Path of the CSV file to read.
        chunk_size: Maximum number of rows folded into each yielded dict;
            must be at least 1.

    Yields:
        dict: Maps the first column to the second column for up to
        ``chunk_size`` consecutive rows. Rows sharing a first column within
        one chunk collapse to the last value seen, so a dict can hold fewer
        entries than rows read.

    Raises:
        ValueError: If ``chunk_size`` is smaller than 1.
        IndexError: If a row has fewer than two columns.

    The file stays open while the generator is suspended between chunks and
    is closed when the generator finishes or is closed.
    """
    if chunk_size < 1:
        raise ValueError("chunk_size must be at least 1, got %r" % (chunk_size,))

    with open(filename) as infile:
        reader = csv.reader(infile)
        while True:
            # Only building the chunk can raise EndOfCSV, so the try does
            # not wrap the yield.
            try:
                chunk = {row[0]: row[1]
                         for row in pull_lines(reader, chunk_size)}
            except EndOfCSV:
                # pull_lines found the reader exhausted: no more chunks.
                break
            yield chunk
# Demo: walk the sample file three rows at a time, printing each chunk dict.
for chunk in read_csv_chunk(
    'First500000.lst',
    chunk_size=3,
):
    print(chunk)
# {'1': 'a', '0': 'a', '2': 'a'} | |
# {'5': 'a', '4': 'a', '6': 'a'} | |
# {'9': 'a', '8': 'a'} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment