Created
September 23, 2018 02:08
-
-
Save WillKoehrsen/a3aa94b49e984e394d3d7e51b341a729 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json
import os
from itertools import chain
from multiprocessing.dummy import Pool as Threadpool
def read_data(file_path):
    """Read newline-delimited JSON records from `file_path`.

    Each non-blank line of the file is parsed as one JSON document.

    Args:
        file_path: Path of the file to read.

    Returns:
        A list of the decoded objects, in file order. An empty file
        yields an empty list.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # JSON text is UTF-8 by specification; do not rely on the locale default
    with open(file_path, 'r', encoding='utf-8') as fin:
        # Iterate the file lazily instead of loading every line up front,
        # and skip blank lines (e.g. an extra empty line at end of file),
        # which json.loads would otherwise reject.
        return [json.loads(line) for line in fin if line.strip()]
# Directory holding the partitioned JSON files
partition_dir = '/data/wiki/partitions/'

# List of files to read in. os.listdir returns entries in arbitrary order,
# so sort them to make the order of `book_list` reproducible between runs.
saved_files = [os.path.join(partition_dir, x)
               for x in sorted(os.listdir(partition_dir))]

# Create a threadpool for reading in files. The context manager shuts the
# worker threads down once the blocking `map` call has returned, so the
# pool is not leaked.
with Threadpool(processes=10) as threadpool:
    # Read in the files as a list of lists (one inner list per file)
    results = threadpool.map(read_data, saved_files)

# Flatten the list of lists to a single list
book_list = list(chain.from_iterable(results))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment