Skip to content

Instantly share code, notes, and snippets.

@dazzag24
Created January 22, 2020 16:31
Show Gist options
  • Save dazzag24/1005de5f36dc47e9996710829b2e296b to your computer and use it in GitHub Desktop.
Save dazzag24/1005de5f36dc47e9996710829b2e296b to your computer and use it in GitHub Desktop.
# -*- coding: utf-8 -*-
# https://stackoverflow.com/questions/47113813/using-pyarrow-how-do-you-append-to-parquet-file
# Create a virtualenv or pipenv with pyarrow installed
import pyarrow as pa
import pyarrow.parquet as pq
def append_to_parquet_table(ii, filepath=None, writer=None):
    """Append one hourly Parquet file to a combined Parquet output file.

    Reads the input file for hour ``ii`` and writes its table through
    ``writer``. On the first call (``writer`` is ``None``) a new
    ``pq.ParquetWriter`` is opened at ``filepath`` using the schema of the
    table that was just read; later calls reuse the writer passed in.

    Args:
        ii: Hour index; formatted as a zero-padded two-digit field in the
            input file name.
        filepath: Destination path of the combined file. Only used when
            ``writer`` is ``None``.
        writer: An already-open ``pq.ParquetWriter`` to append to, or
            ``None`` to have one created.

    Returns:
        The writer that was written to, so the caller can hand it to the
        next call and close it once all inputs have been appended.

    Raises:
        ValueError: If both ``writer`` and ``filepath`` are ``None``, since
            there is then nowhere to write the table.
    """
    # Fail fast with a clear message instead of letting ParquetWriter choke
    # on a ``None`` destination after the input has already been read.
    if writer is None and filepath is None:
        raise ValueError("filepath is required when no writer is supplied")

    filename = f"/home/user/files/2020-01-09T{ii:02}_00_00Z_PT1H.parquet"
    print(f"Merging: {filename}")
    table = pq.read_table(filename)

    if writer is None:
        # The first table read defines the schema of the combined file.
        writer = pq.ParquetWriter(filepath, table.schema)
    writer.write_table(table=table)
    return writer
if __name__ == '__main__':
    # Merge the 24 hourly files for 2020-01-09 into one Parquet file.
    writer = None
    filepath = '2020-01-09T_PT1H.parquet'
    try:
        for ii in range(24):
            # The first call creates the writer; later calls reuse it.
            writer = append_to_parquet_table(ii, filepath, writer)
    finally:
        # Close the writer even if a merge step raised; otherwise the
        # output is left without its Parquet footer.
        if writer is not None:
            writer.close()
    print("Finished")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment