#!/usr/bin/env bash
#
# Reduce a gzipped CSV to its first two columns and drop duplicate rows,
# keeping the header line in place.
#
# Usage: script [INPUT_GZ [COLUMNS_CSV [OUTPUT_GZ]]]
#   INPUT_GZ     gzipped CSV to read      (default: file_compressed.csv.gz)
#   COLUMNS_CSV  uncompressed two-column  (default: file_uncompressed.csv)
#                intermediate file
#   OUTPUT_GZ    gzipped, de-duplicated   (default: file_compressed_no_dups.csv.gz)
#                result
#
# The input archive is never modified or removed.

# Let a pipeline report failure when any of its stages fails, not only the
# last one; the function below relies on this for its return status.
set -o pipefail

#######################################
# Extract columns 1 and 2 of a gzipped CSV and write a de-duplicated,
# sorted, gzipped copy that still starts with the original header line.
# Arguments:
#   $1 - input .csv.gz            (optional, see defaults above)
#   $2 - intermediate .csv path   (optional)
#   $3 - output .csv.gz path      (optional)
# Outputs:
#   Writes $2 and $3; diagnostics go to stderr.
# Returns:
#   0 on success, non-zero if the input is unreadable or a stage fails.
#######################################
dedupe_csv_columns() {
  local src=${1:-file_compressed.csv.gz}
  local columns_csv=${2:-file_uncompressed.csv}
  local dest=${3:-file_compressed_no_dups.csv.gz}

  # Refuse to start without a readable input so that no empty output files
  # are left behind.
  if [[ ! -r "$src" ]]; then
    printf 'error: cannot read input file: %s\n' "$src" >&2
    return 1
  fi

  # Extract the CSV from the gzip but keep the original archive: decompress
  # to stdout (gzip -dc is the portable spelling of zcat), cut the first two
  # comma-separated columns and save them.
  gzip -dc -- "$src" | cut -d',' -f1,2 > "$columns_csv" || return

  # Since it is a CSV with a header, emit the header line first, untouched.
  # Then take the rest of the file (tail -n +2 starts at line 2), sort it and
  # run uniq to remove duplicates. Finally gzip the combined stream into the
  # new file.
  {
    head -n1 -- "$columns_csv" &&
      tail -n +2 -- "$columns_csv" | sort | uniq
  } | gzip > "$dest"
}

# Run with the script's own arguments; the script's exit status is the
# function's return status.
dedupe_csv_columns "$@"