Skip to content

Instantly share code, notes, and snippets.

@djfdyuruiry
Created May 21, 2022 21:06
Show Gist options
  • Save djfdyuruiry/9faae06e874bfa0facd80b397fe9058b to your computer and use it in GitHub Desktop.
Save djfdyuruiry/9faae06e874bfa0facd80b397fe9058b to your computer and use it in GitHub Desktop.
Example of how to convert BIOM-format data to a TSV file
# requires the package 'json-stream'
import json_stream
import os
import shutil
# Directory that holds the intermediate per-row files and the generated TSV;
# init_scratch_dir() deletes and recreates it.
scratchDir = "scratch"
def get_biom_row_count(biomFileName):
    """Read the matrix dimensions from a BIOM (JSON) file.

    The file is parsed with ``json_stream`` and only the top-level
    ``shape`` entry is looked up.

    Args:
        biomFileName: Path of the BIOM JSON file to inspect.

    Returns:
        A ``(rowCount, columnCount)`` tuple taken from the first two
        elements of ``shape``.
    """
    # The context manager closes the handle even if parsing or the lookup
    # raises part-way through.
    with open(biomFileName) as biomFile:
        shape = json_stream.load(biomFile)["shape"]
        # Both elements are read while the file is still open and in
        # ascending order, since the parser reads lazily from the stream.
        rowCount = shape[0]
        columnCount = shape[1]

    return rowCount, columnCount
def build_tsv_file(biomFileName, tsvFileName):
    """Assemble the per-row scratch files into one tab-separated file.

    For every row index in ``range(rowCount)`` the file
    ``{scratchDir}/{rowIdx}`` is read. Each of its lines has the form
    ``<slot>,<value>``: slot 0 carries the row id, any other slot carries a
    numeric value. Slots that never appear are emitted as ``"0.0"``.

    Args:
        biomFileName: Path of the BIOM JSON file; only used to obtain the
            row and column counts.
        tsvFileName: Name of the output file, created inside ``scratchDir``.

    Raises:
        ValueError: If a scratch line has no comma, or a non-id value
            cannot be parsed as a float.
    """
    rowCount, columnCount = get_biom_row_count(biomFileName)

    # Context managers guarantee both handles are closed on any error.
    with open(f"{scratchDir}/{tsvFileName}", "w") as tsvFile:
        for rowIdx in range(rowCount):
            # Slot 0 is the row id, slots 1..columnCount are the values.
            columns = ["0.0"] * (columnCount + 1)

            with open(f"{scratchDir}/{rowIdx}") as rowFile:
                for columnDataStr in rowFile:
                    # Split on the first comma only so that a row id which
                    # itself contains commas is kept whole.
                    columnIdxStr, columnValue = columnDataStr.split(",", 1)
                    columnIdx = int(columnIdxStr)
                    columnValue = columnValue.replace("\n", "")

                    if columnIdx != 0:
                        # Normalise numeric values to float notation
                        # (e.g. "1" becomes "1.0").
                        columnValue = f"{float(columnValue)}"

                    columns[columnIdx] = columnValue

            tsvFile.write("\t".join(columns))
            tsvFile.write("\n")
def dump_biom_rows(biomFileName):
    """Split a BIOM (JSON) file into one scratch file per row.

    First, for each entry of the top-level ``rows`` list, a file named after
    the entry's position is created in ``scratchDir`` containing the line
    ``0,<row id>``. Then every entry of the top-level ``data`` list is
    appended to the file of the row it belongs to as
    ``<column index + 1>,<value>``.

    NOTE(review): each ``data`` entry is indexed as
    ``[row index, column index, value]``, which looks like the sparse BIOM
    layout — confirm that dense tables are not expected here. The code also
    reads ``rows`` before ``data``; presumably the input must list them in
    that order for the streaming parser — verify against json-stream docs.

    Args:
        biomFileName: Path of the BIOM JSON file to read.
    """
    # Context managers ensure the input and every row file are closed even
    # if a lookup or write raises.
    with open(biomFileName) as biomFile:
        biomData = json_stream.load(biomFile)

        for manualRowIdx, row in enumerate(biomData["rows"]):
            rowId = row["id"]
            with open(f"{scratchDir}/{manualRowIdx}", "w") as rowHandle:
                rowHandle.write(f"0,{rowId}\n")

        for dataRow in biomData["data"]:
            # Elements are accessed strictly in ascending index order.
            rowIdx = dataRow[0]
            with open(f"{scratchDir}/{rowIdx}", "a") as rowHandle:
                # Column indices are shifted by one because slot 0 of each
                # row file is reserved for the row id.
                rowHandle.write(f"{dataRow[1] + 1},{dataRow[2]}\n")
def init_scratch_dir():
    """Reset the scratch directory to an empty state.

    If a directory already exists at ``scratchDir`` it is removed together
    with its contents; a new directory is then created at that path.
    """
    already_present = os.path.isdir(scratchDir)
    if already_present:
        # Remove the existing directory and everything inside it.
        shutil.rmtree(scratchDir)

    os.makedirs(scratchDir)
# Start from an empty scratch directory.
init_scratch_dir()
# Write one scratch file per row of the BIOM table.
dump_biom_rows("min_sparse_otu_table.biom")
# Combine the per-row files into scratch/min_sparse_otu_table.tsv.
build_tsv_file("min_sparse_otu_table.biom", "min_sparse_otu_table.tsv")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment