Skip to content

Instantly share code, notes, and snippets.

@mhebrard
Last active May 26, 2017 09:48
Show Gist options
  • Save mhebrard/34c0985f00a29b466a4e49a6feaa9d96 to your computer and use it in GitHub Desktop.
Save mhebrard/34c0985f00a29b466a4e49a6feaa9d96 to your computer and use it in GitHub Desktop.
Qiime Convertor

Script to convert qiime OTU assignment TXT file to JSON format.

##Input See the otu_qiime_L6.txt file. The file should have 2 header lines, with the sample names in the second line, as in the example. OTUs should be specified in order in the first column. (You can omit the last level for all lines, but you need to fill all gaps.)

// valid
kingdom;phylum;class;order;family;genus
// valid
kingdom;phylum;
// ERROR
kingdom;class;family

##Install and run

  • clone the repository (or download all files)
  • install nodejs
  • install the packages
npm install
  • run the script
node qiime2json.js otu_qiime_L6.txt

##Visualization The JSON output file can be used with the majority of the charts from Chi-LIB

  • clonal evolution
  • dendrogram
  • radial
  • sunburst
  • treemap
# Constructed from biom file
#OTU ID s2 s1
Unassigned;Other;Other;Other;Other;Other 0.05 0.02
k__Bacteria;Other;Other;Other;Other;Other 0.10 0.13
k__Bacteria;p__Actinobacteria;c__Actinobacteria;o__Actinomycetales;f__Corynebacteriaceae;g__Corynebacterium 0.08 0.10
k__Bacteria;p__Actinobacteria;c__Actinobacteria;o__Actinomycetales;f__Dermabacteraceae;g__Brachybacterium 0.02 0.0
k__Bacteria;p__Actinobacteria;c__Actinobacteria;o__Actinomycetales;f__Micromonosporaceae;g__Actinoplanes 0.05 0.05
k__Bacteria;p__Bacteroidetes;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Bacteroides 0.0 0.02
k__Bacteria;p__Firmicutes;c__Bacilli;o__Bacillales;f__Bacillaceae;g__Bacillus 0.0 0.08
k__Bacteria;p__Firmicutes;c__Bacilli;o__Bacillales;f__Listeriaceae;g__Listeria 0.10 0.05
k__Bacteria;p__Firmicutes;c__Bacilli;o__Bacillales;f__Planococcaceae;g__Lysinibacillus 0.05 0.0
k__Bacteria;p__Firmicutes;c__Bacilli;o__Lactobacillales;f__Enterococcaceae;g__Enterococcus 0.04 0.01
k__Bacteria;p__Firmicutes;c__Bacilli;o__Lactobacillales;f__Enterococcaceae;g__Vagococcus 0.08 0.09
k__Bacteria;p__Firmicutes;c__Bacilli;o__Lactobacillales;f__Lactobacillaceae;g__Lactobacillus 0.06 0.05
k__Bacteria;p__Firmicutes;c__Bacilli;o__Lactobacillales;f__Lactobacillaceae;g__Pediococcus 0.02 0.005
k__Bacteria;p__Proteobacteria;c__Alphaproteobacteria;o__Rhizobiales;f__Phyllobacteriaceae;g__Phyllobacterium 0.05 0.03
k__Bacteria;p__Proteobacteria;c__Alphaproteobacteria;o__Rhodobacterales;f__Rhodobacteraceae;g__Anaerospora 0.09 0.03
k__Bacteria;p__Proteobacteria;c__Alphaproteobacteria;o__Rickettsiales;f__mitochondria;Other 0.06 0.09
k__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacteriales;f__Enterobacteriaceae;g__Citrobacter 0.01 0.02
k__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacteriales;f__Enterobacteriaceae;g__Enterobacter 0.03 0.01
k__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacteriales;f__Enterobacteriaceae;g__Erwinia 0.06 0.0
k__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacteriales;f__Enterobacteriaceae;g__Proteus 0.03 0.06
k__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacteriales;f__Enterobacteriaceae;g__Salmonella 0.02 0.06
{
"name": "root",
"children": [
{
"name": "Unassigned",
"id": "2-0",
"children": [
{
"name": "Other",
"id": "2-1",
"children": [
{
"name": "Other",
"id": "2-2",
"children": [
{
"name": "Other",
"id": "2-3",
"children": [
{
"name": "Other",
"id": "2-4",
"children": [
{
"name": "Other",
"id": "2-5",
"children": [
],
"size": 0.05
}
]
}
]
}
]
}
]
}
]
},
{
"name": "Bacteria",
"id": "k__Bacteria",
"children": [
{
"name": "Other",
"id": "3-1",
"children": [
{
"name": "Other",
"id": "3-2",
"children": [
{
"name": "Other",
"id": "3-3",
"children": [
{
"name": "Other",
"id": "3-4",
"children": [
{
"name": "Other",
"id": "3-5",
"children": [
],
"size": 0.1
}
]
}
]
}
]
}
]
},
{
"name": "Actinobacteria",
"id": "p__Actinobacteria",
"children": [
{
"name": "Actinobacteria",
"id": "c__Actinobacteria",
"children": [
{
"name": "Actinomycetales",
"id": "o__Actinomycetales",
"children": [
{
"name": "Corynebacteriaceae",
"id": "f__Corynebacteriaceae",
"children": [
{
"name": "Corynebacterium",
"id": "g__Corynebacterium",
"children": [
],
"size": 0.08
}
]
},
{
"name": "Dermabacteraceae",
"id": "f__Dermabacteraceae",
"children": [
{
"name": "Brachybacterium",
"id": "g__Brachybacterium",
"children": [
],
"size": 0.02
}
]
},
{
"name": "Micromonosporaceae",
"id": "f__Micromonosporaceae",
"children": [
{
"name": "Actinoplanes",
"id": "g__Actinoplanes",
"children": [
],
"size": 0.05
}
]
}
]
}
]
}
]
},
{
"name": "Bacteroidetes",
"id": "p__Bacteroidetes",
"children": [
{
"name": "Bacteroidia",
"id": "c__Bacteroidia",
"children": [
{
"name": "Bacteroidales",
"id": "o__Bacteroidales",
"children": [
{
"name": "Bacteroidaceae",
"id": "f__Bacteroidaceae",
"children": [
{
"name": "Bacteroides",
"id": "g__Bacteroides",
"children": [
],
"size": 0
}
]
}
]
}
]
}
]
},
{
"name": "Firmicutes",
"id": "p__Firmicutes",
"children": [
{
"name": "Bacilli",
"id": "c__Bacilli",
"children": [
{
"name": "Bacillales",
"id": "o__Bacillales",
"children": [
{
"name": "Bacillaceae",
"id": "f__Bacillaceae",
"children": [
{
"name": "Bacillus",
"id": "g__Bacillus",
"children": [
],
"size": 0
}
]
},
{
"name": "Listeriaceae",
"id": "f__Listeriaceae",
"children": [
{
"name": "Listeria",
"id": "g__Listeria",
"children": [
],
"size": 0.1
}
]
},
{
"name": "Planococcaceae",
"id": "f__Planococcaceae",
"children": [
{
"name": "Lysinibacillus",
"id": "g__Lysinibacillus",
"children": [
],
"size": 0.05
}
]
}
]
},
{
"name": "Lactobacillales",
"id": "o__Lactobacillales",
"children": [
{
"name": "Enterococcaceae",
"id": "f__Enterococcaceae",
"children": [
{
"name": "Enterococcus",
"id": "g__Enterococcus",
"children": [
],
"size": 0.04
},
{
"name": "Vagococcus",
"id": "g__Vagococcus",
"children": [
],
"size": 0.08
}
]
},
{
"name": "Lactobacillaceae",
"id": "f__Lactobacillaceae",
"children": [
{
"name": "Lactobacillus",
"id": "g__Lactobacillus",
"children": [
],
"size": 0.06
},
{
"name": "Pediococcus",
"id": "g__Pediococcus",
"children": [
],
"size": 0.02
}
]
}
]
}
]
}
]
},
{
"name": "Proteobacteria",
"id": "p__Proteobacteria",
"children": [
{
"name": "Alphaproteobacteria",
"id": "c__Alphaproteobacteria",
"children": [
{
"name": "Rhizobiales",
"id": "o__Rhizobiales",
"children": [
{
"name": "Phyllobacteriaceae",
"id": "f__Phyllobacteriaceae",
"children": [
{
"name": "Phyllobacterium",
"id": "g__Phyllobacterium",
"children": [
],
"size": 0.05
}
]
}
]
},
{
"name": "Rhodobacterales",
"id": "o__Rhodobacterales",
"children": [
{
"name": "Rhodobacteraceae",
"id": "f__Rhodobacteraceae",
"children": [
{
"name": "Anaerospora",
"id": "g__Anaerospora",
"children": [
],
"size": 0.09
}
]
}
]
},
{
"name": "Rickettsiales",
"id": "o__Rickettsiales",
"children": [
{
"name": "mitochondria",
"id": "f__mitochondria",
"children": [
{
"name": "Other",
"id": "17-5",
"children": [
],
"size": 0.06
}
]
}
]
}
]
},
{
"name": "Gammaproteobacteria",
"id": "c__Gammaproteobacteria",
"children": [
{
"name": "Enterobacteriales",
"id": "o__Enterobacteriales",
"children": [
{
"name": "Enterobacteriaceae",
"id": "f__Enterobacteriaceae",
"children": [
{
"name": "Citrobacter",
"id": "g__Citrobacter",
"children": [
],
"size": 0.01
},
{
"name": "Enterobacter",
"id": "g__Enterobacter",
"children": [
],
"size": 0.03
},
{
"name": "Erwinia",
"id": "g__Erwinia",
"children": [
],
"size": 0.06
},
{
"name": "Proteus",
"id": "g__Proteus",
"children": [
],
"size": 0.03
},
{
"name": "Salmonella",
"id": "g__Salmonella",
"children": [
],
"size": 0.02
}
]
}
]
}
]
}
]
}
]
}
]
}
{
"name": "Quime Convertor",
"version": "1.0.0",
"description": "",
"main": "qiime2json.js",
"dependencies": {
"graceful-fs": "^4.1.11",
"xo": "^0.18.2"
},
"devDependencies": {
"xo": "^0.18.2"
},
"scripts": {
"test": "xo"
},
"keywords": [],
"author": "Maxime HEBRARD",
"license": "MIT",
"xo": {
"space": 2,
"env": [
"browser",
"node"
]
}
}
// Command lines
// >node qiime2json.js path/to/my/Qiime/File.txt path/to/my/output
//
// Qiime
// Unassigned;Other;Other;Other;Other;Other 6.75517826634e-05 0.00298838097282
// k__Bacteria;p__Actinobacteria;c__Actinobacteria;o__Actinomycetales;f__Corynebacteriaceae;g__Corynebacterium 8.44397283292e-06 0.0
// Flare
// {"name": "Not assigned", "id": "Notassigned", "children": [], "size": 1}
const fs = require('graceful-fs');
// Variables
// Rank label of each position in a lineage string; buildTree only uses it for logging.
const ranks = ['kingdom', 'phylum', 'class', 'order', 'family', 'genus'];
// Read file
// argv[2]: input QIIME table, argv[3]: optional output path prefix.
if (process.argv[2]) {
  fs.readFile(process.argv[2], 'utf8', (err, data) => {
    if (err) {
      // Report on stderr and flag the failure to the calling shell
      console.error('file error', err);
      process.exitCode = 1;
      return;
    }
    // Accept both LF and CRLF files so that no '\r' sticks to the last column
    const lines = data.split(/\r?\n/);
    // The sample names are read from the second line of the file
    if (lines.length < 2 || lines[1].length === 0) {
      console.error('file error', 'missing header: sample names are expected on the second line');
      process.exitCode = 1;
      return;
    }
    // Get samples count
    const header = lines[1].split('\t');
    console.log(header);
    // Output prefix is the same for every sample, compute it once
    const out = process.argv[3] || 'qiime2json-output';
    for (let s = 1; s < header.length; s++) {
      // For each sample (column s of the table)
      const sample = header[s];
      console.log('sample:', sample);
      // Build the tree and write it to its own file
      const tree = buildTree(lines, sample, s);
      writeFile(`${out}-s${s}.json`, tree, s);
    }
  });
} else {
  // Nothing to convert: tell the user how to call the script instead of exiting silently
  console.log('usage: node qiime2json.js path/to/qiime/file.txt [path/to/output]');
}
/**
 * Build the tree of one sample from the lines of a QIIME OTU table.
 *
 * Each non-comment line is split on tabs; the first column is a lineage
 * (taxa separated by ';') and column `s` is the value stored as `size` on
 * the last node of that lineage.
 *
 * Taxa written as `prefix__name` are shared between lines that have the same
 * lineage down to that taxon. Taxa without a `__` separator (e.g. `Other`)
 * always get a fresh node whose id is `<line index>-<position>`.
 *
 * @param {string[]} lines - All lines of the input file, header included.
 * @param {string} sample - Sample name (not used by the function body).
 * @param {number} s - Index of the column holding the values of the sample.
 * @returns {{name: string, id: string, children: Array}} The root node.
 */
function buildTree(lines, sample, s) {
  // Ranked nodes indexed by their full lineage. Keying on the taxon string
  // alone would merge identical labels found under different parents
  // (typically the empty genus `g__` of two different families).
  const nodesByLineage = new Map();
  const root = {
    name: 'root',
    id: 'root',
    children: []
  };
  lines.forEach((raw, i) => {
    // Drop the carriage return left at the end of the line by CRLF files
    const l = raw.replace(/\r$/, '');
    // Ignore comments and empty lines
    if (l[0] === '#' || l.trim().length === 0) {
      return;
    }
    // Parse columns
    const vals = l.split('\t');
    // Parse taxa string
    const taxa = vals[0].split(';');
    const value = Number(vals[s]);
    let parent = root;
    // Ids of the nodes walked so far on this line, joined by ';'
    let lineage = '';
    taxa.forEach((m, j) => {
      const rank = ranks[j];
      console.log('taxa', m, rank, parent.name);
      // Get current node
      const key = `${lineage};${m}`;
      let node = nodesByLineage.get(key);
      if (node === undefined) {
        // Node does not exist > create it
        node = {
          name: '',
          id: '',
          children: []
        };
        // Parse taxon
        const taxon = m.split('__');
        if (taxon.length > 1) { // Rank defined
          node.name = taxon[1];
          node.id = m;
          // If taxon defined, we map the node
          nodesByLineage.set(key, node);
        } else { // No rank
          node.name = taxon[0];
          node.id = `${i}-${j}`;
          // If taxon is not defined, no need to map
        }
        // Add new node to parent
        parent.children.push(node);
      } // End node undefined
      // Unranked ids are unique per line, so their descendants never collide
      lineage = `${lineage};${node.id}`;
      parent = node;
    });
    // Add value to leaf
    parent.size = value;
    console.log('----');
  });
  return root;
}
/**
 * Serialize `data` as JSON and write it to `path`, then log the outcome.
 *
 * @param {string} path - Destination file.
 * @param {*} data - Value passed to JSON.stringify.
 * @param {number} s - Sample index, only used in the log messages.
 */
function writeFile(path, data, s) {
  const payload = JSON.stringify(data);
  // Single completion handler: pick the message from the error state
  const report = err => {
    const message = err ?
      `write error: ${s}: ${err}` :
      `data saved: ${s} in ${path}`;
    console.log(message);
  };
  fs.writeFile(path, payload, report);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment