Skip to content

Instantly share code, notes, and snippets.

@nrtkbb
Created April 1, 2015 16:42
Show Gist options
  • Save nrtkbb/75dac4a15a0e08f2374a to your computer and use it in GitHub Desktop.
Save nrtkbb/75dac4a15a0e08f2374a to your computer and use it in GitHub Desktop.
tsvのファイル名とカラム名の一覧をtsvで出力するコマンド
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import re
import sys
def read_first_line(filename, filepath):
    """Return the first line of a file that does not start with ``#``.

    Args:
        filename: Name of the file to read.
        filepath: Directory that contains ``filename``; the two are joined
            with ``os.path.join`` before opening.

    Returns:
        The first line whose first character is not ``#``, including its
        trailing newline if it has one. An empty string when the file is
        empty or contains only ``#`` lines. ``None`` when opening or
        reading the file raises ``IOError``.
    """
    filepath = os.path.join(filepath, filename)
    try:
        with open(filepath, mode='r') as f:
            for line in f:
                if not line.startswith('#'):
                    return line
            # Reached end of file without finding a non-comment line.
            return ''
    except IOError:
        # Report on stderr: stdout carries the script's TSV output, and a
        # diagnostic message mixed into it would corrupt the result.
        sys.stderr.write(
            u'Not allowed to read file to this path "{}".\n'.format(filepath))
        return None
def format_column_rows(masterfile, firstline):
    """Format a TSV header line as "file name / column name" rows.

    Args:
        masterfile: File name to put in the first field of the first row.
        firstline: Tab-separated header line; trailing ``\\r``/``\\n``
            characters are stripped before splitting.

    Returns:
        A string with one row per column, joined with ``\\n``. The first
        row is ``<masterfile>\\t<column>``; every following row leaves the
        file-name field empty (``\\t<column>``).
    """
    # Strip '\r' as well so CRLF files do not leave a stray carriage
    # return on the last column name.
    columns = firstline.rstrip('\r\n').split('\t')
    rows = ['{}\t{}'.format(masterfile, columns[0])]
    rows.extend('\t{}'.format(column) for column in columns[1:])
    return '\n'.join(rows)


if __name__ == '__main__':
    scriptdir = os.path.abspath(os.path.dirname(__file__))
    masterdir = os.path.join(scriptdir, u'your/tsv/dir')
    # Raw string: '\.' in a plain literal is an invalid escape sequence.
    tsv_name = re.compile(r'^[a-z_]+\.tsv$')
    # os.listdir returns names in arbitrary order; sort for stable output.
    for masterfile in sorted(f for f in os.listdir(masterdir)
                             if tsv_name.match(f)):
        firstline = read_first_line(masterfile, masterdir)
        if not firstline:
            # Unreadable file, empty file, or a file with only '#' lines.
            continue
        # Parenthesised single-argument print runs on Python 2 and 3 alike.
        print(format_column_rows(masterfile, firstline))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment