johnjosephhorton · October 2, 2011 03:23
diff --git a/bls_data_get.py b/bls_data_get.py
 import urllib2
 import csv

 # Slice bounds applied to the downloaded file's lines (lines[FIRST_LINE:LAST_LINE]):
 # FIRST_LINE is the 0-based index of the first line kept, LAST_LINE is the
 # index of the first line left out.
 FIRST_LINE = 11
 LAST_LINE = 38

 def get_level(l):
    """Return the number of space characters at the start of ``l``.

    Only the space character counts as indentation; tabs and line endings
    do not.  An empty string yields 0 and a string made up solely of spaces
    yields its full length.
    """
    # Measuring what lstrip removes avoids the loop variable being unbound
    # for "" and being one short when no non-space character is ever found.
    return len(l) - len(l.lstrip(" "))

 def clean_line(l):
    """Split one raw text line into its non-empty, stripped fields.

    Carriage returns and newlines are removed, the remainder is split on
    runs of four spaces, and every fragment is stripped of surrounding
    whitespace.  Fragments that are empty after stripping are dropped, so
    the result never contains "" entries.
    """
    l = l.replace("\r", "").replace("\n", "")
    stripped = [piece.strip() for piece in l.split("    ")]
    # Filter after stripping: a gap whose width is not a multiple of four
    # leaves a whitespace-only fragment that would otherwise become "".
    return [piece for piece in stripped if piece]

 # Download the table and keep only the lines between FIRST_LINE and LAST_LINE.
 f = urllib2.urlopen("ftp://ftp.bls.gov/pub/suppl/empsit.tab1.txt")
 try:
    lines = list(f)[FIRST_LINE:LAST_LINE]
 finally:
    # Release the connection even if reading fails part-way through.
    f.close()
 # A blank line has no fields, so it would make the heading lookup below fail.
 lines = [line for line in lines if line.strip()]

 # Per kept line: its indent width and its list of fields.
 levels = [get_level(line) for line in lines]
 data_rows = [clean_line(line) for line in lines]
 # The first field of every row is used as that row's heading.
 headings = [y[0] for y in data_rows]

 # Lookup tables keyed by heading text: position in the table and indent width.
 # NOTE(review): a heading text that occurs more than once keeps only the
 # position/level of its last occurrence in these dicts.
 d_order = dict(zip(headings, range(len(headings))))
 d_level = dict(zip(headings, levels))

 def one_up(heading):
    """Return the closest preceding heading with a smaller indent level.

    The headings that come before ``heading`` are scanned from nearest to
    farthest; the first one whose ``d_level`` entry is strictly smaller
    than that of ``heading`` is returned.  ``None`` is returned when no
    such heading exists.
    """
    own_level = d_level[heading]
    position = d_order[heading]
    for earlier in reversed(headings[:position]):
        if d_level[earlier] < own_level:
            return earlier
    return None

 def crumb_trail(heading):
    """Return the chain of headings leading to ``heading``, outermost first.

    The chain is built by following ``one_up`` repeatedly until it returns
    ``None``; ``heading`` itself is the last element of the result.
    """
    trail = [heading]
    parent = one_up(heading)
    # Walk upwards, asking one_up only once per step, then flip the order.
    while parent is not None:
        trail.append(parent)
        parent = one_up(parent)
    trail.reverse()
    return trail

 # Ancestor chain for every heading, and the length of the longest chain.
 crumb_trails = [crumb_trail(heading) for heading in headings]
 max_depth = max(len(trail) for trail in crumb_trails)

 # Rank the distinct indent widths from shallowest (0) to deepest.  The widths
 # are sorted explicitly because iterating a set gives no ordering guarantee.
 hier = dict((width, rank) for rank, width in enumerate(sorted(set(levels))))
 def get_hier(heading):
    """Return the hierarchy column index for the indent width of ``heading``."""
    return hier[d_level[heading]]

 # One "level_N" column per distinct indent width: row cells are addressed by
 # hier rank, so this is the size that can never be exceeded.
 n_level_columns = len(hier)
 # NOTE(review): five names follow the level columns here, but each row below
 # supplies four values (depth plus data_row[:3]) - confirm the intended slice.
 header = ["level_%s" % i for i in range(n_level_columns)] + [
    "depth", "level", "normal_seasonal_movement", "estimated_over_month_change",
    "sa_adjusted_over_month_change"]

 # The with-block closes bls.csv even when writing a row fails.
 with open("bls.csv", "w") as g:
    out = csv.writer(g)
    out.writerow(header)
    for trail, data_row in zip(crumb_trails, data_rows):
        indices = [get_hier(name) for name in trail]
        depth = max(indices)
        # Place every heading of the chain in the column of its own level;
        # levels that the chain skips stay None.
        industry = [None] * n_level_columns
        for name, i in zip(trail, indices):
            industry[i] = name
        out.writerow(industry + [depth] + data_row[:3])
	import urllib2
	import csv

	# Slice bounds applied to the downloaded file's lines (lines[FIRST_LINE:LAST_LINE]):
	# FIRST_LINE is the 0-based index of the first line kept, LAST_LINE is the
	# index of the first line left out.
	FIRST_LINE = 11
	LAST_LINE = 38

	def get_level(l):
	    """Return the number of space characters at the start of ``l``.

	    Only the space character counts as indentation; tabs and line endings
	    do not.  An empty string yields 0 and a string made up solely of
	    spaces yields its full length.
	    """
	    # Measuring what lstrip removes needs no loop variable, so "" and
	    # all-space input are handled without special cases.
	    return len(l) - len(l.lstrip(" "))

	def clean_line(l):
	    """Split one raw text line into its non-empty, stripped fields.

	    Carriage returns and newlines are removed, the remainder is split on
	    the space separator, and every fragment is stripped of surrounding
	    whitespace.  Fragments that are empty after stripping are dropped,
	    so the result never contains "" entries.
	    """
	    l = l.replace("\r", "").replace("\n", "")
	    # NOTE(review): splitting on a single space also breaks multi-word
	    # labels apart; the separator may have been a wider run of spaces
	    # before this text was whitespace-collapsed - confirm.
	    stripped = [piece.strip() for piece in l.split(" ")]
	    # Filter after stripping so whitespace-only fragments (e.g. a lone
	    # tab between two spaces) do not survive as "" fields.
	    return [piece for piece in stripped if piece]

	# Download the table and keep only the lines between FIRST_LINE and LAST_LINE.
	f = urllib2.urlopen("ftp://ftp.bls.gov/pub/suppl/empsit.tab1.txt")
	try:
	    lines = list(f)[FIRST_LINE:LAST_LINE]
	finally:
	    # Release the connection even if reading fails part-way through.
	    f.close()
	# A blank line has no fields, so it would make the heading lookup below fail.
	lines = [line for line in lines if line.strip()]

	# Per kept line: its indent width and its list of fields.
	levels = [get_level(line) for line in lines]
	data_rows = [clean_line(line) for line in lines]
	# The first field of every row is used as that row's heading.
	headings = [y[0] for y in data_rows]

	# Lookup tables keyed by heading text: position in the table and indent width.
	# NOTE(review): a heading text that occurs more than once keeps only the
	# position/level of its last occurrence in these dicts.
	d_order = dict(zip(headings, range(len(headings))))
	d_level = dict(zip(headings, levels))

	def one_up(heading):
	    """Return the closest preceding heading with a smaller indent level.

	    The headings that come before ``heading`` are scanned from nearest
	    to farthest; the first one whose ``d_level`` entry is strictly
	    smaller than that of ``heading`` is returned.  ``None`` is returned
	    when no such heading exists.
	    """
	    own_level = d_level[heading]
	    position = d_order[heading]
	    for earlier in reversed(headings[:position]):
	        if d_level[earlier] < own_level:
	            return earlier
	    # Reached only when the scan finds no shallower heading.
	    return None

	def crumb_trail(heading):
	    """Return the chain of headings leading to ``heading``, outermost first.

	    The chain is built by following ``one_up`` repeatedly until it
	    returns ``None``; ``heading`` itself is the last element.
	    """
	    trail = [heading]
	    parent = one_up(heading)
	    # Walk upwards, asking one_up only once per step, then flip the order.
	    while parent is not None:
	        trail.append(parent)
	        parent = one_up(parent)
	    trail.reverse()
	    return trail

	# Ancestor chain for every heading, and the length of the longest chain.
	crumb_trails = [crumb_trail(heading) for heading in headings]
	max_depth = max(len(trail) for trail in crumb_trails)

	# Rank the distinct indent widths from shallowest (0) to deepest.  The widths
	# are sorted explicitly because iterating a set gives no ordering guarantee.
	hier = dict((width, rank) for rank, width in enumerate(sorted(set(levels))))
	def get_hier(heading):
	    """Return the hierarchy column index for the indent width of ``heading``."""
	    return hier[d_level[heading]]

	# One "level_N" column per distinct indent width: row cells are addressed by
	# hier rank, so this is the size that can never be exceeded.
	n_level_columns = len(hier)
	# NOTE(review): five names follow the level columns here, but each row below
	# supplies four values (depth plus data_row[:3]) - confirm the intended slice.
	header = ["level_%s" % i for i in range(n_level_columns)] + [
	    "depth", "level", "normal_seasonal_movement", "estimated_over_month_change",
	    "sa_adjusted_over_month_change"]

	# The with-block closes bls.csv even when writing a row fails.
	with open("bls.csv", "w") as g:
	    out = csv.writer(g)
	    out.writerow(header)
	    for trail, data_row in zip(crumb_trails, data_rows):
	        indices = [get_hier(name) for name in trail]
	        depth = max(indices)
	        # Place every heading of the chain in the column of its own
	        # level; levels that the chain skips stay None.
	        industry = [None] * n_level_columns
	        for name, i in zip(trail, indices):
	            industry[i] = name
	        out.writerow(industry + [depth] + data_row[:3])