# Gist tdunning/4e8cc9736b72a2ffd7224895b7c6ef22, created June 27, 2016 02:03.
# NOTE: GitHub flagged this file as containing hidden or bidirectional Unicode
# text that may be interpreted or compiled differently than it appears; open
# it in an editor that reveals hidden Unicode characters to review.
import io
import sys
from collections import Counter
# Maps the one-character exchange code found at offset 9 of a trade record
# to the exchange's display name.
exchange_codes = {
    'A': 'NYSE MKT Stock Exchange',
    'B': 'NASDAQ OMX BX Stock Exchange',
    'C': 'National Stock Exchange',
    'D': 'FINRA',
    'I': 'International Securities Exchange',
    'J': 'Direct Edge A Stock Exchange',
    'K': 'Direct Edge X Stock Exchange',
    'M': 'Chicago Stock Exchange',
    'N': 'New York Stock Exchange',
    # 'T' used to be listed twice ('NASDAQ OMX Stock Exchange' here, then
    # 'NASDAQ Stock Exchange ' further down).  A dict literal silently keeps
    # only the last value, so the dead first entry is gone and the value that
    # was actually in effect is kept, at the key's original position.
    # NOTE(review): the trailing space in the 'T' and 'Q' names is preserved
    # on purpose so the printed report is unchanged.
    'T': 'NASDAQ Stock Exchange ',
    'P': 'NYSEArcaSM',
    'S': 'Consolidated Tape System',
    'Q': 'NASDAQ Stock Exchange ',
    'W': 'CBOE Stock Exchange',
    'X': 'NASDAQ OMX PSX Stock Exchange',
    'Y': 'BATS Y-Exchange',
    'Z': 'BATS Exchange',
}
def parse(line):
    """Split one fixed-width trade record into a dict of string fields.

    ``line`` must be exactly 73 characters long.  The length is checked on
    the raw line, so whatever follows the last parsed field (offset 71
    onwards, e.g. a line terminator) counts towards it.

    Returns a dict with these keys (offsets are 0-based, end-exclusive):

    * ``date`` -- characters 0-9, unmodified
    * ``exchange`` -- character 9 looked up in ``exchange_codes``
    * ``symbol`` -- characters 10-16 and 16-26, each stripped of whitespace
      and joined with ``'.'``; the dot is dropped when either part is empty
    * ``saleCondition`` -- characters 26-30, unmodified
    * ``tradeVolume`` -- characters 30-39 with leading zeros removed
    * ``tradePrice`` -- characters 39-46 and 46-50 joined with ``'.'``, then
      zeros stripped from both ends
    * ``tradeStopStockIndicator`` -- character 50
    * ``tradeCorrectionIndicator`` -- characters 51-53
    * ``tradeSequenceNumber`` -- characters 53-69 with leading zeros removed
    * ``tradeSource`` -- character 69
    * ``tradeReportingFacility`` -- character 70

    Raises:
        ValueError: if ``line`` is not 73 characters long.
        KeyError: if the exchange code is not a key of ``exchange_codes``.
    """
    # Validate first: on a truncated line the slices below come back empty or
    # shifted, and the exchange lookup would fail with a confusing KeyError
    # before the length problem was ever reported.
    if len(line) != 73:
        raise ValueError("Expected line to be 73 characters, got " + str(len(line)))

    return {
        'date': line[0:9],
        'exchange': exchange_codes[line[9:10]],
        'symbol': (line[10:16].strip() + '.' + line[16:26].strip()).strip('.'),
        'saleCondition': line[26:30],
        'tradeVolume': line[30:39].lstrip('0'),
        'tradePrice': (line[39:46] + '.' + line[46:50]).strip('0'),
        'tradeStopStockIndicator': line[50:51],
        'tradeCorrectionIndicator': line[51:53],
        'tradeSequenceNumber': line[53:69].lstrip('0'),
        'tradeSource': line[69:70],
        'tradeReportingFacility': line[70:71],
    }
# Running tallies that the scan of the trade file fills in.
# i counts every line read so far, including the skipped first line.
i = 0
# Trades per symbol, per exchange name, and per two-character hour prefix.
stocks, exchanges, hours = Counter(), Counter(), Counter()
# Occurrences of each whole sale-condition string, and of each single
# character appearing anywhere inside one.
conditions, conditionCodes = Counter(), Counter()
# Occurrences of each character at positions 0..3 of the sale condition.
code0, code1, code2, code3 = (Counter() for _ in range(4))
# Scan the trade file and fill the module-level counters.  The path may be
# given as the first command-line argument; it defaults to the file name this
# script was originally hard-wired to.
trade_path = sys.argv[1] if len(sys.argv) > 1 else "taqtrade20131218"
try:
    # parse() reads fields up to offset 71 yet insists on 73 characters, so
    # each record evidently ends in a two-character terminator (presumably
    # CR/LF -- confirm against the data).  newline="" hands the line over with
    # that terminator untouched; Python 3's default text mode would fold
    # "\r\n" into "\n" and every record would be rejected as too short.
    # latin-1 maps bytes 1:1 to characters, so the fixed-width offsets hold
    # for any input byte.  io.open behaves the same on Python 2 and 3.
    with io.open(trade_path, "r", newline="", encoding="latin-1") as trade_file:
        for record in trade_file:
            # The very first line is not a trade record and is skipped.
            if i > 0:
                v = parse(record)
                sale_condition = v['saleCondition']
                stocks[v['symbol']] += 1
                exchanges[v['exchange']] += 1
                # The first two characters of the 'date' field are the hour.
                hours[v['date'][0:2]] += 1
                conditions[sale_condition] += 1
                # Counter.update on a string tallies each character in it.
                conditionCodes.update(sale_condition)
                code0[sale_condition[0]] += 1
                code1[sale_condition[1]] += 1
                code2[sale_condition[2]] += 1
                code3[sale_condition[3]] += 1
            i = i + 1
    # The stray "Hour\tCount" header that used to be printed here is gone:
    # the report that follows prints its own "Hour\tcnt" header, so a
    # completed run showed two header lines in a row.
except KeyboardInterrupt:
    # Ctrl-C stops the scan early; fall through and report what was counted.
    print("interrupted after " + str(i/1e6) + " M lines")
def _print_counts(counts, separator="\t"):
    """Print one ``key<separator>count`` line per key, in sorted key order.

    ``sorted()`` is used instead of ``keys()`` followed by ``.sort()``
    because dict views have no ``sort`` method on Python 3.
    """
    for key in sorted(counts):
        print(key + separator + str(counts[key]))


# Trades per hour, in chronological (lexical) order.
print("Hour\tcnt")
_print_counts(hours)

print(str(len(stocks)) + " unique stocks")

# The 20 most heavily traded symbols, busiest first.
print("\nStock\tcnt")
for stock, cnt in stocks.most_common(20):
    print(stock + "\t" + str(cnt))

print("\nExchange\tcnt")
_print_counts(exchanges)

# Whole sale-condition strings.
print("\nCodes\tcnt")
_print_counts(conditions)

# Sale-condition characters broken down by their position (0-3).
print("\nChar\tpos\tcnt")
for position, per_position in enumerate((code0, code1, code2, code3)):
    _print_counts(per_position, "\t" + str(position) + "\t")
# (GitHub page footer, not part of the script: "Sign up for free to join this
# conversation on GitHub. Already have an account? Sign in to comment")