Created
March 6, 2012 23:10
-
-
Save davidwtbuxton/1989671 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# http://www.reddit.com/r/learnpython/comments/qkh43/new_to_python_searching_csv_files/
# http://stackoverflow.com/questions/9564322/loop-through-rows-of-one-csv-file-to-find-corresponding-data-in-another
# http://stackoverflow.com/questions/9577997/search-through-csv-from-specific-row-down
import csv
import sys
# Difference constants. Note these are floats, so don't expect perfect decimal
# mathematics.
DELTA_HI = 0.001
DELTA_LO = 0.0015


def _open_csv(filename):
    """Open *filename* for the csv module on either Python 2 or Python 3."""
    if sys.version_info[0] < 3:
        # Python 2: universal-newline text mode, as this script always used.
        return open(filename, 'rU')
    # Python 3: the 'U' flag was removed in 3.11, and the csv documentation
    # asks for newline='' so the reader handles line endings itself.
    return open(filename, newline='')


def main(filename1, filename2, delta_hi=DELTA_HI, delta_lo=DELTA_LO):
    """Pair each datetime from file 1 with the first later bid outside its band.

    filename1 is a CSV of two-column rows (ID, date string).  filename2 is a
    CSV whose rows hold exactly three fields from index 3 onwards: datetime,
    bid and ask.  Blank rows in either file are skipped.

    A row of filename2 "opens" a match when the first 16 characters of its
    datetime equal a date string from filename1 (presumably minute
    resolution, e.g. 'YYYY-MM-DD HH:MM' -- confirm against the data).  Each
    date string opens at most one match.  The match is "closed" by the
    earliest following row whose bid is greater than the opening bid plus
    delta_hi or less than the opening bid minus delta_lo.

    Returns a list of 2-tuples ``(first, later)`` in closing order:
    ``first`` is ``[datetime, ID] + opening_row`` and ``later`` is the row
    that closed it.  Matches that are never closed are not reported.

    Raises ValueError if a non-blank row has the wrong number of fields or a
    bid that cannot be parsed as a float.
    """
    # Mapping of ID to date string.
    with _open_csv(filename1) as source_file:
        id_to_dt = dict(
            row for row in csv.reader(source_file, delimiter=',') if row
        )
    # Invert the mapping, date string to ID. Having date as key makes
    # searching easy.
    source_dts = dict((v, k) for k, v in id_to_dt.items())

    # Opened matches still waiting for a bid outside their band, stored as
    # 3-tuples of (high, low, rowdata).
    matches = []
    # Closed matches (i.e. final results).
    deltas = []

    with _open_csv(filename2) as data_file:
        for row in csv.reader(data_file, delimiter=','):
            if not row:
                continue
            # The three-way unpack also rejects rows with a different number
            # of columns; the ask value itself is not used.
            dt, bid, _ask = row[3:]
            bid = float(bid)

            # Close every open match whose band this bid has left.  All of
            # them are examined so each one is paired with the earliest
            # qualifying row, even when one row closes several matches.
            still_open = []
            for pending in matches:
                p_hi, p_lo, p_row = pending
                if bid > p_hi or bid < p_lo:
                    deltas.append((p_row, row))
                else:
                    still_open.append(pending)
            matches = still_open

            # Open a new match if this row's datetime is listed in file 1.
            # pop() removes the source entry so it cannot be matched again.
            key = dt[:16]
            if key in source_dts:
                first = [dt, source_dts.pop(key)] + row
                matches.append((bid + delta_hi, bid - delta_lo, first))

    return deltas
# Command-line entry point: takes the two CSV paths as arguments and prints
# one line per result pair.
if __name__ == "__main__":
    import sys

    if len(sys.argv) < 3:
        # Exit with a usage hint instead of an unpacking traceback.
        sys.exit("usage: %s FILE1 FILE2" % sys.argv[0])

    f1, f2 = sys.argv[1:3]
    results = main(f1, f2)

    for a, b in results:
        # A single pre-formatted argument prints the same text on Python 2
        # and Python 3: the two rows separated by one space.
        print("%s %s" % (a, b))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment