Skip to content

Instantly share code, notes, and snippets.

@macdice
Last active August 29, 2015 13:58
Show Gist options
  • Save macdice/10001471 to your computer and use it in GitHub Desktop.
Save macdice/10001471 to your computer and use it in GitHub Desktop.
-- Midnight graph hacking with your host Thomas Munro <[email protected]>
create table data_source (
id serial primary key,
name text not null
);
create table currency (
id text primary key
);
create table quote_stream (
id serial primary key,
data_source integer not null references data_source(id),
base_currency text not null references currency(id),
quote_currency text not null references currency(id)
);
create table quote (
quote_stream integer not null references quote_stream(id),
time timestamptz not null,
bid decimal not null,
ask decimal not null
);
create index quote_time_idx on quote(quote_stream, time);
create or replace function last_quote_at_time(quote_stream integer, t timestamptz)
returns table (quote_stream integer, t timestamptz, bid decimal, ask decimal)
as
$$
select quote_stream, time, bid, ask
from quote
where quote_stream = $1
and time <= $2
order by time desc
limit 1
$$
language sql;
create or replace function last_quotes_at_time(t timestamptz)
returns table (quote_stream integer, t2 timestamptz, bid decimal, ask decimal)
as
$$
declare
quote_stream integer;
record record;
begin
for quote_stream in select id from quote_stream
loop
return query select * from last_quote_at_time(quote_stream, t);
end loop;
return;
end;
$$
language plpgsql;
create or replace function two_way_rates_at_time(t timestamptz)
returns table (quote_stream integer, side text, from_currency text, to_currency text, rate decimal)
as
$$
-- each quote is expanded to a pair of simple X->Y rates
with quote_at_time as (select q.quote_stream,
i.base_currency,
i.quote_currency,
q.bid,
q.ask,
q.t2
from last_quotes_at_time($1) as q
join quote_stream i on q.quote_stream = i.id)
select quote_stream, 'BUY', quote_currency, base_currency, ask
from quote_at_time
union all
select quote_stream, 'SELL', base_currency, quote_currency, 1 / bid
from quote_at_time
$$
language sql;
create type edge_descriptor as (
quote_stream integer,
side text,
from_currency text,
to_currency text,
rate decimal
);
-- From http://stackoverflow.com/questions/8798055/finding-the-position-of-a-value-in-postgresql-arrays
create or replace function array_search(needle anyelement, haystack anyarray)
returns int as $$
select i
from generate_subscripts($2, 1) as i
where $2[i] = $1
order by i
$$ language sql stable;
-- The Bellman-Ford algorithm hacked up from pseudo-code at:
-- http://en.wikipedia.org/wiki/Bellman%E2%80%93Ford_algorithm
-- Adjusted for products rather than sums, using natural log, as described at:
-- http://math.stackexchange.com/questions/271809/does-dijkstras-algorithm-work-when-i-multiply-weights-of-successive-nodes
-- Also hacked to track the 'route' through the graph via edges,
-- unlike the Wikipedia pseudo-code which only tracks the predecessor,
-- since we might have more than one edge between two given vertices
-- (I mean we might have two brokers quoting EUR/USD with different
-- rates and we need to find the best one, so there are TWO edges
-- between EUR and USD).
create or replace function cheapest_path_at_time(t timestamptz, from_currency text, to_currency text)
returns table (quote_stream integer, side text, rate decimal)
as
$$
declare
vertices text[]; -- the distinct vertices
edges edge_descriptor[]; -- the edges
edge_from integer[]; -- the edge sources as index to vertices
edge_to integer[]; -- the edge targets as index to vertices
edge_weight double precision[]; -- the edge weights, log(rate)
distance double precision[]; -- computed distances
predecessor integer[]; -- computed path as index to vertices
route integer[]; -- arrival edge index indexed by vertex
edge integer; -- temp variable
u integer; -- ditto
v integer; -- ditto
w double precision; -- ditto
to_currency_index integer; -- to currency as index to vertices
path integer[]; -- where we build the result
begin
edges := array(select two_way_rates_at_time(t));
vertices := array(select distinct e.from_currency from unnest(edges) e);
edge_from := array(select array_search(e.from_currency, vertices) from unnest(edges) e);
edge_to := array(select array_search(e.to_currency, vertices) from unnest(edges) e);
edge_weight := array(select ln(e.rate) from unnest(edges) e);
--raise notice 'edge_from = %', edge_from;
--raise notice 'edge_to = %', edge_to;
--raise notice 'edges = %', edges;
--raise notice 'vertices = %', vertices;
--raise notice 'edge_weight = %', edge_weight;
to_currency_index := array_search(to_currency, vertices);
if to_currency_index is null then
raise exception 'Currency % is unknown or unreachable', to_currency;
-- or just return no rows?
end if;
-- Step 1: initialize graph
for v in 1..array_length(vertices, 1) loop
if vertices[v] = from_currency then
distance[v] := 0;
else
distance[v] := 'Infinity';
end if;
predecessor[v] := null;
route[v] := null;
end loop;
--raise notice 'distance = %', distance;
--raise notice 'predecessor = %', predecessor;
--raise notice 'route = %', route;
-- Step 2: relax edges repeatedly
for i in 1..array_length(vertices, 1) - 1 loop
--raise notice 'i = %', i;
for edge in 1..array_length(edges, 1) loop
--raise notice 'edge = %', edge;
u := edge_from[edge];
v := edge_to[edge];
w := edge_weight[edge];
if distance[u] + w < distance[v] then
distance[v] := distance[u] + w;
predecessor[v] := u;
route[v] := edge;
end if;
end loop;
end loop;
--raise notice 'distance = %', distance;
--raise notice 'predecessor = %', predecessor;
--raise notice 'route = %', route;
-- Step 3: check for negative-weight cycles
for edge in 1..array_length(edges, 1) loop
u := edge_from[edge];
v := edge_to[edge];
w := edge_weight[edge];
if distance[u] + w < distance[v] then
raise exception 'Graph contains a negative-weight cycle';
end if;
end loop;
-- decode the results to find the path to to_currency,
-- building path in the right order
edge := route[to_currency_index];
while edge is not null loop
path := edge || path;
edge := route[edge_from[edge]];
end loop;
-- generate the result rows
foreach edge in array path loop
quote_stream := edges[edge].quote_stream;
side := edges[edge].side;
rate := edges[edge].rate;
return next;
end loop;
end;
$$
language plpgsql;
insert into data_source values (1, 'Banque de Foo'), (2, 'Banco de Bar');
insert into currency values ('AUD'), ('USD'), ('EUR'), ('GBP'), ('BTC');
insert into quote_stream values (1, 1, 'BTC', 'USD'),
(2, 1, 'AUD', 'USD'),
(3, 1, 'EUR', 'USD'),
(4, 1, 'EUR', 'GBP'),
(5, 2, 'EUR', 'USD');
insert into quote values (1, '2014-01-01 12:00:00+00'::timestamptz, 440, 450),
(1, '2014-01-01 12:05:00+00'::timestamptz, 450, 460),
(2, '2014-01-01 12:01:00+00'::timestamptz, 0.9242, 0.9244),
(3, '2014-01-01 12:01:00+00'::timestamptz, 1.3790, 1.3792),
(4, '2014-01-01 12:01:00+00'::timestamptz, 0.8294, 0.8296),
(5, '2014-01-01 12:01:00+00'::timestamptz, 1.3790, 1.3791);
-- Ok check dis:
-- Here are our quote streams:
hack=> select * from quote_stream;
id | data_source | base_currency | quote_currency
----+-------------+---------------+----------------
1 | 1 | BTC | USD
2 | 1 | AUD | USD
3 | 1 | EUR | USD
4 | 1 | EUR | GBP
5 | 2 | EUR | USD
(5 rows)
-- Here are the active (ie most recent) quotes at time 'now()':
hack=> select * from last_quotes_at_time(now());
quote_stream | t2 | bid | ask
--------------+------------------------+--------+--------
1 | 2014-01-01 12:05:00+00 | 450 | 460
2 | 2014-01-01 12:01:00+00 | 0.9242 | 0.9244
3 | 2014-01-01 12:01:00+00 | 1.3790 | 1.3792
4 | 2014-01-01 12:01:00+00 | 0.8294 | 0.8296
5 | 2014-01-01 12:01:00+00 | 1.3790 | 1.3791
(5 rows)
-- Here is the same information exploded into individual edges:
hack=> select * from two_way_rates_at_time(now());
quote_stream | side | from_currency | to_currency | rate
--------------+------+---------------+-------------+------------------------
1 | BUY | USD | BTC | 460
2 | BUY | USD | AUD | 0.9244
3 | BUY | USD | EUR | 1.3792
4 | BUY | GBP | EUR | 0.8296
5 | BUY | USD | EUR | 1.3791
1 | SELL | BTC | USD | 0.00222222222222222222
2 | SELL | AUD | USD | 1.0820168794633196
3 | SELL | EUR | USD | 0.72516316171138506164
4 | SELL | EUR | GBP | 1.2056908608632747
5 | SELL | EUR | USD | 0.72516316171138506164
(10 rows)
-- Given those the rates, and the desire to get from BTC to GBP,
-- find the cheapest path:
hack=> select * from cheapest_path_at_time(now(), 'BTC', 'GBP');
quote_stream | side | rate
--------------+------+------------------------
1 | SELL | 0.00222222222222222222
5 | BUY | 1.3791
4 | SELL | 1.2056908608632747
(3 rows)
In English, this says you should:
sell BTC/USD to go BTC->USD with quote stream 1 (data source 1)
buy EUR/USD to go USD->EUR with quote stream 5 (data source 2)
sell EUR/GBP to go EUR->GBP with quote stream 4 (data source 1)
Note that in the second step it correctly picked stream 5 over stream 3
because it's one pip cheaper to buy (ie the ask quote).
The total effective rate is the product of the rates shown, so
0.003695040591555365, or 1/270.63... which is about right for
the BTC/GBP rate.
Hardly tested but it seems to work.
Two problems I imagine:
* there may occasionally be negative weight cycles, that is, a path
you can go around repeatedly to get free money, but these would
most likely be mirages caused by stale data or some other data
quality issue. These cycles do occasionally exist in real
life currency markets -- see http://scenic.princeton.edu/network20q/wiki/index.php?title=Bellman_Ford_Application:_Currency_Arbitrage
that may sound like a fun prospect but it breaks this code because
it means there is no 'cheapest' path, it could always go around
once more to get cheaper.
* this doesn't account for stale quotes, like, some feed you set
up that has a most recent quote from last week sitting there
which is actually bogus -- probably need to introduce the concept
of quote expiry time, and ignore quotes older than that
@richardsj
Copy link

Luvvley! How come the cheapest_path_at_time() results are in reverse order?

@macdice
Copy link
Author

macdice commented Apr 7, 2014

I could fix that by explicitly reversing the results... The result of BF is the shortest path from your chosen source node to every other node, and for each node it tells you which one is the 'predecessor' in the path from the source (unlike the Wikipedia article's pseudocode I actually record not just the predecessor node but also the edge used, as 'route', since you could have multiple edges between two nodes eg quotes from several banks for the same rate, whereas the pseudocode given would just tell you the order of the nodes to visit) . So in that last bit at line 194 you can see I have to find the destination node, and then hop back edge by edge to the source, to get the full path, feeding the result rows out as I go. Backwards!

@macdice
Copy link
Author

macdice commented Apr 7, 2014

Fixed, the result of cheapest_path_at_time is now in forwards order.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment