- MariaDB (10.1.23-MariaDB-9+deb9u1)
- Virtual server (2.5Ghz single core, 3GB RAM)
- 701,241 spans
- 3,508,959 annotations
Old
SELECT DISTINCT
`zipkin_spans`.`trace_id_high`,
`zipkin_spans`.`trace_id`,
MAX(`zipkin_spans`.`start_ts`)
FROM
`zipkin_spans`
JOIN
`zipkin_annotations` ON (`zipkin_spans`.`trace_id_high` = `zipkin_annotations`.`trace_id_high`
AND `zipkin_spans`.`trace_id` = `zipkin_annotations`.`trace_id`
AND `zipkin_spans`.`id` = `zipkin_annotations`.`span_id`)
WHERE
(`zipkin_spans`.`start_ts` BETWEEN ? AND ?
AND `zipkin_annotations`.`endpoint_service_name` = ?)
GROUP BY `zipkin_spans`.`trace_id_high` , `zipkin_spans`.`trace_id`
ORDER BY MAX(`zipkin_spans`.`start_ts`) DESC
LIMIT 10
New
SELECT
trace_id_high, trace_id
FROM
(SELECT
trace_id_high, trace_id, id, MAX(start_ts) AS start_ts
FROM
zipkin_spans
WHERE
start_ts BETWEEN ? AND ?
GROUP BY trace_id_high , trace_id, id) AS zipkin_spans
WHERE
(trace_id_high, trace_id, id) IN (SELECT trace_id_high, trace_id, span_id FROM zipkin_annotations WHERE endpoint_service_name = ?)
ORDER BY start_ts DESC
LIMIT 10
Query | 1h time span | 12h | 24h |
---|---|---|---|
Old | <1s | ~41s | ~50s |
New | <1s | <1s | <1s |
Old
SELECT DISTINCT
`zipkin_spans`.`trace_id_high`,
`zipkin_spans`.`trace_id`,
MAX(`zipkin_spans`.`start_ts`)
FROM
`zipkin_spans`
JOIN
`zipkin_annotations` ON (`zipkin_spans`.`trace_id_high` = `zipkin_annotations`.`trace_id_high`
AND `zipkin_spans`.`trace_id` = `zipkin_annotations`.`trace_id`
AND `zipkin_spans`.`id` = `zipkin_annotations`.`span_id`)
WHERE
(`zipkin_spans`.`start_ts` BETWEEN ? AND ?
AND `zipkin_annotations`.`endpoint_service_name` = ?
AND `zipkin_spans`.`duration` >= ?)
GROUP BY `zipkin_spans`.`trace_id_high` , `zipkin_spans`.`trace_id`
ORDER BY MAX(`zipkin_spans`.`start_ts`) DESC
LIMIT 10
New
SELECT
trace_id_high, trace_id
FROM
(SELECT
trace_id_high, trace_id, id, MAX(start_ts) AS start_ts
FROM
zipkin_spans
WHERE
start_ts BETWEEN ? AND ?
AND duration >= ?
GROUP BY trace_id_high , trace_id, id) AS zipkin_spans
WHERE
(trace_id_high, trace_id, id) IN (SELECT trace_id_high, trace_id, span_id FROM zipkin_annotations WHERE endpoint_service_name = ?)
ORDER BY start_ts DESC
LIMIT 10
Query | 1h, long* | 12h, long | 24h, long |
---|---|---|---|
Old | ~1s | ~20s | ~28s |
New | <1s | ~2s | ~2s |
- long being >= 100ms (about 1/7th of spans fit this criteria)
Old
SELECT DISTINCT
`zipkin_spans`.`trace_id_high`,
`zipkin_spans`.`trace_id`
FROM
`zipkin_spans`
JOIN
`zipkin_annotations` ON (`zipkin_spans`.`trace_id_high` = `zipkin_annotations`.`trace_id_high`
AND `zipkin_spans`.`trace_id` = `zipkin_annotations`.`trace_id`
AND `zipkin_spans`.`id` = `zipkin_annotations`.`span_id`)
JOIN
`zipkin_annotations` AS `a0` ON (`zipkin_spans`.`trace_id_high` = `a0`.`trace_id_high`
AND `zipkin_spans`.`trace_id` = `a0`.`trace_id`
AND `zipkin_spans`.`id` = `a0`.`span_id`
AND `a0`.`a_key` = ?
AND `a0`.`endpoint_service_name` = ?)
WHERE
(`zipkin_spans`.`start_ts` BETWEEN ? AND ?
AND `zipkin_annotations`.`endpoint_service_name` = ?)
GROUP BY `zipkin_spans`.`trace_id_high` , `zipkin_spans`.`trace_id`
ORDER BY MAX(`zipkin_spans`.`start_ts`) DESC
LIMIT 10
New
SELECT
trace_id_high, trace_id
FROM
(SELECT
trace_id_high, trace_id, id, MAX(start_ts) AS start_ts
FROM
zipkin_spans
WHERE
start_ts BETWEEN ? AND ?
GROUP BY trace_id_high , trace_id, id) AS zipkin_spans
WHERE
(trace_id_high, trace_id, id) IN (SELECT trace_id_high, trace_id, span_id FROM zipkin_annotations WHERE endpoint_service_name = ?)
AND (trace_id_high, trace_id, id) IN (SELECT trace_id_high, trace_id, span_id FROM zipkin_annotations WHERE a_key = ?)
ORDER BY start_ts DESC
LIMIT 10
Query | Annotation | 1h | 12h | 24h |
---|---|---|---|---|
Old | Common | ~2m 30s | ~3m | ~3m |
Old | Uncommon | ~2s | ~3s | ~3s |
Old | Both (an additional join) | ~6s | ~8s | ~7s |
New | Common | <1s | ~1s | <1s |
New | Uncommon | <1s | ~3s | ~4s |
New | Both (an additional in clause) | <1s | ~3s | ~4s |
Old
SELECT DISTINCT
`zipkin_spans`.`trace_id_high`,
`zipkin_spans`.`trace_id`
FROM
`zipkin_spans`
JOIN
`zipkin_annotations` ON (`zipkin_spans`.`trace_id_high` = `zipkin_annotations`.`trace_id_high`
AND `zipkin_spans`.`trace_id` = `zipkin_annotations`.`trace_id`
AND `zipkin_spans`.`id` = `zipkin_annotations`.`span_id`)
JOIN
`zipkin_annotations` AS `a0` ON (`zipkin_spans`.`trace_id_high` = `a0`.`trace_id_high`
AND `zipkin_spans`.`trace_id` = `a0`.`trace_id`
AND `zipkin_spans`.`id` = `a0`.`span_id`
AND `a0`.`a_type` = 6
AND `a0`.`a_key` = ?
AND `a0`.`a_value` = ?
AND `a0`.`endpoint_service_name` = ?)
WHERE
(`zipkin_spans`.`start_ts` BETWEEN ? AND ?
AND `zipkin_annotations`.`endpoint_service_name` = ?)
GROUP BY `zipkin_spans`.`trace_id_high` , `zipkin_spans`.`trace_id`
ORDER BY MAX(`zipkin_spans`.`start_ts`) DESC
LIMIT 10
New
SELECT
trace_id_high, trace_id
FROM
(SELECT
trace_id_high, trace_id, id, MAX(start_ts) AS start_ts
FROM
zipkin_spans
WHERE
start_ts BETWEEN ? AND ?
GROUP BY trace_id_high , trace_id, id) AS zipkin_spans
WHERE
(trace_id_high, trace_id, id) IN (SELECT trace_id_high, trace_id, span_id FROM zipkin_annotations WHERE endpoint_service_name = ?)
AND (trace_id_high, trace_id, id) IN (SELECT trace_id_high, trace_id, span_id FROM zipkin_annotations WHERE a_type = 6 AND a_key = ? AND a_value = ?)
ORDER BY start_ts DESC
LIMIT 10
Query | Annotation | 1h | 12h | 24h |
---|---|---|---|---|
Old | Common key, common value | ~3s | ~1m 28s | ~1m 48s |
Old | Common key, uncommon value | 0 (no results) | ~1m 30s | ~1m 46s |
Old | Uncommon key | ~3s | ~4s | ~4s |
New | Common key, common value | <1s | <1s | <1s |
New | Common key, uncommon value | ~1s (no results) | ~45s | ~50s |
New | Uncommon key | ~3s | ~4s | ~4s |
After trying to optimize the queries I did some analysis of the indexes. I created the following indexes;
create index a1 on zipkin_annotations (endpoint_service_name, trace_id_high, trace_id, span_id);
create index a2 on zipkin_annotations (a_key, trace_id_high, trace_id, span_id)
create index a3 on zipkin_annotations (a_type, a_key, a_value(50), trace_id_high, trace_id, span_id)
create index s1 on zipkin_spans (start_ts, duration, trace_id_high, trace_id, id)
To help with the various sub-queries used.
Query | 1h time span | 12h | 24h |
---|---|---|---|
Old | <1s | ~41s | ~50s |
New | <1s | <1s | <1s |
New, indexed | <1s | <1s | <1s |
Query | 1h, long | 12h, long | 24h, long |
---|---|---|---|
Old | ~1s | ~20s | ~28s |
New | <1s | ~2s | ~2s |
New, indexed | <1s | <1s | <1s |
Query | Annotation | 1h | 12h | 24h |
---|---|---|---|---|
Old | Common | ~2m 30s | ~3m | ~3m |
Old | Uncommon | ~2s | ~3s | ~3s |
Old | Both (an additional join) | ~6s | ~8s | ~7s |
New | Common | <1s | ~1s | <1s |
New | Uncommon | <1s | ~3s | ~4s |
New | Both (an additional in clause) | <1s | ~3s | ~4s |
New, indexed | Common | <1s | <1s | <1s |
New, indexed | Uncommon | <1s | ~1s | <1s |
New, indexed | Both (an additional in clause) | <1s | <1s | <1s |
Query | Annotation | 1h | 12h | 24h |
---|---|---|---|---|
Old | Common key, common value | ~3s | ~1m 28s | ~1m 48s |
Old | Common key, uncommon value | 0 (no results) | ~1m 30s | ~1m 46s |
Old | Uncommon key | ~3s | ~4s | ~4s |
New | Common key, common value | <1s | <1s | <1s |
New | Common key, uncommon value | ~1s (no results) | ~45s | ~50s |
New | Uncommon key | ~3s | ~4s | ~4s |
New, indexed | Common key, common value | <1s | <1s | <1s |
New, indexed | Common key, uncommon value | ~1s (no results) | <1s | <1s |
New, indexed | Uncommon key | <1s | <1s | ~1s |
These results are very unscientific and simply me running them a couple of times and recording the time. No idea how it interacts with optimizations/caches on the MariaDB side. They seem a bit too good to be true right now and hopefully someone else can try and replicate my results.
I have also not done any analysis on the write cost of the new indexes (or if some old ones can be removed) so that should warrant some further testing.