Skip to content

Instantly share code, notes, and snippets.

@simg
Last active September 30, 2019 05:18
Show Gist options
  • Save simg/2f28e9dcb6207dbaa11a285021935fe2 to your computer and use it in GitHub Desktop.
Save simg/2f28e9dcb6207dbaa11a285021935fe2 to your computer and use it in GitHub Desktop.
Benchmarking the performance of creating entity relationships with Postgres Arrays compared to "join tables"

A very common pattern in relational databases is the use of a join table to create one-to-many or many-to-many relationships between entities.

eg, something like:

CREATE TABLE table1 ( 
  id serial PRIMARY KEY 
, acolumn character varying); 
            
CREATE TABLE table2 ( 
  id serial PRIMARY KEY 
, bcolumn character varying); 
          
CREATE TABLE table1_table2 ( 
  table1_id int REFERENCES table1 (id) ON UPDATE CASCADE ON DELETE CASCADE 
, table2_id int REFERENCES table2 (id) ON UPDATE CASCADE 
, CONSTRAINT table1_table2_pkey PRIMARY KEY (table1_id, table2_id) 
);

This has often kept me awake at nights "worrying" about the performance penalty of all that extra disk IO to maintain these relations compared to what should be possible using Postgres Array fields.

eg, something like:

CREATE TABLE table3 ( 
  id serial PRIMARY KEY 
, ccolumn character varying 
, table4key integer[] );

Anyway, I finally got around to running some benchmarks on the two approaches, which are shown below:

The differences aren't quite as much as I'd imagined:

Number of Relations Benchmark - operations / second 1 5 25 100
INSERT: Join Table 627 521 316 136
INSERT: Array References 599 536 438 266
UPDATE: Join Table 2767 2727 2361 1883
UPDATE: Array References 2904 2629 2255 1867
SELECT ROW: Join Table 814 782 755 758
SELECT ROW: Array References 944 701 254 46
SELECT OBJECT: Join Tables 550 533 546 496
SELECT OBJECT: Array References 2509 2442 2459 2512
SELECT with Limit rows 10 50 1000 10000
SELECT LIST: Join Table 3210 3174 3236 3137
SELECT LIST: Array References 3309 3299 3293 3164

SELECT ROW - selects a single row from the primary table and joins it to the secondary table

SELECT OBJECT - selects and returns single row and the client initiates a second select to the secondary table (which is how most ORMS / clients actually work when doing "eager fetches")

SELECT LIST - selects a large-ish number of rows from a JOIN between the primary and secondary tables.

I haven't yet got around to running REVERSE searches (like finding rows in the primary table that have a relationship with specific rows in the secondary table, but I'll update this page when I get around to it.)

It's quite easy to create an index on Array columns, so I'm not anticipating any problems.

eg:

CREATE INDEX table4id ON table3 USING Btree (table4Key);

I suspect there are further advantages to arrays but I'll need better benchmarks to highlight them. (Like my benchmark is only single threaded and I think it's possible that arrays will scale better than join tables, but it's just a hunch).

var benchmark = require("benchmark")
, pg = require("pg")
var conString = "postgres://postgres:<yourpassword>@localhost/pgtest";
var table1HighestKey = 0; // TODO: find a way to avoid globals
var table3HighestKey = 0;
// used to record start key when testing at each level of relations so that SELECT ROW functions can pick rows with the correct number of relations
var table1BreakKeys = {}; // TODO: find a way to avoid globals
var table3BreakKeys = {};
var nextId = 1;
pg.connect(conString, function(err, db, done) {
if (err) return console.error('error fetching client from pool', err);
createTables(db, () => {
var bench = new benchmark.Suite("Postgres Arrays vs Join Tables", {
onComplete: () => {
done();
console.log("done");
process.exit();
}
});
var num_relations = [1, 5, 25, 100];
num_relations.forEach((num) => {
bench
.add({
name:"INSERT: Join Tables (" + num + " relations)"
, defer: true
, onStart: () => {
console.log(" "); // add a blank line add the end of each num_relations group
}
, fn : (deferred) => {
insertRowJoinTables(db, num, () => {
deferred.resolve();
});
}
})
.add({
name:"INSERT: Array References (" + num + " relations)"
, defer: true
, fn : (deferred) => {
insertRowArrayReferences(db, num, () => {
deferred.resolve();
});
}
})
.add({
name:"UPDATE: Join Tables (" + num + " relations)"
, defer: true
, setup: () => {
nextId = 1;
}
, fn: (deferred) => {
updateRowJoinTables(db, num, () => {
nextId++;
deferred.resolve();
});
}
})
.add({
name:"UPDATE: Array References (" + num + " relations)"
, defer: true
, setup: () => {
nextId = 1;
}
, fn: (deferred) => {
updateRowArrayReferences(db, num, () => {
nextId++;
deferred.resolve();
});
}
})
.add({
name:"SELECT ROW: Join Tables (" + num + " relations)"
, defer: true
, setup: () => {
nextId = table1BreakKeys[num] || 1; // if no breakKeys value default to 1
}
, fn: (deferred) => {
selectRowJoinTables(db, () => {
nextId++;
deferred.resolve();
});
}
})
.add({
name:"SELECT ROW: Array References (" + num + " relations)"
, defer: true
, setup: () => {
nextId = table3BreakKeys[num] || 1; // if no breakKeys value default to 1
}
, fn: (deferred) => {
selectRowArrayReferences(db, () => {
nextId++;
deferred.resolve();
});
}
})
.add({
name:"SELECT OBJECT SUBSELECT: Join Tables (" + num + " relations)"
, defer: true
, setup: () => {
nextId = table1BreakKeys[num] || 1; // if no breakKeys value default to 1
}
, fn: (deferred) => {
selectObjectsSubselectTables(db, () => {
nextId++;
deferred.resolve();
});
}
})
.add({
name:"SELECT OBJECT: Array References (" + num + " relations)"
, defer: true
, setup: () => {
nextId = table3BreakKeys[num] || 1; // if no breakKeys value default to 1
}
, fn: (deferred) => {
selectObjectArrayReferences(db, () => {
nextId++;
deferred.resolve();
});
}
})
});
var limits = [10, 50, 1000, 10000];
limits.forEach((limit) => {
bench.add({
name:"SELECT LIST: Join Tables (LIMIT " + limit + ")"
, defer: true
, onStart: () => {
console.log(" "); // add a blank line add the end of each num_relations group
}
, fn: (deferred) => {
selectListJoinTables(db, limit, () => {
deferred.resolve();
});
}
})
.add({
name:"SELECT LIST: Array References (LIMIT " + limit + ")"
, defer: true
, fn: (deferred) => {
selectListArrayReferences(db, limit, () => {
deferred.resolve();
});
}
});
});
bench.on('cycle', function(event) {
console.log(String(event.target));
})
.run({async:true, defer:true});
});
});
function insertRowArrayReferences (db, num_relations, done) {
var sql = `
START TRANSACTION;
WITH t4 AS (
INSERT INTO table4 (dcolumn) VALUES
` + buildRowSql('test dcolumn insert', num_relations) + `
RETURNING *
)
INSERT INTO table3 (ccolumn, table4key) VALUES ('test ccolumn', (
SELECT ARRAY(select id from t4)
)) RETURNING *;
END TRANSACTION;
`;
db.query(sql, [], (err, result) => {
if (result && result.rows.length > 0) {
if (result.rows[0].id > table3HighestKey) {
table3HighestKey = result.rows[0].id;
}
if (table3BreakKeys[num_relations] == undefined) {
table3BreakKeys[num_relations] = result.rows[0].id;
}
}
done();
});
}
function insertRowJoinTables (db, num_relations, done) {
var sql = `
START TRANSACTION;
WITH t1 AS (
INSERT INTO table1 (acolumn) VALUES ('test acolumn') RETURNING *
), t2 AS (
INSERT INTO table2 (bcolumn) VALUES
` + buildRowSql('test dcolumn insert', num_relations) + `
RETURNING *
)
INSERT INTO table1_table2 (table1_id, table2_id)
SELECT (SELECT id from t1), id from t2;
END TRANSACTION;
`;
db.query(sql, [], (err, result) => {
if (result && result.rows.length > 0) {
if (result.rows[0].id > table1HighestKey) {
table1HighestKey = result.rows[0].id;
}
if (table1BreakKeys[num_relations] == undefined) {
table1BreakKeys[num_relations] = result.rows[0].id;
}
}
done();
});
}
function updateRowArrayReferences (db, num_relations, done) {
var sql = `
START TRANSACTION;
WITH t4 AS (
INSERT INTO table4 (dcolumn) VALUES
` + buildRowSql('test dcolumn update', num_relations) + `
RETURNING *
)
UPDATE table3
set table4key = array_cat(table4key, (SELECT ARRAY(select id from t4)))
where id = $1;
END TRANSACTION;
`;
db.query(sql, [nextId], () => {
done();
});
}
function updateRowJoinTables (db, num_relations, done) {
var sql = `
START TRANSACTION;
WITH t2 AS (
INSERT INTO table2 (bcolumn) VALUES
` + buildRowSql('test dcolumn update', num_relations) + `
RETURNING *
)
INSERT INTO table1_table2 (table1_id, table2_id)
SELECT $1, (SELECT id from t2);
END TRANSACTION;
`;
db.query(sql, [nextId], () => {
done();
});
}
function selectRowArrayReferences (db, done) {
var sql = `
SELECT * FROM table3 t3 JOIN table4 t4 ON t4.id = ANY (t3.table4key)
WHERE t3.id = $1;
`;
db.query(sql, [nextId], () => {
done();
});
}
function selectRowJoinTables (db, done) {
var sql = `
SELECT * FROM table1 t1
JOIN table1_table2 jt on t1.id = jt.table1_id
JOIN table2 t2 on jt.table2_id = t2.id
WHERE t1.id = $1;
`;
db.query(sql, [nextId], () => {
done();
});
}
function selectObjectArrayReferences (db, done) {
var sql = "SELECT * FROM table3 WHERE t3.id = $1;";
db.query(sql, [nextId], (err, results) => {
if (err) return done(err);
if (results.rows.length > 0) {
var sql = "select * from table4 t4 where id in ($1)";
db.query(sql, [results.rows[0].table4key], (err, results) => {
done();
});
} else {
done();
}
});
}
function selectObjectsSubselectTables (db, done) {
var sql = "SELECT * FROM table1 t1 WHERE id = $1;";
db.query(sql, [nextId], (err, results) => {
if (err) return done(err);
if (results.rows.length > 0) {
var sql = `select * from table2
where id in (
SELECT table2_id from table1_table2 where table1_id = $1
)
`;
db.query(sql, [nextId], (err, results) => {
done();
});
} else {
done();
}
});
}
function selectListArrayReferences (db, limit, done) {
var sql = `
SELECT * FROM table3 t3 JOIN table4 t4 ON t4.id = ANY (t3.table4key) LIMIT $1^;
`;
db.query(sql, [limit], () => {
done();
});
}
function selectListJoinTables (db, limit, done) {
var sql = `
SELECT * FROM table1 t1
JOIN table1_table2 jt on t1.id = jt.table1_id
JOIN table2 t2 on jt.table2_id = t2.id
LIMIT $1^;
`;
db.query(sql, [limit], () => {
done();
});
}
function createTables (db, done) {
console.log("creating database tables");
var sql = `DROP TABLE IF EXISTS table1 CASCADE;
CREATE TABLE table1
(
id serial PRIMARY KEY
, acolumn character varying
);
DROP TABLE IF EXISTS table2 CASCADE;
CREATE TABLE table2
(
id serial PRIMARY KEY
, bcolumn character varying
);
DROP TABLE IF EXISTS table1_table2 CASCADE;
CREATE TABLE table1_table2 (
table1_id int REFERENCES table1 (id) ON UPDATE CASCADE ON DELETE CASCADE
, table2_id int REFERENCES table2 (id) ON UPDATE CASCADE
, CONSTRAINT table1_table2_pkey PRIMARY KEY (table1_id, table2_id)
);
DROP TABLE IF EXISTS table3 CASCADE;
CREATE TABLE table3
(
id serial PRIMARY KEY
, ccolumn character varying
, table4key integer[]
);
DROP TABLE IF EXISTS table4 CASCADE;
CREATE TABLE table4
(
id serial PRIMARY KEY
, dcolumn character varying
);
`;
db.query(sql, [], (err, result) => {
if (err) return console.error(err);
done();
});
}
function randomInt (min, max) {
return Math.floor(Math.random() * (max - min)) + min;
}
function buildRowSql (row, times) {
var ary = [];
for (var i=0; i < times; i++) {
ary.push("('"+row+"')");
}
return ary.join(",");
};
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment