-
-
Save dedsm/fc74f04eb70d78459ff0847ef16f2e7a to your computer and use it in GitHub Desktop.
import logging | |
from django.contrib.gis.db.backends.postgis.base import \ | |
DatabaseWrapper as PostgisDBWrapper | |
from django.db import ProgrammingError | |
from .schema import TimescaleSchemaEditor | |
logger = logging.getLogger(__name__) | |
class DatabaseWrapper(PostgisDBWrapper): | |
SchemaEditorClass = TimescaleSchemaEditor | |
def prepare_database(self): | |
"""Prepare the configured database. | |
This is where we enable the `timescaledb` extension | |
if it isn't enabled yet.""" | |
super().prepare_database() | |
with self.cursor() as cursor: | |
try: | |
cursor.execute('CREATE EXTENSION IF NOT EXISTS timescaledb') | |
except ProgrammingError: # permission denied | |
logger.warning( | |
'Failed to create "timescaledb" extension. ' | |
'Usage of timescale capabilities might fail' | |
'If timescale is needed, make sure you are connected ' | |
'to the database as a superuser ' | |
'or add the extension manually.', | |
exc_info=True | |
) |
from django.contrib.gis.db.backends.postgis.schema import PostGISSchemaEditor | |
from timescale.fields import TimescaleDateTimeField | |
class TimescaleSchemaEditor(PostGISSchemaEditor): | |
sql_add_hypertable = ( | |
"SELECT create_hypertable(" | |
"{table}, {partition_column}, " | |
"chunk_time_interval => interval {interval})" | |
) | |
sql_drop_primary_key = ( | |
'ALTER TABLE {table} ' | |
'DROP CONSTRAINT {pkey}' | |
) | |
def drop_primary_key(self, model): | |
""" | |
Hypertables can't partition if the primary key is not | |
the partition column. | |
So we drop the mandatory primary key django creates. | |
""" | |
db_table = model._meta.db_table | |
table = self.quote_name(db_table) | |
pkey = self.quote_name(f'{db_table}_pkey') | |
sql = self.sql_drop_primary_key.format(table=table, pkey=pkey) | |
self.execute(sql) | |
def create_hypertable(self, model, field): | |
""" | |
Create the hypertable with the partition column being the field. | |
""" | |
partition_column = self.quote_value(field.column) | |
interval = self.quote_value(field.interval) | |
table = self.quote_value(model._meta.db_table) | |
sql = self.sql_add_hypertable.format( | |
table=table, partition_column=partition_column, interval=interval | |
) | |
self.execute(sql) | |
def create_model(self, model): | |
super().create_model(model) | |
for field in model._meta.local_fields: | |
if not isinstance(field, TimescaleDateTimeField): | |
continue | |
self.drop_primary_key(model) | |
self.create_hypertable(model, field) |
from django.db import models | |
class TimeBucket(models.Func): | |
function = 'time_bucket' | |
def __init__(self, expression, interval): | |
if not isinstance(interval, models.Value): | |
interval = models.Value(interval) | |
super().__init__(interval, expression) |
from django.db.models import DateTimeField | |
class TimescaleDateTimeField(DateTimeField): | |
def __init__(self, *args, interval, **kwargs): | |
self.interval = interval | |
super().__init__(*args, **kwargs) | |
def deconstruct(self): | |
name, path, args, kwargs = super().deconstruct() | |
kwargs['interval'] = self.interval | |
return name, path, args, kwargs |
Figured out the DatabaseWrapper
.. Use it in settings.py
as the DATABASES[name]['ENGINE']
.
I won't be using the PostGIS extension. Does it have to extend the postgis
backend to function properly? Or can it extend straight from postgresql_psycopg2
?
Thanks for the code.
As bjornuppeke said, it is used as a database engine. Other than that you want to delete the "timescale--db--backend--" so the relative imports don't break.
If you put this into a "timescale" folder in the settings, you want to use it as:
DATABASES["default"]["ENGINE"] = 'config.settings.timescale'
Is it possible to do the equivalent of
SELECT time_bucket('5 minutes', time) AS five_min, avg(cpu)
FROM metrics
GROUP BY five_min
ORDER BY five_min DESC LIMIT 10;
In Django? Using the custom FUNC
and grouping by the aggregated/annotated value seems to also group by the original time value.
When I try to execute the following SQL statement as a Django ORM statement, like the following
metrics.objects.values(five_min=time_bucket(time, "5 minutes")).annotate(avg_cpu=Avg("cpu"))
Becomes:
SELECT
time_bucket('5 minutes', time) AS five_min,
avg(cpu) AS avg_cpu
FROM metrics
GROUP BY time, time_bucket('5 minutes', time);
Where the additional GROUP BY time
is undesirable.
Is there any work around to this?
Is it possible to do the equivalent of
SELECT time_bucket('5 minutes', time) AS five_min, avg(cpu) FROM metrics GROUP BY five_min ORDER BY five_min DESC LIMIT 10;
In Django? Using the custom
FUNC
and grouping by the aggregated/annotated value seems to also group by the original time value.When I try to execute the following SQL statement as a Django ORM statement, like the following
metrics.objects.values(five_min=time_bucket(time, "5 minutes")).annotate(avg_cpu=Avg("cpu"))
Becomes:
SELECT time_bucket('5 minutes', time) AS five_min, avg(cpu) AS avg_cpu FROM metrics GROUP BY time, time_bucket('5 minutes', time);
Where the additional
GROUP BY time
is undesirable.Is there any work around to this?
I'm guessing you have some sort of default ordering that includes the time column?
can you try:
metrics.objects.values(five_min=time_bucket(time, "5 minutes")).order_by().annotate(avg_cpu=Avg("cpu"))
Oh god I did not even think to check the default ordering on the model despite reading so many articles about ORDER BY
and GROUP BY
not playing nicely. @desm thank you so much you have brought an end to hours of scouring documentation and much suffering. That fixed it!
no problem, I hope to have time in the near future to make this gist a full fledged django package
Working on a standalone project based off this gist - https://github.com/schlunsen/django-timescaledb
Help and pull requests are more than welcome
@schlunsen nice!
Was it a conscious choice to use Postgis as a Base engine? Perhaps it would make sense to have TimescaleDB mixin classes and then inherit them along with Postgres / Postgis to form TimescalePostgresWrapper
/ TimescalePostgisWrapper
and similar schema editor classes?
P.S. Should we continue this discussion here or would you prefer an issue / PR in your repo?
@schlunsen nice!
Was it a conscious choice to use Postgis as a Base engine? Perhaps it would make sense to have TimescaleDB mixin classes and then inherit them along with Postgres / Postgis to form
TimescalePostgresWrapper
/TimescalePostgisWrapper
and similar schema editor classes?P.S. Should we continue this discussion here or would you prefer an issue / PR in your repo?
@moorchegue Thanks for the feedback!
No it wasn't really conscious choice, but I like your solution and will update accordingly when I have time.
A pull request is also more than welcome.
@schlunsen that's awesome, about the Base engine, I used it like that because I know I'll use postgis, but in all my todo list I had for a separate project I wanted to implement something like https://github.com/SectorLabs/django-postgres-extra/blob/master/psqlextra/backend/base_impl.py that deals with it nicely, I also based most of the work for timescale with that project.
Hopefully I'll have enough time to make some PRs in the new project.
This has been implemented in now in https://github.com/schlunsen/django-timescaledb
I've found a way to make django Autofield to be virtual, so django does not add it to queries. This makes possible to connect to timescale db, without database modifications
this gist and in consequence the django-timescaledb goes beyond accessing a preexisting table and you can create, and hopefully in a future be able to modify via migrations the details of the tables, along with some class helpers to help with the time series queries.
@dedsm yep, removing pk constraint is the nice hack, but without virtual field django still requires to have uniq the id
column on the table, that blocks django integration in case of timeseriesdb tables was used in another systems.
Found this Gist in the TimescaleDB Slack history. Very interesting! Could you provide an example of how to use the
DatabaseWrapper
and theTimeBucket
expression?