Skip to content

Instantly share code, notes, and snippets.

@frutik
Last active January 21, 2022 08:16
Show Gist options
  • Save frutik/03b2d94f7733db32745d18546daccc93 to your computer and use it in GitHub Desktop.
Save frutik/03b2d94f7733db32745d18546daccc93 to your computer and use it in GitHub Desktop.
brew install apache-drill
drill-embedded
ALTER SESSION SET `store.format`='json';
ALTER SESSION SET `store.json.all_text_mode` = true;
alter session set `planner.enable_decimal_data_type`=true;
select * from dfs.`/Users/akornilov/work/prices` limit 1;
select AVG(CAST(price as DECIMAL(28, 2)) ) as avg_price from dfs.`/Users/akornilov/work/prices`;
CREATE TABLE dfs.tmp.`/pricesss` AS SELECT * from dfs.root.`/Users/akornilov/work/prices`;
USE `s3`.`root`;
select * from s3.searches limit 1;
select searches.id.location.country, searches.id.location.city, count(*) from s3.searches group by searches.id.location.country, searches.id.location.city order by 3 desc;
select min(`@timestamp`) from s3.searches where searches.query.id = 'product-search-template-hits' ;
select searches.id.ua.visitid1 as visitor, `@timestamp`, searches.query.search as query, REGEXP_REPLACE(TRIM(leading 'value": ' from TRIM(leading '{"total": ' from sresults)), ',.*$','') as results from s3.searches where searches.query.id = 'product-search-template-hits' and searches.query.search is not null order by searches.id.ua.visitid1, `@timestamp` limit 100;
SHOW SCHEMAS
show databases
docker run -i --name drill-1.17.0-1 -p 8047:8047 -t apache/drill:1.17.0 /bin/bash
docker run -i -v /data:/tmp --name drill-1.17.0-1 -p 8047:8047 -t apache/drill:1.17.0 /bin/bash
http://localhost:8047/storage/s3
alter session set `store.format`='csv';
create table dfs.tmp.`ex/queries.csv` as select searches.id.ua.visitid1 as visitor, `@timestamp` as timestamp, searches.query.search as query, REGEXP_REPLACE(TRIM(leading 'value": ' from TRIM(leading '{"total": ' from sresults)), ',.*$','') as results from s3.searches where searches.query.id = 'product-search-template-hits' and searches.query.search is not null order by searches.id.ua.visitid1, `@timestamp` limit 100;
create table dfs.tmp.test2 (v, ts, r) as select searches.id.ua.visitid1, `@timestamp`, CONVERT_FROM(sresults, 'JSON') from s3.searches where searches.query.id = 'product-card-offers' and STRPOS(sresults, 'relation') = 0 order by searches.id.ua.visitid1, `@timestamp`;
select v, ts, test2.r.ids[0] from dfs.tmp.test2 order by 1,2;
@frutik
Copy link
Author

frutik commented Sep 15, 2020

{ "type": "file", "connection": "s3a://tt-importer-test", "config": { "fs.s3a.access.key": "A", "fs.s3a.secret.key": "c" }, "workspaces": { "tmp": { "location": "/tmp", "writable": true, "defaultInputFormat": null, "allowAccessOutsideWorkspace": false }, "root": { "location": "/", "writable": false, "defaultInputFormat": null, "allowAccessOutsideWorkspace": false } }, "formats": { "psv": { "type": "text", "extensions": [ "tbl" ], "delimiter": "|" }, "csv": { "type": "text", "extensions": [ "csv" ], "delimiter": "," }, "tsv": { "type": "text", "extensions": [ "tsv" ], "delimiter": "\t" }, "parquet": { "type": "parquet" }, "json": { "type": "json", "extensions": [ "json" ] }, "avro": { "type": "avro" }, "sequencefile": { "type": "sequencefile", "extensions": [ "seq" ] }, "csvh": { "type": "text", "extensions": [ "csvh" ], "extractHeader": true, "delimiter": "," }, "syslog": { "type": "syslog", "extensions": [ "syslog" ] }, "shp": { "type": "shp" }, "excel": { "type": "excel" }, "ltsv": { "type": "ltsv", "extensions": [ "ltsv" ] } }, "enabled": true }

@frutik
Copy link
Author

frutik commented Nov 13, 2020

SELECT a.uid a, b.uid b, COUNT(*) cnt
FROM my_table a JOIN my_table b ON b.id = a.id AND b.uid > a.uid
GROUP BY a.uid, b.uid

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment