Last active
September 26, 2017 06:33
-
-
Save vascoosx/91fb5a654a7a5680a7e61b52d9b52a5c to your computer and use it in GitHub Desktop.
Apache Drill + R intro
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<!--
  Hadoop-style configuration giving the S3A filesystem client its credentials
  and connection-pool size.

  NOTE(review): the key values below look like placeholders. Replace them
  locally and avoid committing real credentials in plain text.
-->
<configuration>
  <!-- Access key id used by the s3a connector. -->
  <property>
    <name>fs.s3a.access.key</name>
    <value>ACCESSKEY</value>
  </property>
  <!-- Secret key paired with the access key above. -->
  <property>
    <name>fs.s3a.secret.key</name>
    <value>SecretKey</value>
  </property>
  <!-- Upper bound on simultaneous S3 connections. -->
  <property>
    <name>fs.s3a.connection.maximum</name>
    <value>100</value>
  </property>
</configuration>
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Apache Drill + R intro: write a small JSON file with a list column, then
# query it through Drill with raw SQL and (unsuccessfully) through dplyr.
#
# To test, start `drill-embedded` before running.
library(sergeant)
library(data.table)
library(jsonlite)

# Sample data ----
# `a` holds one random letter per row; `l` is a list column holding three
# distinct random letters per row.
n_rows <- 1000L
a <- sample(letters, n_rows, replace = TRUE)
dt <- data.table(a = a)
# Wrapping the RHS in list() makes `:=` store it as a single list column.
dt[, l := list(lapply(seq_len(n_rows), function(i) sample(letters, 3L)))]

# Write the data where Drill can read it, and reuse this one path everywhere
# below instead of hard-coding machine-specific absolute paths.
(drill.file <- file.path(getwd(), "drill_data.json"))
write(toJSON(dt), drill.file)

# Drill addresses a local file as dfs.`<absolute path>`.
drill_table <- sprintf("dfs.`%s`", drill.file)

# Raw SQL ----
# FLATTEN expands the list column `l` into one row per element, so the outer
# query can keep the rows whose `a` equals one of its own `l` values.
dc <- drill_connection("localhost")
flatten_query <- sprintf(
  "select *
from
(select m.a,
flatten(m.l),
row_number() over()
from %s m) t(a,b,r)
where a = b",
  drill_table
)
drill_query(dc, flatten_query)

# dplyr interface ----
# This does not work: `a %in% l` is translated to `a` IN `l` (see the
# generated SQL below), i.e. IN is applied to the column `l` directly rather
# than to its flattened values.
ds <- src_drill("localhost")
db <- tbl(ds, drill_table)
filter(db, a %in% l)
# SQL Query SELECT *
# FROM dfs.`C:/Users/current.dir/drill_data.json`
# WHERE (`a` IN `l`)
# ^
# LIMIT 10
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{
  "type": "file",
  "enabled": true,
  "connection": "s3a://mybucket/",
  "config": null,
  "workspaces": {
    "root": {
      "location": "/",
      "writable": false,
      "defaultInputFormat": null
    },
    "tmp": {
      "location": "/tmp",
      "writable": true,
      "defaultInputFormat": null
    }
  },
  "formats": {
    "psv": {
      "type": "text",
      "extensions": [
        "tbl"
      ],
      "delimiter": "|"
    },
    "csv": {
      "type": "text",
      "extensions": [
        "csv"
      ],
      "delimiter": ","
    },
    "tsv": {
      "type": "text",
      "extensions": [
        "tsv"
      ],
      "delimiter": "\t"
    },
    "parquet": {
      "type": "parquet"
    },
    "json": {
      "type": "json",
      "extensions": [
        "json",
        "log"
      ],
      "delimiter": null
    },
    "avro": {
      "type": "avro"
    },
    "sequencefile": {
      "type": "sequencefile",
      "extensions": [
        "seq"
      ]
    },
    "csvh": {
      "type": "text",
      "extensions": [
        "csvh"
      ],
      "extractHeader": true,
      "delimiter": ","
    }
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment