Skip to content

Instantly share code, notes, and snippets.

@vascoosx
Last active September 26, 2017 06:33
Show Gist options
  • Save vascoosx/91fb5a654a7a5680a7e61b52d9b52a5c to your computer and use it in GitHub Desktop.
Save vascoosx/91fb5a654a7a5680a7e61b52d9b52a5c to your computer and use it in GitHub Desktop.
Apache Drill + R intro
<configuration>
  <!-- Hadoop S3A filesystem settings. -->
  <!-- NOTE(review): presumably meant for Drill's conf/core-site.xml; confirm. -->

  <!-- S3A access key. ACCESSKEY looks like a placeholder: replace it. -->
  <property>
    <name>fs.s3a.access.key</name>
    <value>ACCESSKEY</value>
  </property>

  <!-- S3A secret key. SecretKey looks like a placeholder: replace it, -->
  <!-- and keep the real value out of version control. -->
  <property>
    <name>fs.s3a.secret.key</name>
    <value>SecretKey</value>
  </property>

  <!-- Upper bound on S3A connections (see the Hadoop-AWS documentation). -->
  <property>
    <name>fs.s3a.connection.maximum</name>
    <value>100</value>
  </property>
</configuration>
# Demo: query a local JSON file with Apache Drill from R via sergeant.
# Prerequisite: start `drill-embedded` before running this script.
library(sergeant)
library(data.table)
library(jsonlite)

# Sample data ----
# Column `a` holds one random letter per row; column `l` is a list column
# holding three distinct random letters per row.
n_rows <- 1000
a <- sample(letters, n_rows, replace = TRUE)
dt <- data.table(a = a)
dt[, "l"] <- lapply(seq_len(n_rows), function(i) sample(letters, 3))

# Absolute path of the JSON file; the surrounding parentheses print it.
(drill.file <- file.path(getwd(), "drill_data.json"))
write(toJSON(dt), drill.file)

# The path is spliced into a backtick-quoted Drill identifier below, so a
# backtick inside the path would produce broken SQL.
stopifnot(!grepl("`", drill.file, fixed = TRUE))

# Raw SQL through the REST connection ----
dc <- drill_connection("localhost")

# flatten() expands each element of the list column `l` into its own row;
# the outer query keeps the rows where the flattened letter equals `a`.
# The table reference is built from `drill.file`, so the query reads the
# file written above instead of a hard-coded, machine-specific path.
flatten_query <- sprintf(
  "select *
from
(select m.a,
flatten(m.l),
row_number() over()
from dfs.`%s` m) t(a,b,r)
where a = b",
  drill.file
)
drill_query(dc, flatten_query)

# dplyr interface ----
# This does not work: `a %in% l` is translated to the SQL shown below, with
# the column `l` as the right-hand side of IN, and Drill reports an error
# (the `^` marker in the captured output) for that statement.
ds <- src_drill("localhost")
db <- tbl(ds, sprintf("dfs.`%s`", drill.file))
filter(db, a %in% l)
# SQL Query SELECT *
# FROM dfs.`<drill.file>`
# WHERE (`a` IN `l`)
# ^
# LIMIT 10
{
"type": "file",
"enabled": true,
"connection": "s3a://mybucket/",
"config": null,
"workspaces": {
"root": {
"location": "/",
"writable": false,
"defaultInputFormat": null
},
"tmp": {
"location": "/tmp",
"writable": true,
"defaultInputFormat": null
}
},
"formats": {
"psv": {
"type": "text",
"extensions": [
"tbl"
],
"delimiter": "|"
},
"csv": {
"type": "text",
"extensions": [
"csv"
],
"delimiter": ","
},
"tsv": {
"type": "text",
"extensions": [
"tsv"
],
"delimiter": "\t"
},
"parquet": {
"type": "parquet"
},
"json": {
"type": "json",
"extensions": [
"json",
"log"
]
},
"avro": {
"type": "avro"
},
"sequencefile": {
"type": "sequencefile",
"extensions": [
"seq"
]
},
"csvh": {
"type": "text",
"extensions": [
"csvh"
],
"extractHeader": true,
"delimiter": ","
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment