Last active
August 29, 2015 14:18
-
-
Save jprante/22c28851bed9aff8d9da to your computer and use it in GitHub Desktop.
Combining MySQL blob, PDF, Elasticsearch JDBC plugin, and attachment mapper type plugin
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/sh
#
# Demo: load a PDF into a MySQL blob column, pull it into Elasticsearch with
# the JDBC river, and let the attachment mapper type extract the text.
#
# MySQL 5.1+ with http://dev.mysql.com/doc/refman/5.1/en/string-functions.html#function_load-file
# ES 1.4.4+
# plugins required: jdbc, mapper attachment
# test.pdf is a PDF that can be parsed by Tika

# (Re)create the source table and load the PDF into it.
# - "if exists" keeps the very first run from failing: the mysql client stops
#   at the first error when reading statements from stdin, so a failing drop
#   would skip the create and insert below.
# - LOAD_FILE() reads the file on the MySQL *server* host and yields NULL when
#   the server cannot read it (see the MySQL manual link above).
# - NOTE(review): a plain "blob" column holds at most 64 KB; switch to
#   mediumblob/longblob for larger PDFs.
/usr/local/mysql/bin/mysql -u root test <<EOT
drop table if exists test;
create table test (
    id integer,
    content blob
);
insert into test (id, content) values (0,LOAD_FILE('/Users/joerg/Desktop/test.pdf'));
EOT

# Start from a clean slate: remove any previous river definition and index.
# On a first run the river delete answers with a 404, which is harmless.
curl -XDELETE 'localhost:9200/_river/my_test_river/'
curl -XDELETE 'localhost:9200/mytest'

# Register the JDBC river. The "content" column is mapped to the "attachment"
# type; its "content" sub-field is stored so the extracted text can be
# returned by the search further down.
curl -XPOST 'localhost:9200/_river/my_test_river/_meta' -d '
{
    "type" : "jdbc",
    "jdbc" : {
        "url" : "jdbc:mysql://localhost:3306/test",
        "user" : "",
        "password" : "",
        "sql" : "select id as _id, content as \"content\" from test",
        "index" : "mytest",
        "type" : "mydocs",
        "index_settings" : { },
        "type_mapping" : {
            "mydocs" : {
                "properties" : {
                    "content" : {
                        "type" : "attachment",
                        "path" : "full",
                        "fields" : {
                            "content" : {
                                "type" : "string",
                                "store" : true
                            }
                        }
                    }
                }
            }
        }
    }
}
'

# Give the river a moment to fetch the row and index it.
# NOTE(review): a fixed 5 second pause is a guess; a slow machine may need longer.
echo "sleeping while river should run..."
sleep 5

# Show the mapping that resulted from the type_mapping above.
curl -XGET 'localhost:9200/mytest/_mapping?pretty'

# search for a word in the test.pdf file and show parsed PDF text only
curl -XPOST 'localhost:9200/mytest/_search?pretty' -d '
{
    "fields" : [ "content" ],
    "query": {
        "match": {
            "content" : "Online"
        }
    }
}'

# Remove the river definition again.
curl -XDELETE 'localhost:9200/_river/my_test_river/'
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
$ bash mysql-blob-river.sh
{"error":"TypeMissingException[[_all] type[[my_test_river]] missing: No index has the type.]","status":404}{"acknowledged":true}{"_index":"_river","_type":"my_test_river","_id":"_meta","_version":1,"created":true}sleeping while river should run...
{
  "mytest" : {
    "mappings" : {
      "mydocs" : {
        "properties" : {
          "content" : {
            "type" : "attachment",
            "path" : "full",
            "fields" : {
              "content" : {
                "type" : "string",
                "store" : true
              },
              "author" : {
                "type" : "string"
              },
              "title" : {
                "type" : "string"
              },
              "name" : {
                "type" : "string"
              },
              "date" : {
                "type" : "date",
                "format" : "dateOptionalTime"
              },
              "keywords" : {
                "type" : "string"
              },
              "content_type" : {
                "type" : "string"
              },
              "content_length" : {
                "type" : "integer"
              },
              "language" : {
                "type" : "string"
              }
            }
          }
        }
      }
    }
  }
}
{
  "took" : 3,
  "timed_out" : false,
  "_shards" : {
    "total" : 5,
    "successful" : 5,
    "failed" : 0
  },
  "hits" : {
    "total" : 1,
    "max_score" : 0.023972876,
    "hits" : [ {
      "_index" : "mytest",
      "_type" : "mydocs",
      "_id" : "0",
      "_score" : 0.023972876,
      "fields" : {
        "content" : [ "\n2933380\n\nAufsatzbestellung\n\nTyp: Online\nMedea-Nummer: 2933380\n" ]
      }
    } ]
  }
}
{"acknowledged":true}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment