Skip to content

Instantly share code, notes, and snippets.

@suzuki
Created November 12, 2014 11:57
Show Gist options
  • Save suzuki/1ec0040be67c82a7d706 to your computer and use it in GitHub Desktop.
Save suzuki/1ec0040be67c82a7d706 to your computer and use it in GitHub Desktop.
{
  "type": "web",
  "crawl": {
    "index": "webindex",
    "url": [
      "http://suzuki.tdiary.net/"
    ],
    "includeFilter": [
      "http://suzuki.tdiary.net/.*"
    ],
    "excludeFilter": [
      "http://suzuki.tdiary.net/images/.*",
      "http://suzuki.tdiary.net/js/.*",
      "http://suzuki.tdiary.net/theme/.*",
      "http://suzuki.tdiary.net/update.rb.*"
    ],
    "maxDepth": 3,
    "maxAccessCount": 200,
    "numOfThread": 1,
    "interval": 4000,
    "overwrite": true,
    "userAgent": "Elasticsearch Bot (suzuki.tdiary.net)",
    "robotsTxt": true,
    "target": [
      {
        "pattern": {
          "url": "http://suzuki.tdiary.net/.*",
          "mimeType": "text/html"
        },
        "properties": {
          "title": {
            "text": "title"
          },
          "body": {
            "text": "body",
            "trimSpaces": true
          },
          "meta": {
            "attr": "meta[property=twitter:description]",
            "args": [
              "content"
            ]
          }
        }
      }
    ]
  },
  "schedule": {
    "cron": "0 55 7 * * ?"
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment.