Skip to content

Instantly share code, notes, and snippets.

@blahah
Created June 14, 2015 20:33
Show Gist options
  • Save blahah/19bc7c8f69e895bafef0 to your computer and use it in GitHub Desktop.
JMIR scraperJSON scraper
{
  "url": "www\\.jmir\\.org",
  "elements": {
    "publisher": {
      "selector": "//meta[@name='citation_publisher']",
      "attribute": "content"
    },
    "journal": {
      "selector": "//meta[@name='citation_journal_title']",
      "attribute": "content"
    },
    "title": {
      "selector": "//meta[@name='citation_title']",
      "attribute": "content"
    },
    "authors": {
      "selector": "//meta[@name='DC.Contributor']",
      "attribute": "content"
    },
    "date": {
      "selector": "//meta[@name='citation_date']",
      "attribute": "content"
    },
    "doi": {
      "selector": "//meta[@name='citation_doi']",
      "attribute": "content"
    },
    "volume": {
      "selector": "//meta[@name='citation_volume']",
      "attribute": "content"
    },
    "issue": {
      "selector": "//meta[@name='citation_issue']",
      "attribute": "content"
    },
    "firstpage": {
      "selector": "//meta[@name='citation_firstpage']",
      "attribute": "content"
    },
    "description": {
      "selector": "//meta[@name='description']",
      "attribute": "content"
    },
    "abstract": {
      "selector": "//meta[@name='description']",
      "attribute": "content"
    },
    "fulltext_html": {
      "selector": "//meta[@name='citation_abstract_html_url']",
      "attribute": "content",
      "download": {
        "rename": "fulltext.html"
      }
    },
    "fulltext_pdf_link": {
      "selector": "//meta[@name='citation_abstract_pdf_url']",
      "attribute": "content"
    },
    "fulltext_pdf": {
      "selector": "//a[text()='click here']",
      "attribute": "href",
      "follow": "fulltext_pdf_link",
      "download": {
        "rename": "fulltext.pdf"
      }
    },
    "fulltext_xml": {
      "selector": "//li[@class='article-xml-link']//a",
      "attribute": "href",
      "download": {
        "rename": "fulltext.xml"
      }
    },
    "supplementary_material": {
      "selector": "//h4[starts-with(text(),'Multimedia Appendix')]/following-sibling::*[2]",
      "attribute": "href",
      "download": true
    },
    "figure": {
      "selector": "//figure/a/img",
      "attribute": "src",
      "download": true
    },
    "figure_caption": {
      "selector": "//figure/figcaption"
    },
    "license": {
      "selector": "//meta[@name='DC.Rights']",
      "attribute": "content"
    },
    "copyright": {
      "selector": "//h4[@id='Copyright']/following-sibling::*[1]"
    }
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment