Last active
December 17, 2015 23:09
-
-
Save skopp/5687291 to your computer and use it in GitHub Desktop.
diffbot dev customizing - this is in HAML, just saved without the extension (Gists don't allow subdirs apparently)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-# Help popup ("Customize Instructions") documenting the CSS selector syntax and
-# the output filters available for custom rules. Bootstrap's tab plugin
-# (data-toggle="tab") switches between the panes below.
-#
-# Haml notes used throughout this template:
-#   * A trailing ">" on a tag (e.g. %code>) strips the whitespace around that
-#     tag, so the following "," or ")" sits directly against the element.
-#   * A leading "\" escapes the first character of a line; it is needed for
-#     lines starting with ":" which Haml would otherwise read as a filter name.
-#   * Plain text is passed through unescaped, so literal markup shown to the
-#     reader must be written with &lt; / &gt; entities.
!!!
%html{:lang => "en"}
  %head
    %meta{:charset => "utf-8"}/
    %title Customize Instructions
    %meta{:content => "width=device-width, initial-scale=1.0", :name => "viewport"}/
    / Default styles
    %link{:href => "/dev/css/bootstrap.min.css", :rel => "stylesheet"}/
    %link{:href => "/dev/css/fonts.css", :rel => "stylesheet"}/
    %link{:href => "/dev/css/default.css", :rel => "stylesheet"}/
    %link{:href => "/dev/css/help.css", :rel => "stylesheet"}/
  %body.help
    .container-fluid.popup
      .tabbable.tabs-left
        -# Tab strip: each href targets the id of a .tab-pane further down.
        %ul.nav.nav-tabs
          %li.active
            %a{"data-toggle" => "tab", :href => "#basic"} Basic Selectors
          %li
            %a{"data-toggle" => "tab", :href => "#combinators"} Combinators
          %li
            %a{"data-toggle" => "tab", :href => "#pseudo"} Pseudo Selectors
          %li
            %a{"data-toggle" => "tab", :href => "#filters"} Filters
          / <li><a href="#tips" data-toggle="tab">Tips and Examples</a></li>
        .tab-content
          #basic.tab-pane.active
            %h3.page-header Basic Selectors
            %table.table.table-bordered{:border => "0", :cellpadding => "5"}
              %tbody
                %tr
                  %th{:align => "left"} Pattern
                  %th{:align => "left"} Matches
                  %th{:align => "left"} Example
                %tr
                  %td
                    %code *
                  %td any element
                  %td
                    %code *
                %tr
                  %td
                    %code tagname
                  %td elements with the given tag name
                  %td
                    %code> div
                    ,
                    %code p
                %tr
                  %td
                    %code namespace|type
                  %td
                    elements of type 'type' in the namespace
                    %i ns
                  %td
                    %code fb|name
                    finds
                    -# Entities are required here: a raw <fb:name> would be emitted as a tag.
                    %code &lt;fb:name&gt;
                    elements
                %tr
                  %td
                    %code #id
                  %td elements with attribute ID of "id"
                  %td
                    %code> div#container
                    ,
                    %code #header
                %tr
                  %td
                    %code .class
                  %td elements with a class name of "class"
                  %td
                    %code> div.left
                    ,
                    %code .post-body
                %tr
                  %td
                    %code element[attr]
                    or
                    %code [attr]
                  %td elements with an attribute named "attr" (with any value)
                  %td
                    %code> a[href]
                    ,
                    %code [title]
                %tr
                  %td
                    %code element[attr=val]
                    or
                    %code [attr=val]
                  %td elements with an attribute named "attr" and value equal to "val"
                  %td
                    %code> img[width=500]
                    ,
                    %code a[rel=nofollow]
                %tr
                  %td
                    %code [^attrPrefix]
                  %td elements with an attribute name starting with "attrPrefix". Use to find elements with HTML5 datasets
                  %td
                    %code> [^data-]
                    ,
                    %code div[^data-]
                %tr
                  %td
                    %code [attr^=valPrefix]
                  %td elements with an attribute named "attr", and value starting with "valPrefix"
                  %td
                    %code a[href^=http:]
                %tr
                  %td
                    %code [attr$=valSuffix]
                  %td elements with an attribute named "attr", and value ending with "valSuffix"
                  %td
                    %code img[src$=.png]
                %tr
                  %td
                    %code [attr*=valContaining]
                  %td elements with an attribute named "attr", and value containing "valContaining"
                  %td
                    %code a[href*=/search/]
                %tr
                  %td
                    %code
                      [attr~=
                      %em> regex
                      ]
                  %td elements with an attribute named "attr", and value matching the regular expression
                  %td
                    %code img[src~=(?i)\\.(png|jpe?g)]
                %tr
                  %td{:colspan => "2"}
                    %em The above may be combined in any order
                  %td
                    %code div.header[title]
          #combinators.tab-pane
            %h3.page-header Combinators
            %table.table.table-bordered{:border => "0", :cellpadding => "5"}
              %tbody
                %tr
                  %th{:align => "left"} Pattern
                  %th{:align => "left"} Matches
                  %th{:align => "left"} Example
                %tr
                  %td
                    %code E F
                  %td an F element descended from an E element
                  %td
                    %code> div a
                    ,
                    %code .logo h1
                %tr
                  %td
                    %code E > F
                  %td an F direct child of E
                  %td
                    %code ol > li
                %tr
                  %td
                    %code E + F
                  %td an F element immediately preceded by sibling E
                  %td
                    %code> li + li
                    ,
                    %code div.head + div
                %tr
                  %td
                    %code E ~ F
                  %td an F element preceded by sibling E
                  %td
                    %code h1 ~ p
                %tr
                  %td
                    %code E, F, G
                  %td all matching elements E, F, or G
                  %td
                    %code a[href], div, h3
          #pseudo.tab-pane
            %h3.page-header Pseudo Selectors
            %table.table.table-bordered{:border => "0", :cellpadding => "5"}
              %tbody
                %tr
                  %th{:align => "left"} Pattern
                  %th{:align => "left"} Matches
                  %th{:align => "left"} Example
                %tr
                  %td
                    %code
                      -# "\" keeps Haml from treating ":lt(" as a filter invocation.
                      \:lt(
                      %em> n
                      )
                  %td
                    elements whose sibling index is less than
                    %em n
                  %td
                    %code td:lt(3)
                    -# Indexes are zero-based (td:eq(0) below is the first cell), so lt(3) covers 0, 1 and 2.
                    finds the first 3 cells of each row
                %tr
                  %td
                    %code
                      \:gt(
                      %em> n
                      )
                  %td
                    elements whose sibling index is greater than
                    %em n
                  %td
                    %code td:gt(1)
                    finds cells after skipping the first two
                %tr
                  %td
                    %code
                      \:eq(
                      %em> n
                      )
                  %td
                    elements whose sibling index is equal to
                    %em n
                  %td
                    %code td:eq(0)
                    finds the first cell of each row
                %tr
                  %td
                    %code
                      \:has(
                      %em> selector
                      )
                  %td
                    elements that contain at least one element matching the
                    %em selector
                  %td
                    %code div:has(p)
                    finds divs that contain p elements
                %tr
                  %td
                    %code
                      \:not(
                      %em> selector
                      )
                  %td
                    elements that do not match the
                    %em selector
                  %td
                    %code div:not(.logo)
                    finds all divs that do not have the "logo" class.
                    %br/
                    %code div:not(:has(div))
                    finds divs that do not contain divs.
                %tr
                  %td
                    %code
                      \:contains(
                      %em> text
                      )
                  %td elements that contain the specified text. The search is case insensitive. The text may appear in the found element, or any of its descendants.
                  %td
                    %code p:contains(jsoup)
                    finds p elements containing the text "jsoup".
                %tr
                  %td
                    %code
                      \:matches(
                      %em> regex
                      )
                  %td elements whose text matches the specified regular expression. The text may appear in the found element, or any of its descendants.
                  %td
                    %code td:matches(\\d+)
                    finds table cells containing digits.
                    -# Line break separates the two examples, matching the :not row.
                    %br/
                    %code div:matches((?i)login)
                    finds divs containing the text, case insensitively.
                %tr
                  %td
                    %code
                      \:containsOwn(
                      %em> text
                      )
                  %td elements that directly contain the specified text. The search is case insensitive. The text must appear in the found element, not any of its descendants.
                  %td
                    %code p:containsOwn(jsoup)
                    finds p elements with own text "jsoup".
                %tr
                  %td
                    %code
                      \:matchesOwn(
                      %em> regex
                      )
                  %td elements whose own text matches the specified regular expression. The text must appear in the found element, not any of its descendants.
                  %td
                    %code td:matchesOwn(\\d+)
                    finds table cells directly containing digits.
                    %br/
                    %code div:matchesOwn((?i)login)
                    finds divs containing the text, case insensitively.
                %tr
                  %td{:colspan => "2"}
                    %em The above may be combined in any order and with other selectors.
                  %td
                    %code .light:contains(name):eq(0)
          #filters.tab-pane
            %h3.page-header Filters
            %p You can choose to filter the output of your selector(s) using three filter types.
            %table.table.table-bordered{:border => "0", :cellpadding => "5"}
              %tbody
                %tr
                  %th{:align => "left"} Filter
                  %th{:align => "left"} Description
                %tr
                  %td
                    %strong Attribute
                  %td
                    Retrieves the specified attribute value of an element. For example, to extract the link (http://blog.diffbot.com) from the anchor tag
                    -# succeed appends "," directly after the code sample with no intervening space.
                    = succeed "," do
                      %code &lt;a href="http://blog.diffbot.com" class="outbound"&gt;
                    you would enter
                    %code HREF
                    as your attribute filter. You can only use a single attribute filter per rule.
                %tr
                  %td
                    %strong Ignore
                  %td Ignores the specified selectors (and all descendants) if they are found within the primary CSS selector. You may use any of the selector formats specified in this help screen.
                %tr
                  %td
                    %strong Replace
                  %td Allows you to specify match and replace regular expressions to alter the output returned by the Diffbot API. To remove matching content, simply leave the "replace with" field blank.
          -# Disabled "Tips and Examples" pane, emitted as an HTML comment. Its tab link
          -# in the nav above is commented out as well; re-enable both together.
          -# The nested lines are plain text, so they must all stay at one indent level.
          /
            <div class="tab-pane" id="tips">
            <h3 class="page-header">Tips and Examples</h3>
            <p>Here are some common usages for custom rules:</p>
            <table class="table table-bordered" border="0" cellpadding="5">
            <tbody>
            <tr>
            <th align="left">How to...?</th>
            <th align="left">Details</th>
            </tr>
            <tr>
            <td>Extract <code>META</code> values from a page.</td>
            <td>Use the <code>element[attr=val]</code> selector, along with an Attribute filter. For instance, to extract the Application Name from <code>&lt;meta name="application-name" content="Diffbot"&gt;</code>, use a selector of <code>meta[name=application-name]</code> and an Attribute filter of <code>content</code>.</td>
            </tr>
            <tr>
            <td>Repeat the same content from multiple domains.</td>
            <td>Use the <code>element[attr=val]</code> selector, along with an Attribute filter. For instance, to extract the Application Name from <code>&lt;meta name="application-name" content="Diffbot"&gt;</code>, use a selector of <code>meta[name=application-name]</code> and an Attribute filter of <code>content</code>.</td>
            </tr>
            </tbody>
            </table>
            </div>
    %script{:src => "/dev/js/libs/jquery/jquery-1.7.2.min.js"}
    %script{:src => "/dev/js/libs/bootstrap/bootstrap.min.js"}
    %script{:src => "/dev/js/libs/bootstrap/tab.js"}
    %script{:src => "//static.getclicky.com/js", :type => "text/javascript"}
    -# Clicky analytics. The try/catch swallows the error if the tracker script failed to load.
    :javascript
      try{ clicky.init(66589068); }catch(e){}
    -# Tracking-pixel fallback for visitors with JavaScript disabled. This must be a
    -# sibling of the :javascript filter, not part of its body.
    %noscript
      %p
        %img{:alt => "Clicky", :height => "1", :src => "//in.getclicky.com/66589068ns.gif", :width => "1"}/
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment