Created
September 27, 2012 18:12
-
-
Save generalredneck/3795489 to your computer and use it in GitHub Desktop.
XPath - A Fun Walk With A Powerful Query Language
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
require_once 'Console/Table.php';

// Load the sample data that every query below runs against.
// DOMDocument::load() returns FALSE when the file is missing or is not
// well-formed XML; stop here with a clear message instead of continuing
// with an empty document and failing later in a confusing place.
$groupsDoc = new DOMDocument();
if (!$groupsDoc->load("groups.xml")) {
  throw new RuntimeException('Unable to load groups.xml: the file is missing or is not well-formed XML.');
}

// Ask saveXML() to indent its output, then keep a copy of the serialized
// document next to the original for reference while following along.
$groupsDoc->formatOutput = TRUE;
file_put_contents("readable-groups.xml", $groupsDoc->saveXML());

// The evaluator used by all of the top-level queries in this script.
$xpath = new DOMXpath($groupsDoc);
/******************************************************************************* | |
__ __ ____ _ _____ _ _ | |
\ \/"/ U| _"\ uU /"\ u |_ " _| |'| |'| | |
/\ /\ \| |_) |/ \/ _ \/ | | /| |_| |\ | |
U / \ u | __/ / ___ \ /| |\ U| _ |u | |
/_/\_\ |_| /_/ \_\ u |_|U |_| |_| | |
,-,>> \\_ ||>>_ \\ >> _// \\_ // \\ | |
\_) (__)(__)__) (__) (__)(__) (__)(_") ("_) | |
A Fun Walk With A Powerful Query Language | |
Allan Chappell | |
AllPlayers.com | |
Eat n' | |
Geek | |
September 26, 2012 | |
******************************************************************************** | |
==WHAT IS XPATH?== | |
* XPath is a syntax for defining parts of an XML document | |
* XPath uses path expressions to navigate in XML documents | |
* XPath contains a library of standard functions | |
==STANDARDIZED== | |
Check out the standard at http://www.w3.org/TR/xpath/ | |
==XPath Path Expressions== | |
* XPath uses path expressions to select nodes or node-sets in an XML document. | |
* These path expressions look very much like a traditional computer file system. | |
******************************************************************************** | |
==XPATH TERMINOLOGY== | |
* NODES (7 of these) | |
- element | |
- attribute | |
- text | |
- namespace | |
- processing-instruction | |
- comment | |
- document nodes | |
<?xml version="1.0" encoding="utf-8"?> <= Processing Instruction | |
<!-- Comment --> | |
<result is_array="true"> <============== Root Element Node with Attribute | |
<item> | |
<uuid>499f7aa0-573f-11e1-887f-12313d2a2278 <==Text </uuid> | |
<title>Spartans (Rakoski)</title> | |
<description>Town North YMCA Soccer, 2012 Spring Flag Football</description> | |
<location> | |
<street>4332 Northaven Road</street> | |
<city>Dallas</city> | |
<state>TX</state> | |
<zip>75229</zip> | |
<country>us</country> | |
<latitude>32.901306</latitude> | |
<longitude>-96.835542</longitude> | |
</location> | |
</item> | |
</result> | |
******************************************************************************** | |
==NODE RELATIONSHIPS== | |
* Parent - Each node has one of these. The root node is a child of the document. | |
* Children - Nodes can have 0 or more of these. | |
* Siblings - Nodes that have the same parent. | |
* Ancestors - A node's parent, its parent's parent, and so on up to the document.
* Descendants - A node's children, its children's children, and so on.
<result is_array="true"> <============================= location's Ancestor | |
<item> <============================= uuid's Parent | |
<uuid>499f7aa0-573f-11e1-887f-12313d2a2278</uuid> <== title's Sibling | |
<title>Spartans (Rakoski)</title> <================== item's Child | |
<description>Town North YMCA Soccer, 2012 Spring Flag Football</description> | |
<location> | |
<street>4332 Northaven Road</street> <============= result's Descendant
<city>Dallas</city> | |
<state>TX</state> | |
<zip>75229</zip> | |
<country>us</country> | |
<latitude>32.901306</latitude> | |
<longitude>-96.835542</longitude> | |
</location> | |
</item> | |
</result> | |
******************************************************************************** | |
==XPATH SYNTAX== | |
nodename Selects all nodes with the name "nodename" | |
/ Selects from the root node | |
// Selects nodes in the document from the current node that | |
match the selection no matter where they are | |
. Selects the current node | |
.. Selects the parent of the current node | |
@ Selects attributes | |
*/ | |
// One entry per demonstration: heading => node list for a path expression.
$results = array(
  // Returns all nodes with the name item.
  // NOTE: This form is relative to the current context.
  'All Items' => $xpath->query('item'),

  // Returns the root node result.
  // NOTE: A leading / means absolute path!
  'Root Result Node' => $xpath->query('/result'),

  // Returns the uuid of every item.
  "All Items' Uuids" => $xpath->query('item/uuid'),

  // Returns every uuid, wherever it sits in the document.
  'All Uuids' => $xpath->query('//uuid'),

  // Returns every uuid found anywhere below an item.
  'All Uuids Under Items' => $xpath->query('//item//uuid'),

  // Selects an attribute rather than an element.
  'IsArray Attribute' => $xpath->query('/result/@is_array'),
);

display_results($results);
/******************************************************************************* | |
==PREDICATES== | |
Used to find a node that has a specific value or contains another specific node. | |
NOTE: There is a heavy use of functions in predicates. More on XPATH functions | |
later. | |
ANOTHER NOTE: It is worth noting that you are not limited to just one of these. | |
It is perfectly legal to have: | |
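/result/item[2][active/text()="active"]
This would get the 2nd item, but only if it is "active". (A path that starts
with / inside a predicate is absolute, so [/active/...] would never match here.)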
*/ | |
// One entry per demonstration: heading => node list for a predicate query.
$results = array(
  // [#] picks the element at that position.
  'First Item in Result' => $xpath->query('/result/item[1]'),

  // [last()] picks the final element of the set that would have been returned.
  'Last Item in Result' => $xpath->query('/result/item[last()]'),

  // [xpath-query] keeps only the nodes for which the inner query matches.
  'Item From Dallas' => $xpath->query('item[location/city/text()="Dallas"]'),

  // Getting complicated: a predicate comparing two node-sets.
  "Items' First Group Above" => $xpath->query('/result/item[/result/item/groups_above_uuid/item[1]/text() = uuid/text()]'),
);

display_results($results);
/******************************************************************************* | |
==SELECTING UNKNOWN NODES== | |
* Matches any element node | |
@* Matches any attribute node | |
node() Matches any node type | |
*/ | |
// The * wildcard is handy for walking over everything below a node, or for
// collecting the same kind of data from differently named elements.
// Here: for every item located in Dallas, list each child of its location.
echo center_text('Location Information for Dallas Based Items'), "\n";

$tbl = new Console_Table();
$dallasLocationFields = $xpath->query('item[location/city/text()="Dallas"]/location/*');
foreach ($dallasLocationFields as $field) {
  // One row per location child: element name and its text.
  $row = array($field->nodeName, $field->textContent);
  $tbl->addRow($row);
}
echo $tbl->getTable();

// Pause so the audience can read the table before moving on.
echo "\n Press Enter to Continue:\n";
fgets(STDIN);
/* | |
/******************************************************************************* | |
==XPATH OPERATORS== | |
| Computes the union of two node-sets
+ Addition 6 + 4 10 | |
- Subtraction 6 - 4 2 | |
* Multiplication 6 * 4 24 | |
div Division 8 div 4 2 | |
= Equal price=9.80 true if price is 9.80 | |
false if price is 9.90 | |
!= Not equal price!=9.80 true if price is 9.90 | |
false if price is 9.80 | |
< Less than price<9.80 true if price is 9.00 | |
false if price is 9.80 | |
<= Less than price<=9.80 true if price is 9.00 | |
or equal to false if price is 9.90 | |
> Greater than price>9.80 true if price is 9.90 | |
false if price is 9.80 | |
>= Greater than price>=9.80 true if price is 9.90 | |
or equal to false if price is 9.70 | |
or or price=9.80 true if price is 9.80 | |
or price=9.70 false if price is 9.50 | |
and and price>9.00 and true if price is 9.80 | |
price<9.90 false if price is 8.50 | |
mod Modulus | |
(division 5 mod 2 1 | |
remainder) | |
/******************************************************************************* | |
==COMMON FUNCTIONS== | |
fn:concat(string,string,...) Returns the concatenation of the strings | |
fn:string-join((string,string,...),sep) Returns a string created by
concatenating the string arguments and | |
using the sep argument as the separator | |
fn:normalize-space(string) Removes leading and trailing spaces from | |
the specified string, and replaces all | |
internal sequences of white space with | |
one and returns the result. If there is | |
no string argument it does the same on | |
the current node | |
fn:contains(string1,string2) Returns true if string1 contains | |
string2, otherwise it returns false | |
fn:position() Returns the index position of the node | |
that is currently being processed | |
fn:last() Returns the number of items in the | |
processed node list | |
/******************************************************************************* | |
==USES FOR XPATH== | |
* Anything XML like: | |
- RSS Feeds | |
- XML APIs | |
- Web pages | |
- Microsoft Office File Manipulation | |
* Selenium Testing | |
==DEMO OF POWERPOINT TO HTML== | |
/******************************************************************************* | |
___ _ _ U _____ u ____ _____ U ___ u _ _ ____ | |
/ " \ U |"|u| |\| ___"|// __"| u |_ " _| ___ \/"_ \/ | \ |"| / __"| u | |
| |"| | \| |\| | | _|" <\___ \/ | | |_"_| | | | |<| \| |><\___ \/ | |
/| |_| |\ | |_| | | |___ u___) | /| |\ | | .-,_| |_| |U| |\ |u u___) | | |
U \__\_\u<<\___/ |_____| |____/>> u |_|U U/| |\u\_)-\___/ |_| \_| |____/>> | |
\\// (__) )( << >> )( (__)_// \\_.-,_|___|_,-. \\ || \\,-.)( (__) | |
(_(__) (__) (__) (__)(__) (__) (__)\_)-' '-(_/ (__) (_") (_/(__) | |
/******************************************************************************/ | |
/**
 * Prints each XPath result set as a console table and waits for Enter.
 *
 * For every entry a centered heading is printed, followed by a table with one
 * row per matched node (its node path and its text value), the XML of the
 * first matched node, and a prompt that blocks until a line is read from
 * STDIN.
 *
 * @param array $results
 *   Map of heading => node list, as returned by DOMXPath::query(). An entry
 *   that is not iterable (query() returns FALSE for a malformed expression)
 *   or that matched nothing is shown as an empty table instead of aborting
 *   the script.
 */
function display_results($results) {
  foreach ($results as $name => $resultList) {
    echo center_text($name) . "\n";
    $tbl = new Console_Table();
    $tbl->setHeaders(
      array('XPath', 'Value')
    );

    // Treat a failed query like an empty result set.
    $nodes = (is_array($resultList) || $resultList instanceof Traversable) ? $resultList : array();

    $firstNode = NULL;
    $xpath = NULL;
    $xpathDocument = NULL;
    foreach ($nodes as $resultNode) {
      // Remember the first node so we can show its XML below.
      if ($firstNode === NULL) {
        $firstNode = $resultNode;
      }

      // A document node has no ownerDocument; it is its own document.
      $document = ($resultNode instanceof DOMDocument) ? $resultNode : $resultNode->ownerDocument;

      // Build the evaluator once per document rather than once per node.
      if ($xpath === NULL || $document !== $xpathDocument) {
        $xpath = new DOMXpath($document);
        $xpathDocument = $document;
      }

      // Use the node's first text child as its value. Attributes have no
      // children, so fall back to the attribute value for them.
      $text = "";
      $textNodes = $xpath->evaluate('text()', $resultNode);
      if ($textNodes->length != 0) {
        $text = $textNodes->item(0)->nodeValue;
      }
      elseif ($resultNode instanceof DOMAttr) {
        $text = $resultNode->value;
      }

      $tbl->addRow(array($resultNode->getNodePath(), $text));
    }

    echo $tbl->getTable();
    echo "\n XML of the first Node: \n";
    if ($firstNode === NULL) {
      // Nothing matched: there is no node to serialize.
      echo " (no nodes matched)";
    }
    elseif ($firstNode instanceof DOMDocument) {
      echo $firstNode->saveXML();
    }
    else {
      echo $firstNode->ownerDocument->saveXML($firstNode);
    }

    echo "\n Press Enter to Continue:\n";
    fgets(STDIN);
  }
}
/**
 * Centers a heading inside a banner line of padding symbols.
 *
 * The word is surrounded by the padding symbol so that the result is
 * $tot_width bytes wide. When the padding cannot be split evenly, the extra
 * symbol goes on the left. A word that is already as wide as, or wider than,
 * the banner is returned without padding.
 *
 * Lengths are measured in bytes (strlen), so multi-byte text will appear
 * narrower than the requested width.
 *
 * @param string $word
 *   The text to center.
 * @param int $tot_width
 *   Total width of the banner. Defaults to 80.
 * @param string $symbol
 *   Padding symbol; expected to be a single character. Defaults to "-".
 *
 * @return string
 *   The padded banner line, without a trailing newline.
 */
function center_text($word, $tot_width = 80, $symbol = "-") {
  $word = (string) $word;
  $tot_width = (int) $tot_width;
  $length_word = strlen($word);

  // Number of padding symbols needed in total; never negative.
  $padding = ($tot_width > $length_word) ? $tot_width - $length_word : 0;

  // The right side gets the smaller half, the left side gets the remainder.
  $right = ($padding - $padding % 2) / 2;
  $left = $padding - $right;

  return str_repeat($symbol, $left) . $word . str_repeat($symbol, $right);
}
Thank you. Been a long time since I looked at this. 10 years old and I bet it still would run!
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Well done, sir!