Created
March 27, 2012 14:32
-
-
Save Hubro/2216437 to your computer and use it in GitHub Desktop.
Function for converting an XML report from Yahoo Web Analytics into JSON
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
# Function for converting an xml report into a json format
#
# Reads the XML report at $path and returns it as an indented JSON string.
# The output is a list of row objects. Each row has an "id" (1-based position
# of the data row), a "Category" (the Name of the matching left head item),
# a "children" list, and one entry per top header name holding the cell's
# FormattedValue. Rows are nested under the most recent row seen one indent
# level up, as given by the left head item's Indent element.
#
# Returns false when the file does not exist or cannot be parsed as XML.
function report_to_json($path)
{
    # Check if the file exists
    if (!file_exists($path)) return false;

    # Load the xml file as a DOMDocument. load() is an instance method; calling
    # it statically is an error on PHP 8. Parse errors are collected
    # internally so that libxml warnings cannot leak into the JSON output.
    $previousErrorMode = libxml_use_internal_errors(true);
    $dom = new DOMDocument();
    $loaded = $dom->load($path);
    libxml_clear_errors();
    libxml_use_internal_errors($previousErrorMode);
    if (!$loaded) return false;

    $domxpath = new DOMXpath($dom);

    # Fetch the DOM lists for the header names, left head items and data rows
    $headersDom   = $domxpath->query('/GetReportResponse/TopHeadRows/Row/Item/Name');
    $leftItemsDom = $domxpath->query('/GetReportResponse/LeftHeadItems/Item');
    $dataRowsDom  = $domxpath->query('/GetReportResponse/DataRows/Row');

    # Column headers taken from the report, in document order. The n-th
    # header labels the n-th Cell of every data row.
    $cellHeaders = array();
    foreach ($headersDom as $headerNode)
        $cellHeaders[] = trim($headerNode->nodeValue);

    #####################
    # Fetch item values #
    #####################

    # Ultimate row container
    $rows = array();

    # Always contains the previous occurrence of a row per indentation level.
    # Used for finding the parent of a row.
    $last = array();

    # Loop once per data row
    for ($i = 0; $i < $dataRowsDom->length; $i++)
    {
        # Break the reference bindings of the previous iteration so that
        # assigning to $row below starts a fresh array instead of overwriting
        # the previous row.
        unset($row);

        # Fetch the cells of this data row and the matching left head item
        $dataCells = $domxpath->query('./Cell', $dataRowsDom->item($i));
        $leftItem  = $leftItemsDom->item($i);

        # The indent for this row (0 when the element is missing)
        $indent = intval(_report_node_text($domxpath, './Indent', $leftItem));

        # The three fixed fields come first, then one field per report header
        $row = array(
            'id'       => $i + 1,
            'Category' => _report_node_text($domxpath, './Name', $leftItem),
            'children' => array(),
        );
        foreach ($cellHeaders as $c => $header)
        {
            $row[$header] = _report_node_text(
                $domxpath, './FormattedValue', $dataCells->item($c)
            );
        }

        # Add this row as the child of its parent, or to the top level. Rows
        # are stored by reference so that children appended later still show
        # up in the copy already placed in the tree. A row whose parent level
        # was never seen is kept at the top level rather than being lost.
        if ($indent > 0 && isset($last[$indent - 1]))
            $last[$indent - 1]['children'][] = &$row;
        else
            $rows[] = &$row;

        # Remember this row as the latest one at its indentation level
        $last[$indent] = &$row;
    }
    unset($row);

    return prettify(json_encode($rows));
}

# Helper for report_to_json: returns the trimmed text of the first node
# matching $query relative to $context, or '' when $context is null or
# nothing matches.
function _report_node_text($xpath, $query, $context)
{
    if ($context === null) return '';

    $nodes = $xpath->query($query, $context);
    if ($nodes === false || $nodes->length === 0) return '';

    return trim($nodes->item(0)->nodeValue);
}
# Function to prettify the output json
#
# Takes a compact JSON string and returns it re-indented: a line break after
# every "{", "[" and ",", a line break before every "}" and "]", and one
# $indentStr per nesting level at the start of each line. Characters inside
# string literals are copied through untouched.
function prettify($json)
{
    $json = (string) $json;

    $result = '';
    $pos = 0;                 # current nesting depth
    $strLen = strlen($json);
    $indentStr = ' ';
    $newLine = "\n";
    $outOfQuotes = true;
    $escaped = false;         # true when the previous in-string char was an unconsumed backslash

    for ($i = 0; $i < $strLen; $i++) {
        // Grab the next character in the string.
        $char = $json[$i];

        // Inside a quoted string: copy verbatim and track escapes explicitly.
        // Looking only at the previous character is not enough, because a
        // string ending in an escaped backslash ("...\\") would make its
        // closing quote look escaped.
        if (!$outOfQuotes) {
            $result .= $char;
            if ($escaped) {
                $escaped = false;
            } else if ($char === '\\') {
                $escaped = true;
            } else if ($char === '"') {
                $outOfQuotes = true;
            }
            continue;
        }

        if ($char === '"') {
            // Opening quote of a string literal.
            $outOfQuotes = false;
        } else if ($char === '}' || $char === ']') {
            // End of an element: output a new line and indent the closing
            // bracket one level less.
            $result .= $newLine;
            $pos--;
            $result .= str_repeat($indentStr, max($pos, 0));
        }

        // Add the character to the result string.
        $result .= $char;

        // After the beginning of an element or a separator, output a new
        // line and indent the next line.
        if ($char === ',' || $char === '{' || $char === '[') {
            $result .= $newLine;
            if ($char !== ',') {
                $pos++;
            }
            $result .= str_repeat($indentStr, max($pos, 0));
        }
    }

    return $result;
}
# Debug: when the ASTRUPS constant has not been defined, send a JSON
# content-type header and output the converted example report.
if (defined('ASTRUPS') === false) {
    header('Content-type: application/json');
    print report_to_json('data/example-report.xml');
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment