Created
March 9, 2011 19:00
-
-
Save faisalman/862741 to your computer and use it in GitHub Desktop.
PHP class for parsing Microsoft Excel 2003 XML Spreadsheet
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This repository has been moved to http://github.com/faisalman/simple-excel-php |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/** | |
* Usage example, run this on webserver and see the result on webbrowser | |
*/ | |
//ini_set('display_errors','On'); | |
echo "\nSample of XML 2003 Spreadsheet file (example.xml):\n"; | |
echo "<pre>"; | |
echo htmlspecialchars(file_get_contents('example.xml')); | |
echo "</pre><hr/>"; | |
// load the class file | |
require_once 'XML2003Parser.php'; | |
// instantiate new object | |
$excel = new XML2003Parser('example.xml'); | |
//$excel->loadXMLFile('example.xml'); -> unnecessary since file is already loaded on construct (see line above) | |
// get array of the table | |
$table = $excel->getTableData(); | |
// display instruction | |
echo "\$excel = new XML2003Parser(); | |
<br/>\$excel->loadXMLFile('example.xml'); | |
<br/>\$table = \$excel->getTableData(); | |
<br/>then print the given array in \$table to an HTML table:"; | |
// print as HTML table | |
echo "<table border=1>"; | |
foreach($table["table_contents"] as $row){ | |
echo "<tr>"; | |
foreach($row["row_contents"] as $cell){ | |
echo "<td>"; | |
echo $cell["value"]; | |
echo "</td>"; | |
} | |
echo "</tr>"; | |
} | |
echo "</table>(see how from PHP source of this page)<hr/>"; | |
echo "<pre>Output of getColumnData(4):\n"; | |
print_r($excel->getColumnData(4)); // print an array of all data within column 4 | |
echo "\nOutput of getRowData(3):\n"; | |
print_r($excel->getRowData(3)); // print an array of all data within row 3 | |
echo "\nOutput of getCellData(2,1):\n"; | |
print_r($excel->getCellData(2,1)); // print the data within row 2 column 1 | |
echo "\n\nOutput of getTableData():\n"; | |
print_r($table); // print an array of all data | |
echo "</pre>"; | |
// load another different XML file | |
//$excel->loadXMLFile('example-2.xml'); | |
?> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?xml version="1.0"?> | |
<?mso-application progid="Excel.Sheet"?> | |
<Workbook xmlns="urn:schemas-microsoft-com:office:spreadsheet" | |
xmlns:o="urn:schemas-microsoft-com:office:office" | |
xmlns:x="urn:schemas-microsoft-com:office:excel" | |
xmlns:ss="urn:schemas-microsoft-com:office:spreadsheet" | |
xmlns:html="http://www.w3.org/TR/REC-html40"> | |
<DocumentProperties xmlns="urn:schemas-microsoft-com:office:office"> | |
<Author>Faisalman</Author> | |
<Keywords>Example file</Keywords> | |
<LastAuthor>Faisalman</LastAuthor> | |
<Created>2011-03-10T19:20:21Z</Created> | |
<Version>12.00</Version> | |
</DocumentProperties> | |
<ExcelWorkbook xmlns="urn:schemas-microsoft-com:office:excel"> | |
<WindowHeight>7935</WindowHeight> | |
<WindowWidth>20055</WindowWidth> | |
<WindowTopX>240</WindowTopX> | |
<WindowTopY>75</WindowTopY> | |
<ProtectStructure>False</ProtectStructure> | |
<ProtectWindows>False</ProtectWindows> | |
</ExcelWorkbook> | |
<Styles> | |
<Style ss:ID="Default" ss:Name="Normal"> | |
<Alignment ss:Vertical="Bottom"/> | |
<Borders/> | |
<Font ss:FontName="Calibri" x:CharSet="1" x:Family="Swiss" ss:Size="11" | |
ss:Color="#000000"/> | |
<Interior/> | |
<NumberFormat/> | |
<Protection/> | |
</Style> | |
</Styles> | |
<Worksheet ss:Name="Sheet1"> | |
<Table ss:ExpandedColumnCount="15" ss:ExpandedRowCount="3" x:FullColumns="1" | |
x:FullRows="1" ss:DefaultRowHeight="15"> | |
<Row ss:AutoFitHeight="0"> | |
<Cell><Data ss:Type="String">id</Data></Cell> | |
<Cell><Data ss:Type="String">nama_kota</Data></Cell> | |
<Cell><Data ss:Type="String">id_tipe</Data></Cell> | |
<Cell><Data ss:Type="String">id_wilayah</Data></Cell> | |
</Row> | |
<Row ss:AutoFitHeight="0"> | |
<Cell><Data ss:Type="Number">1</Data></Cell> | |
<Cell><Data ss:Type="String">Kab. Bogor</Data></Cell> | |
<Cell><Data ss:Type="Number">1</Data></Cell> | |
<Cell><Data ss:Type="Number">1</Data></Cell> | |
</Row> | |
<Row ss:AutoFitHeight="0"> | |
<Cell><Data ss:Type="Number">2</Data></Cell> | |
<Cell><Data ss:Type="String">Kab. Sukabumi</Data></Cell> | |
<Cell><Data ss:Type="Number">1</Data></Cell> | |
<Cell><Data ss:Type="Number">1</Data></Cell> | |
</Row> | |
<Row ss:AutoFitHeight="0"> | |
<Cell><Data ss:Type="Number">3</Data></Cell> | |
<Cell><Data ss:Type="String">Kab. Cianjur</Data></Cell> | |
<Cell><Data ss:Type="Number">1</Data></Cell> | |
<Cell><Data ss:Type="Number">1</Data></Cell> | |
</Row> | |
<Row ss:AutoFitHeight="0"> | |
<Cell><Data ss:Type="Number">4</Data></Cell> | |
<Cell><Data ss:Type="String">Kab. Bandung</Data></Cell> | |
<Cell><Data ss:Type="Number">1</Data></Cell> | |
<Cell><Data ss:Type="Number">4</Data></Cell> | |
</Row> | |
</Table> | |
<WorksheetOptions xmlns="urn:schemas-microsoft-com:office:excel"> | |
<PageSetup> | |
<Header x:Margin="0.3"/> | |
<Footer x:Margin="0.3"/> | |
<PageMargins x:Bottom="0.75" x:Left="0.7" x:Right="0.7" x:Top="0.75"/> | |
</PageSetup> | |
<Unsynced/> | |
<Selected/> | |
<Panes> | |
<Pane> | |
<Number>3</Number> | |
<ActiveRow>2</ActiveRow> | |
<ActiveCol>12</ActiveCol> | |
</Pane> | |
</Panes> | |
<ProtectObjects>False</ProtectObjects> | |
<ProtectScenarios>False</ProtectScenarios> | |
</WorksheetOptions> | |
</Worksheet> | |
</Workbook> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/** | |
* Excel 2003 XML-Parser | |
* | |
* PHP library for parsing Microsoft Excel 2003 XML Spreadsheet | |
* http://gist.github.com/862741 | |
* | |
* Copyright (c) 2011 Faisalman <[email protected]> | |
* | |
* Permission is hereby granted, free of charge, to any person obtaining a copy | |
* of this software and associated documentation files (the "Software"), to deal | |
* in the Software without restriction, including without limitation the rights | |
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |
* copies of the Software, and to permit persons to whom the Software is | |
* furnished to do so, subject to the following conditions: | |
* | |
* The above copyright notice and this permission notice shall be included in | |
* all copies or substantial portions of the Software. | |
* | |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | |
* THE SOFTWARE. | |
* | |
* @author Faisalman | |
* @copyright 2011 (c) Faisalman | |
* @example see example.php | |
* @license http://www.opensource.org/licenses/mit-license | |
* @link http://gist.github.com/862741 | |
* @package SimpleExcel | |
* @version 0.0.1 | |
*/ | |
class XML2003Parser | |
{ | |
/** | |
* Holds the parsed result | |
* @access private | |
* @var array | |
*/ | |
private $table_arr; | |
/** | |
* @param string $url Path to XML file (optional) | |
* @param bool $escape Set whether input had to be escaped from HTML tags, default to TRUE | |
* @return void | |
*/ | |
public function __construct($url = NULL, $escape = TRUE){ | |
if(isset($url)) $this->loadXMLFile($url,$escape); | |
} | |
/** | |
* Extract attributes from SimpleXMLElement object | |
* @access private | |
* @param object $attrs_obj | |
* @return array | |
*/ | |
private function getAttributes($attrs_obj){ | |
$attrs_arr = array(); | |
foreach($attrs_obj as $attrs){ | |
$attrs = (array)$attrs; | |
foreach($attrs as $attr){ | |
$attr_keys = array_keys($attr); | |
$attrs_arr[$attr_keys[0]] = $attr[$attr_keys[0]]; | |
} | |
} | |
return $attrs_arr; | |
} | |
/** | |
* Get data of the specified cell as an array | |
* @param int $row_num Row number | |
* @param int $col_num Column number | |
* @return mixed Returns an array or FALSE if cell doesn't exist | |
*/ | |
public function getCellData($row_num, $col_num){ | |
// check whether the cell exists | |
if(!isset($this->table_arr['table_contents'][$row_num-1]['row_contents'][$col_num-1])){ | |
return FALSE; | |
} | |
return $this->table_arr['table_contents'][$row_num-1]['row_contents'][$col_num-1]; | |
} | |
/** | |
* Get data of the specified column as an array | |
* @param int $col_num Column number | |
* @return mixed Returns an array or FALSE if table doesn't exist | |
*/ | |
public function getColumnData($col_num){ | |
$col_arr = array(); | |
if(!isset($this->table_arr['table_contents'])){ | |
return FALSE; | |
} | |
// get the specified column within every row | |
foreach($this->table_arr['table_contents'] as $row){ | |
array_push($col_arr,$row['row_contents'][$col_num-1]); | |
} | |
// return the array, if empty then return FALSE | |
return $col_arr; | |
} | |
/** | |
* Get data of the specified row as an array | |
* @param int $row_num Row number | |
* @return mixed Returns an array FALSE if row doesn't exist | |
*/ | |
public function getRowData($row_num){ | |
if(!isset($this->table_arr['table_contents'][$row_num-1]['row_contents'])){ | |
return FALSE; | |
} | |
$row = $this->table_arr['table_contents'][$row_num-1]['row_contents']; | |
$row_arr = array(); | |
// get the specified column within every row | |
foreach($row as $cell){ | |
array_push($row_arr,$cell); | |
} | |
// return the array, if empty then return FALSE | |
return $row_arr; | |
} | |
/** | |
* Get data of all cells as an array | |
* @return mixed Returns an array or FALSE if table doesn't exist | |
*/ | |
public function getTableData(){ | |
return isset($this->table_arr) ? $this->table_arr : FALSE; | |
} | |
/** | |
* Load the XML file to be parsed | |
* @param string $url Path to XML file | |
* @param bool $escape Set whether input had to be escaped from HTML tags, default to TRUE | |
* @return bool Returns TRUE if file exist and valid, FALSE if does'nt | |
* @todo Check for valid XML 2003 namespace | |
*/ | |
public function loadXMLFile($url, $escape = TRUE){ | |
$this->table_arr = array( | |
'doc_props' => array(), | |
'table_contents' => array() | |
); | |
// assign simpleXML object | |
if($simplexml_table = simplexml_load_file($url)){ | |
// check XML namespace and return if the loaded file isn't a valid XML 2003 spreadsheet | |
$xmlns = $simplexml_table->getDocNamespaces(); | |
if($xmlns['ss'] != 'urn:schemas-microsoft-com:office:spreadsheet'){ | |
return FALSE; | |
} | |
} else { | |
// when error loading file | |
return FALSE; | |
} | |
// extract document properties | |
$doc_props = (array)$simplexml_table->DocumentProperties; | |
$this->table_arr['doc_props'] = $doc_props; | |
$rows = $simplexml_table->Worksheet->Table->Row; | |
$row_num = 1; | |
// loop through all rows | |
foreach($rows as $row){ | |
$cells = $row->Cell; | |
$row_attrs = $row->xpath('@ss:*'); | |
$row_attrs_arr = $this->getAttributes($row_attrs); | |
$row_arr = array(); | |
$col_num = 1; | |
// loop through all row's cells | |
foreach($cells as $cell){ | |
// check whether ss:Index attribute exist | |
$cell_index = $cell->xpath('@ss:Index'); | |
// if exist, push empty value until the specified index | |
if(count($cell_index) > 0){ | |
$gap = $cell_index[0]-count($row_arr); | |
for($i = 1; $i < $gap; $i++){ | |
array_push($row_arr,array( | |
'row_num' => $row_num, | |
'col_num' => $col_num, | |
'datatype' => '', | |
'value' => '', | |
//'cell_attrs' => '', | |
//'data_attrs' => '' | |
)); | |
$col_num += 1; | |
} | |
} | |
// get all cell and data attributes | |
$cell_attrs = $cell->xpath('@ss:*'); | |
$cell_attrs_arr = $this->getAttributes($cell_attrs); | |
$data_attrs = $cell->Data->xpath('@ss:*'); | |
$data_attrs_arr = $this->getAttributes($data_attrs); | |
$cell_datatype = $data_attrs_arr['Type']; | |
// extract data from cell | |
$cell_value = (string)$cell->Data; | |
// filter from any HTML tags | |
if($escape) $cell_value = htmlspecialchars($cell_value); | |
// push column array | |
array_push($row_arr,array( | |
'row_num' => $row_num, | |
'col_num' => $col_num, | |
'datatype' => $cell_datatype, | |
'value' => $cell_value, | |
//'cell_attrs' => $cell_attrs_arr, | |
//'data_attrs' => $data_attrs_arr | |
)); | |
$col_num += 1; | |
} | |
// push row array | |
array_push($this->table_arr['table_contents'],array( | |
'row_num' => $row_num, | |
'row_contents' => $row_arr, | |
//'row_attrs' => $row_attrs_arr | |
)); | |
$row_num += 1; | |
} | |
// load succeed :) | |
return TRUE; | |
} | |
} | |
?> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Hi donatj, thanks for the heads up, I do account cell index already (see line #185) but seems I forgot to check row index as well :P
Edit: this repo has been moved to http://github.com/faisalman/simple-excel-php since 3 months ago, if you don't mind please give your feedback/suggestion there.
Thanks a lot ;)