Skip to content

Instantly share code, notes, and snippets.

@nerdsrescueme
Created October 13, 2011 20:17
Show Gist options
  • Select an option

  • Save nerdsrescueme/1285380 to your computer and use it in GitHub Desktop.

Select an option

Save nerdsrescueme/1285380 to your computer and use it in GitHub Desktop.
HTML
<?php
namespace Atom\Parser;
/**
 * Regex-based HTML-to-array converter.
 *
 * The parser works bottom-up: on every pass it finds the innermost elements
 * that can currently be matched, records them in {@see Html::$level_array}
 * under the current nesting level, and replaces each one in the working copy
 * of the document with a placeholder of the form
 * "<separator><level>-<index><separator>" (for example "-{}-0-3-{}-").
 * Once no tags are left, the placeholder tree is unfolded into nested arrays.
 *
 * Stand-alone tags listed in $single_tags (including comments and the
 * doctype) are stripped from the document and do not appear in the result.
 *
 * Backward-compatibility note: for elements recorded at level 0 by the
 * "simple" pass, the 'htmlText' / 'innerHTML' value is the element's complete
 * markup (tags included); for every other element it is the inner markup.
 */
class Html
{
    /** Delimiter that surrounds the "<level>-<index>" part of a placeholder. */
    private $separator = '-{}-';

    /** Regex alternation of tag names that are removed instead of parsed. */
    private $single_tags = 'meta|img|hr|br|link|!--|!DOCTYPE|input';

    //-- Don't edit below this --

    /** Matches an element whose content has no "<" after its first character. */
    private $simple_pattern = '/<(.[^\s]*)(.[^><]*)?>(.[^<]*)?<\/\1>/is';

    /** Matches any element with a closing tag, whatever its content. */
    private $remaining_pattern = '/<(.[^\s]*)(.[^><]*)?>(.*)?<\/\1>/is';

    /** Working copy of the document; tags are progressively replaced by placeholders. */
    private $html;

    /** Nesting level of the most recent collection pass (-1 before the first pass). */
    private $level = -1;

    /**
     * Collected elements, indexed as [level][index]. Each element is an array
     * with the keys 'str', 'rep', 'tag', 'level', 'text', 'attr', 'htmlText'
     * and, once the tree has been built, 'parent'.
     */
    public $level_array = array();

    /**
     * @param string $html Markup to parse.
     */
    public function __construct($html)
    {
        $this->html = $this->clean($html);
    }

    /**
     * Splits a placeholder into its [level, index] coordinates.
     *
     * @param string $data Placeholder such as "-{}-1-0-{}-".
     * @return array|null Two-element array (level, index), or null when $data is not a placeholder.
     */
    private function parse_coordinates($data)
    {
        $parts = explode($this->separator, $data);
        if (!isset($parts[1])) {
            return null;
        }
        $coords = explode('-', $parts[1]);
        if (!isset($coords[1])) {
            return null;
        }
        return array($coords[0], $coords[1]);
    }

    /**
     * Looks up the element a placeholder refers to.
     *
     * @param string $data Placeholder.
     * @return array|null The recorded element, or null when nothing matches.
     */
    private function get_element($data)
    {
        $coords = $this->parse_coordinates($data);
        if ($coords === null || !isset($this->level_array[$coords[0]][$coords[1]])) {
            return null;
        }
        return $this->level_array[$coords[0]][$coords[1]];
    }

    /**
     * Expands every placeholder in $string back into markup.
     *
     * @param string $string Text that may contain placeholders.
     * @return string The text with known placeholders replaced by markup.
     */
    private function to_html($string)
    {
        foreach ($this->get_replacements($string) as $item) {
            $elem = $this->get_element($item);
            if ($elem === null) {
                // Not one of our placeholders (e.g. the separator occurs in the text).
                continue;
            }
            // Level-0 elements already carry their surrounding tags; deciding by the
            // element's own level (not the caller's) avoids wrapping them a second time.
            $markup = $elem['level'] === 0
                ? $elem['htmlText']
                : '<'.$elem['tag'].$elem['attr'].'>'.$elem['htmlText'].'</'.$elem['tag'].'>';
            $string = str_replace($item, $markup, $string);
        }
        return $string;
    }

    /**
     * Builds the regex that matches any of the configured stand-alone tags.
     *
     * @return string
     */
    private function single_tag_pattern()
    {
        return '/<('.$this->single_tags.')(.[^><]*)?>/is';
    }

    /**
     * Removes every stand-alone tag from the working document.
     */
    private function replace_single_tags()
    {
        $found = preg_match_all($this->single_tag_pattern(), $this->html, $matches);
        if ($found > 0) {
            // An array search is applied left to right, one literal at a time.
            $this->html = str_replace($matches[0], '', $this->html);
        }
    }

    /**
     * Records every element matched by $pattern as a new nesting level and
     * replaces each of them in the working document with its placeholder.
     *
     * @param string $pattern    Regex with groups 1 = tag, 2 = attributes, 3 = content.
     * @param int    $leaf_group Match group stored as 'htmlText' when the new level is 0.
     */
    private function collect_level($pattern, $leaf_group)
    {
        $found = preg_match_all($pattern, $this->html, $matches);
        if ($found === false || $found === 0) {
            return;
        }

        $this->level++;
        $elements = array();
        foreach ($matches[0] as $id => $value) {
            $placeholder = $this->separator.$this->level.'-'.$id.$this->separator;
            $html_text = $this->level === 0
                ? $matches[$leaf_group][$id]
                : $this->to_html($matches[3][$id]);

            $elements[] = array(
                'str' => $value,
                'rep' => $placeholder,
                'tag' => $matches[1][$id],
                'level' => $this->level,
                'text' => $matches[3][$id],
                'attr' => $matches[2][$id],
                'htmlText' => $html_text,
            );
            $this->html = str_replace($value, $placeholder, $this->html);
        }
        $this->level_array[$this->level] = $elements;
    }

    /**
     * Collects the elements that currently contain no further tags.
     * At level 0 the full match (tags included) is stored as 'htmlText'.
     */
    private function replace_simple_tags()
    {
        $this->collect_level($this->simple_pattern, 0);
    }

    /**
     * Collects whatever elements are left once the simple pass finds nothing,
     * e.g. elements whose text contains a stray "<".
     * At level 0 the inner content is stored as 'htmlText'.
     */
    private function replace_remaining_tags()
    {
        $this->collect_level($this->remaining_pattern, 3);
    }

    /**
     * @return bool Whether the working document still contains a simple element.
     */
    private function simple_tags_exist()
    {
        return preg_match($this->simple_pattern, $this->html) > 0;
    }

    /**
     * @return bool Whether the working document still contains a stand-alone tag.
     */
    private function single_tags_exist()
    {
        return preg_match($this->single_tag_pattern(), $this->html) > 0;
    }

    /**
     * Normalises markup before parsing.
     *
     * @param string $html Raw markup.
     * @return string Markup without line breaks, tabs or "&nbsp;", with runs of spaces collapsed.
     */
    private function clean($html)
    {
        // Strip characters unnecessary to parsing.
        $html = str_replace(array("\n", "\r", '&nbsp;', "\t"), '', $html);
        // Strip extra whitespace
        $html = preg_replace('| +|', ' ', $html);
        return $html;
    }

    /**
     * Parses the document into a nested array.
     *
     * Every node has the keys 'tag', 'innerHTML', 'repl' (its placeholder),
     * 'stratr' (raw attribute string), 'level', 'parent' (placeholder of the
     * parent, '' for roots) and 'children' (list of nodes of the same shape).
     *
     * @param string $html Optional new document; when given, it replaces the
     *                     current one and all previously collected state is discarded.
     * @return array List of root nodes.
     */
    public function to_array($html = '')
    {
        // Loose comparison kept on purpose so null/false still mean "no new document".
        if ($html != '') {
            $this->html = $this->clean($html);
            // Start from a clean slate so levels and placeholders of a
            // previously parsed document cannot leak into this one.
            $this->level = -1;
            $this->level_array = array();
        }

        while ($this->simple_tags_exist() or $this->single_tags_exist()) {
            $this->replace_single_tags();
            $this->replace_simple_tags();
        }
        $this->replace_remaining_tags();

        return $this->get_array($this->html);
    }

    /**
     * Lists the placeholders contained in $data, in order of appearance.
     *
     * @param string $data Text that may contain placeholders.
     * @return array List of placeholder strings (separators included).
     */
    private function get_replacements($data)
    {
        $final = array();
        $parts = explode($this->separator, $data);
        $count = count($parts);
        // Splitting on the separator leaves the placeholder payloads at odd indexes.
        for ($i = 0; $i < ($count - 1) / 2; $i++) {
            $final[] = $this->separator.$parts[$i * 2 + 1].$this->separator;
        }
        return $final;
    }

    /**
     * @param string $data Text fragment.
     * @return bool True unless the first non-blank character is "<" or ">".
     */
    private function starts_with_text($data)
    {
        $first = substr(trim(str_replace(array("\n", "\r"), '', $data)), 0, 1);
        return $first != '<' and $first != '>';
    }

    /**
     * Tells whether $string contains at least one of the given substrings.
     *
     * @param array  $array  Substrings to look for.
     * @param string $string Text to search in.
     * @return bool
     */
    private function in_array(array $array, $string)
    {
        foreach ($array as $item) {
            if (strpos($string, $item) !== false) {
                return true;
            }
        }
        return false;
    }

    /**
     * Unfolds the placeholders found in $html into nested node arrays and
     * records each node's parent in $level_array.
     *
     * @param string $html   Text containing placeholders.
     * @param string $parent Placeholder of the enclosing node ('' at the root).
     * @return array List of nodes.
     */
    private function get_array($html, $parent = '')
    {
        $final = array();
        if (strpos($html, $this->separator) === false) {
            return $final;
        }

        foreach ($this->get_replacements($html) as $placeholder) {
            $coords = $this->parse_coordinates($placeholder);
            if ($coords === null || !isset($this->level_array[$coords[0]][$coords[1]])) {
                continue;
            }
            $element = $this->level_array[$coords[0]][$coords[1]];
            // Remember the parent so loadNode() can report it later.
            $this->level_array[$coords[0]][$coords[1]]['parent'] = $parent;

            $final[] = array(
                'tag' => $element['tag'],
                'innerHTML' => $element['htmlText'],
                'repl' => $element['rep'],
                'stratr' => $element['attr'],
                'level' => $element['level'],
                'parent' => $parent,
                'children' => $this->get_array($element['text'], $placeholder),
            );
        }
        return $final;
    }

    /**
     * Returns a single node (without its children) by placeholder.
     *
     * @param string $rep Placeholder as found in a node's 'repl' or 'parent' key.
     * @return array Node with the keys 'tag', 'innerHTML', 'repl', 'stratr', 'level', 'parent'.
     * @throws \InvalidArgumentException When $rep does not refer to a collected element.
     */
    public function loadNode($rep)
    {
        $element = $this->get_element($rep);
        if ($element === null) {
            throw new \InvalidArgumentException('Unknown node placeholder: '.$rep);
        }
        return array(
            'tag' => $element['tag'],
            'innerHTML' => $element['htmlText'],
            'repl' => $element['rep'],
            'stratr' => $element['attr'],
            'level' => $element['level'],
            // 'parent' only exists once get_array() has visited the node.
            'parent' => isset($element['parent']) ? $element['parent'] : '',
        );
    }
}
@akhlaqs
Copy link
Copy Markdown

akhlaqs commented Apr 14, 2016

what do this code

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment