Skip to content

Instantly share code, notes, and snippets.

@xeoncross
Created May 2, 2011 03:43
Show Gist options
  • Save xeoncross/951158 to your computer and use it in GitHub Desktop.
Save xeoncross/951158 to your computer and use it in GitHub Desktop.
PHP functions
<?php
/**
 * List the language tags found in the HTTP "Accept-Language" header.
 *
 * The header is read from the HTTP_ACCEPT_LANGUAGE environment variable.
 * Every run of two or more letters/hyphens is treated as one tag, so the
 * single-letter "q" of a quality value and its digits are skipped. Tags are
 * returned in the order they appear in the header; quality values are not
 * used for sorting.
 *
 * @return array List of tags such as "en-US" or "de"; empty when none match.
 */
function accept_languages()
{
    $header = getenv('HTTP_ACCEPT_LANGUAGE');
    $locales = array();

    if (preg_match_all('/[\-a-z]{2,}/i', $header, $found) > 0) {
        // Index 0 holds the full-pattern matches, i.e. the tags themselves.
        $locales = $found[0];
    }

    return $locales;
}
/*
en-US,en;q=0.8,en-GB;q=0.6,de;q=0.4,ja;q=0.2
Array
(
[0] => en-US
[1] => en
[2] => en-GB
[3] => de
[4] => ja
)
*/
<?php
/**
 * Levenshtein edit distance between two strings, plus the edit script.
 *
 * Based on https://github.com/wassimchegham/Levenstein
 *
 * The cost matrix C has (N+1) x (M+1) cells, where N and M are the number of
 * characters in A and B. C[i][j] is the minimal number of single-character
 * deletions, insertions and substitutions needed to turn the first i
 * characters of A into the first j characters of B.
 *
 * When both inputs are valid UTF-8 they are compared code point by code
 * point; otherwise both are compared byte by byte.
 */
class distanceLevenstein
{
    /** @var array Cost matrix, indexed [i][j]. */
    private $_C;
    /** @var array Operation chosen for each cell, indexed [i][j]. */
    private $_OP;
    private $_strA;
    private $_strB;
    /** @var int Number of characters in A. */
    private $_strA_len;
    /** @var int Number of characters in B. */
    private $_strB_len;
    /** @var array Characters of A, zero-based. */
    private $_charsA;
    /** @var array Characters of B, zero-based. */
    private $_charsB;

    /**
     * @param string $strA Source string.
     * @param string $strB Target string.
     */
    public function __construct($strA = "", $strB = "")
    {
        $this->_C = array();
        $this->_OP = array();
        $this->_strA = (string) $strA;
        $this->_strB = (string) $strB;

        $charsA = $this->_splitChars($this->_strA);
        $charsB = $this->_splitChars($this->_strB);
        if ($charsA === false || $charsB === false) {
            // At least one input is not valid UTF-8: compare both as raw
            // bytes so the two sides are split consistently.
            $charsA = $this->_splitBytes($this->_strA);
            $charsB = $this->_splitBytes($this->_strB);
        }

        $this->_charsA = $charsA;
        $this->_charsB = $charsB;
        $this->_strA_len = count($charsA);
        $this->_strB_len = count($charsB);
    }

    /**
     * Fill the cost matrix and the per-cell operation table.
     *
     * Safe to call more than once; each call recomputes from scratch.
     *
     * @return void
     */
    public function doLevensteinDistance()
    {
        $this->_C = array();
        $this->_OP = array();
        $n = $this->_strA_len;
        $m = $this->_strB_len;

        // First column: turning i characters of A into nothing is i deletions.
        for ($i = 0; $i <= $n; $i++) {
            $this->_C[$i][0] = $i;
            if ($i > 0) {
                $this->_OP[$i][0] = "supprimer('" . $this->_charAt($this->_charsA, $i) . "')";
            }
        }
        // First row: building j characters of B from nothing is j insertions.
        for ($j = 1; $j <= $m; $j++) {
            $this->_C[0][$j] = $j;
            $this->_OP[0][$j] = "ajouter('" . $this->_charAt($this->_charsB, $j) . "')";
        }

        for ($i = 1; $i <= $n; $i++) {
            for ($j = 1; $j <= $m; $j++) {
                $ai = $this->_charAt($this->_charsA, $i);
                $bj = $this->_charAt($this->_charsB, $j);

                $delete = $this->_C[$i - 1][$j] + 1;
                $insert = $this->_C[$i][$j - 1] + 1;
                $replace = $this->_C[$i - 1][$j - 1] + (($ai === $bj) ? 0 : 1);

                $best = min($delete, $insert, $replace);
                $this->_C[$i][$j] = $best;

                // On ties, prefer deletion, then insertion, then the diagonal.
                if ($best === $delete) {
                    $this->_OP[$i][$j] = "supprimer('" . $ai . "')";
                } elseif ($best === $insert) {
                    $this->_OP[$i][$j] = "ajouter('" . $bj . "')";
                } elseif ($ai === $bj) {
                    $this->_OP[$i][$j] = "rien()";
                } else {
                    $this->_OP[$i][$j] = "echanger('" . $ai . "' avec '" . $bj . "')";
                }
            }
        }
    }

    /**
     * Print the cost matrix, one row per line, cells separated by a space.
     *
     * @return void
     */
    public function getMatriceC()
    {
        $this->_ensureComputed();
        foreach ($this->_C as $row) {
            echo implode(' ', $row) . "\n";
        }
    }

    /**
     * @return int Minimal number of operations to turn A into B.
     */
    public function getNbMinOperations()
    {
        $this->_ensureComputed();
        return $this->_C[$this->_strA_len][$this->_strB_len];
    }

    /**
     * Build the edit script by walking back from the bottom-right cell.
     *
     * @return string Operations in application order, each followed by ", ";
     *                empty when the strings are identical.
     */
    public function getMatriceOP()
    {
        $this->_ensureComputed();
        $i = $this->_strA_len;
        $j = $this->_strB_len;
        $seq = "";

        // Walk until BOTH indices reach 0. Stopping as soon as one of them
        // hits 0 would drop the deletions/insertions still pending on the
        // border of the matrix.
        while ($i > 0 || $j > 0) {
            $op = $this->_OP[$i][$j];
            if (strncmp($op, 'supprimer', 9) === 0) {
                $seq = $op . ', ' . $seq;
                $i--;
            } elseif (strncmp($op, 'ajouter', 7) === 0) {
                $seq = $op . ', ' . $seq;
                $j--;
            } else {
                // Diagonal move: a substitution is reported, a match is not.
                if (strncmp($op, 'echanger', 8) === 0) {
                    $seq = $op . ', ' . $seq;
                }
                $i--;
                $j--;
            }
        }

        return $seq;
    }

    /**
     * Run the computation if the matrix has not been filled yet.
     *
     * @return void
     */
    private function _ensureComputed()
    {
        if (!isset($this->_C[$this->_strA_len][$this->_strB_len])) {
            $this->doLevensteinDistance();
        }
    }

    /**
     * Return the character at a 1-based position.
     *
     * @param array $chars Characters of the string, zero-based.
     * @param int   $index 1-based position.
     * @return string
     */
    private function _charAt($chars, $index)
    {
        return $chars[$index - 1];
    }

    /**
     * Split a string into UTF-8 characters.
     *
     * @param string $string
     * @return array|false Characters, or false when the string is not valid UTF-8.
     */
    private function _splitChars($string)
    {
        if ($string === '') {
            return array();
        }
        return preg_split('//u', $string, -1, PREG_SPLIT_NO_EMPTY);
    }

    /**
     * Split a string into single bytes.
     *
     * @param string $string
     * @return array
     */
    private function _splitBytes($string)
    {
        // str_split('') returns array('') on older PHP versions.
        return ($string === '') ? array() : str_split($string);
    }
}
//---
// Demo page: read two strings from the query string, then print the cost
// matrix, the edit distance and the edit script.

// Anything that is not a plain string (e.g. ?chaineA[]=x) is treated as empty.
$chaineA = ( isset($_GET['chaineA']) && is_string($_GET['chaineA']) ) ? $_GET['chaineA'] : '';
$chaineB = ( isset($_GET['chaineB']) && is_string($_GET['chaineB']) ) ? $_GET['chaineB'] : '';

// The inputs are user-controlled, so every copy written into the page is
// HTML-escaped; ENT_QUOTES is required because the attributes use single quotes.
$chaineA_html = htmlspecialchars($chaineA, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
$chaineB_html = htmlspecialchars($chaineB, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');

$dl = new distanceLevenstein($chaineA, $chaineB);
echo "<form action='' method='get' >";
echo "<h2>chaine A: <input name='chaineA' value='".$chaineA_html."' /></h2>";
echo "<h2>chaine B: <input name='chaineB' value='".$chaineB_html."' /></h2>";
echo "<input type='submit' value='calculer la distance d edition' />";
echo "</form>";
$dl->doLevensteinDistance();
echo "<h3>Matrice C:</h3>";
echo "<pre>";
// getMatriceC() prints the matrix itself and returns nothing.
$dl->getMatriceC();
echo "</pre>";
echo sprintf("<h3>Nombre minimal d'operations pour passer de '%s' a '%s': %s</h3>", $chaineA_html, $chaineB_html, $dl->getNbMinOperations());
echo "<h3>Operations a effectuer:</h3>";
echo "<pre>";
// The edit script embeds characters taken from the inputs, so escape it too.
echo htmlspecialchars($dl->getMatriceOP(), ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
echo "</pre>";
?>
Author: lewis [ at t] hcoms [d dot t] co [d dot t] uk.
If you are trying to use split() to parse CSV, it won't always work as expected. Instead, try a simple stack-based method:
<?php
/**
 * Create a 2D array from a CSV string.
 *
 * The string is scanned once, jumping from one special token (delimiter,
 * enclosure or line separator) to the next. Delimiters and line separators
 * inside an open quoted section are ignored, and a doubled enclosure in a
 * field is collapsed to a single one.
 *
 * Tokens may be longer than one byte (e.g. "\r\n" as $newline). An empty
 * token never matches.
 *
 * Note that input ending with a line separator yields a final row holding
 * one empty field, and an empty string yields array(array('')).
 *
 * @param string $data      CSV text
 * @param string $delimiter Field delimiter
 * @param string $enclosure Field enclosure
 * @param string $newline   Line separator
 * @return array Rows in input order, each a list of field strings
 */
function parse($data, $delimiter = ',', $enclosure = '"', $newline = "\n")
{
    $data = (string) $data;
    $end = strlen($data);
    $delimiter_len = strlen($delimiter);
    $enclosure_len = strlen($enclosure);
    $newline_len = strlen($newline);

    $search = 0;       // Offset where the next token search starts
    $field_start = 0;  // Offset of the first byte of the current field
    $row = 0;
    $quote_open = false;
    $trim_quote = false;
    $return = array();

    while (true) {
        // Locate the next occurrence of each token
        $comma_pos = ($delimiter_len > 0) ? strpos($data, $delimiter, $search) : false;
        $quote_pos = ($enclosure_len > 0) ? strpos($data, $enclosure, $search) : false;
        $newline_pos = ($newline_len > 0) ? strpos($data, $newline, $search) : false;

        // Whichever comes first is handled now; none left means end of data
        $pos = min(
            ($comma_pos === false) ? $end : $comma_pos,
            ($quote_pos === false) ? $end : $quote_pos,
            ($newline_pos === false) ? $end : $newline_pos
        );
        $done = ($pos === $end);

        // Identify the token by position rather than by comparing one byte,
        // so that multi-byte tokens are recognised
        $is_delimiter = !$done && $pos === $comma_pos;
        $is_newline = !$done && $pos === $newline_pos;

        if ($done || $is_delimiter || $is_newline) {
            $token_len = $is_delimiter ? $delimiter_len : $newline_len;

            // Inside a quoted section the separator is ordinary field content
            if ($quote_open && !$done) {
                $search = $pos + $token_len;
                continue;
            }

            $length = $pos - $field_start;
            // Drop the closing enclosure that ends the field
            if ($trim_quote) {
                $length -= $enclosure_len;
            }

            // Store the field, collapsing doubled enclosures
            $return[$row][] = ($length > 0)
                ? str_replace($enclosure . $enclosure, $enclosure, substr($data, $field_start, $length))
                : '';

            if ($done) {
                break;
            }

            // The next field begins right after this separator
            $field_start = $pos + $token_len;
            $search = $field_start;
            if ($is_newline) {
                ++$row;
            }
            $trim_quote = false;
        } else {
            // The token at $pos is an enclosure: toggle the quoted state
            if (!$quote_open) {
                $quote_open = true;
                $trim_quote = false;
                // An enclosure that opens the field is not part of its value
                if ($field_start === $pos) {
                    $field_start = $pos + $enclosure_len;
                }
            } else {
                $quote_open = false;
                // Remember to trim it if the field ends here
                $trim_quote = true;
            }
            $search = $pos + $enclosure_len;
        }
    }

    return $return;
}
?>
This *should* work for any valid CSV string, regardless of what it contains inside its quotes (using RFC 4180). It should also be faster than most of the others I've seen. It's very simple in concept, and thoroughly commented.
<?php
/**
 * Fix invalid HTML fragments
 *
 * The fragment is wrapped in a marker <div>, parsed with DOMDocument (which
 * closes unclosed tags and discards stray closing tags), and the marker's
 * children are serialized back with saveXML(), so the result is XML-style
 * markup.
 *
 * The fragment is assumed to be UTF-8 encoded. This only repairs structure;
 * it does not remove malicious markup.
 *
 * @param string $html fragment
 * @return string repaired fragment ('' for empty input)
 */
function fix_html($html)
{
    $html = (string) $html;
    if ($html === '') {
        return '';
    }

    // Collect libxml parse errors internally instead of emitting warnings,
    // without touching the global error_reporting level
    $previous = libxml_use_internal_errors(true);

    $dom = new DOMDocument;
    // Declare the charset so non-ASCII text is decoded as UTF-8, and wrap the
    // fragment in a marker div so it can be found again after parsing
    $dom->loadHTML(
        '<meta http-equiv="Content-Type" content="text/html; charset=utf-8">'
        . '<div>' . $html . '</div>'
    );

    // Serialize the marker's children one by one; this avoids slicing the
    // marker tags off by fixed offsets, which fails on a self-closed <div/>
    $fixed = '';
    $wrapper = $dom->getElementsByTagName('div')->item(0);
    if ($wrapper !== null) {
        foreach ($wrapper->childNodes as $child) {
            $fixed .= $dom->saveXML($child);
        }
    }

    // Discard the collected errors and restore the previous libxml setting
    libxml_clear_errors();
    libxml_use_internal_errors($previous);

    return $fixed;
}
?>
@xeoncross
Copy link
Author

If you have a string of HTML from a comment form or similar, you can't trust that the user closed (or opened) all tags correctly. This script ensures that the HTML is valid and is not going to mess up your layout if you print it. It DOES NOT protect against malicious HTML like the kind used in XSS.

@adamramadhan
Copy link

oh okay.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment