Created
July 4, 2013 07:31
-
-
Save renepenner/7279041f7fe2ef708a30 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
/**
 * Base class for the nodes of a parsed spinning template.
 *
 * It only carries a payload; the subclasses TextItem and OptionsItem exist so
 * that consumers can tell the two node kinds apart with instanceof.
 */
class SpinningItem
{
    /**
     * Payload of the node, stored exactly as passed to the constructor.
     *
     * Declared explicitly: creating it implicitly in the constructor relies on
     * dynamic properties, which are deprecated as of PHP 8.2.
     *
     * @var mixed
     */
    public $content;

    /**
     * @param mixed $content Payload to store on the node.
     */
    public function __construct($content)
    {
        $this->content = $content;
    }
}
/**
 * Node for a run of literal text.
 *
 * parse_spinning_string() stores the text found between brace groups as the
 * node's content.
 */
class TextItem extends SpinningItem
{
}
/**
 * Node for one brace group of alternatives.
 *
 * parse_spinning_string() stores an array as the node's content, with one
 * entry per alternative; each entry is whatever parse_spinning_string()
 * returned for that alternative.
 */
class OptionsItem extends SpinningItem
{
}
/**
 * Parses a spinning template into a tree of text and option nodes.
 *
 * A template mixes literal text with brace groups such as "{a|b|c}", where
 * "|" separates the alternatives and groups may be nested. The groups are
 * located by their byte offsets, so no placeholder tokens are written into
 * the text and literal content can never be mistaken for one.
 *
 * @param string $string Template to parse.
 *
 * @return array|string The unchanged input when it contains no balanced brace
 *                      group (or the pattern fails to match); otherwise a list
 *                      of TextItem and OptionsItem nodes in template order.
 *                      Each OptionsItem holds one entry per alternative, and
 *                      every entry is itself the result of this function.
 */
function parse_spinning_string($string)
{
    // Matches one balanced top-level {...} group; (?R) recurses into nested groups.
    $found = preg_match_all('/\{((?>[^{}]+)|(?R))*\}/', $string, $matches, PREG_OFFSET_CAPTURE);
    if (!$found) {
        return $string;
    }

    $out = array();
    $cursor = 0; // byte offset of the first character not yet consumed

    foreach ($matches[0] as $hit) {
        list($group, $offset) = $hit;

        // Literal text between the previous group (or the start) and this one.
        if ($offset > $cursor) {
            $out[] = new TextItem(substr($string, $cursor, $offset - $cursor));
        }

        // Strip the outer braces; the cast covers substr() returning false
        // for an empty group "{}" on PHP versions before 8.0.
        $alternatives = _split_spinning_alternatives((string) substr($group, 1, -1));
        foreach ($alternatives as $index => $alternative) {
            $alternatives[$index] = parse_spinning_string($alternative);
        }
        $out[] = new OptionsItem($alternatives);

        // Offsets reported by PCRE are byte offsets, hence strlen()/substr()
        // rather than their multibyte counterparts.
        $cursor = $offset + strlen($group);
    }

    // Literal text after the last group.
    if ($cursor < strlen($string)) {
        $out[] = new TextItem(substr($string, $cursor));
    }

    return $out;
}

/**
 * Splits the inside of one brace group on every "|" that is not enclosed in a
 * nested brace group.
 *
 * @param string $inner Group content without its outer braces.
 *
 * @return array List of alternatives, empty strings included; always at least
 *               one element.
 */
function _split_spinning_alternatives($inner)
{
    $parts = array();
    $current = '';
    $depth = 0;
    $length = strlen($inner);

    // Byte-wise scan: "{", "}" and "|" are ASCII, so they never occur inside
    // a multibyte UTF-8 sequence.
    for ($i = 0; $i < $length; $i++) {
        $char = $inner[$i];

        if ($char === '|' && $depth === 0) {
            $parts[] = $current;
            $current = '';
            continue;
        }

        if ($char === '{') {
            $depth++;
        } elseif ($char === '}') {
            $depth--;
        }
        $current .= $char;
    }
    $parts[] = $current;

    return $parts;
}
/**
 * Renders one random variant of a parsed spinning template.
 *
 * @param array|string $spinning_tree Result of parse_spinning_string(): either
 *                                    a list of TextItem / OptionsItem nodes or,
 *                                    for a template without brace groups, the
 *                                    plain string itself.
 *
 * @return string The text with one alternative picked via rand() for every
 *                option group that is reached.
 */
function getShuffleText($spinning_tree)
{
    // A template without groups is parsed to a plain string; return it as is
    // instead of iterating over it (which yields a warning and '').
    if (is_string($spinning_tree)) {
        return $spinning_tree;
    }

    $text = '';
    foreach ($spinning_tree as $node) {
        if ($node instanceof TextItem) {
            $text .= $node->content;
        } elseif ($node instanceof OptionsItem) {
            $choice = $node->content[rand(0, count($node->content) - 1)];
            // The chosen alternative is a string or a nested node list; both
            // are handled by the recursive call.
            $text .= getShuffleText($choice);
        }
    }

    return $text;
}
/**
 * Collects the word shingles of a text, keyed by their MD5 hash.
 *
 * The text is split on single spaces and every run of $m consecutive tokens
 * is joined with a space to form one shingle. Identical shingles share a key,
 * so each distinct shingle appears once.
 *
 * @param string $text Text to split into shingles.
 * @param int    $m    Number of tokens per shingle.
 *
 * @return array Map of md5(shingle) => shingle; empty when the text has fewer
 *               than $m tokens.
 */
function getshingleHashes($text, $m)
{
    $words = explode(' ', $text);
    $limit = count($words) - $m + 1;

    $byHash = array();
    for ($start = 0; $start < $limit; $start++) {
        $shingle = implode(" ", array_slice($words, $start, $m));
        $byHash[md5($shingle)] = $shingle;
    }

    return $byHash;
}
/**
 * Scores how much two texts overlap, based on shared word shingles.
 *
 * Despite the name, a HIGHER value means the texts are MORE alike: the result
 * is the number of shingles of $s1 that also occur in $s2, times 100, divided
 * by (size of the larger shingle set - 1).
 *
 * @param string $s1 First text.
 * @param string $s2 Second text.
 * @param int    $m  Number of tokens per shingle.
 *
 * @return int|float Overlap score; 0 when nothing matches.
 */
function getDifferency($s1, $s2, $m = 3)
{
    $shingles_hash_s1 = getshingleHashes($s1, $m);
    $shingles_hash_s2 = getshingleHashes($s2, $m);

    $match = 0;
    foreach ($shingles_hash_s1 as $hash => $content) {
        if (isset($shingles_hash_s2[$hash])) {
            $match++;
        }
    }

    // The larger of BOTH sets; the original compared the first set with itself.
    // NOTE(review): the "- 1" lets identical texts score above 100 — confirm
    // whether it is intended; it is kept so existing thresholds keep their meaning.
    $denominator = max(count($shingles_hash_s1), count($shingles_hash_s2)) - 1;

    // With at most one shingle per text the denominator is 0 (or -1): avoid the
    // division by zero and report full overlap only if that one shingle matches.
    if ($denominator <= 0) {
        return $match > 0 ? 100 : 0;
    }

    return ($match * 100) / $denominator;
}
/**
 * Expands a parsed spinning template into every text it can produce.
 *
 * The result is built left to right: literal text is appended to all partial
 * texts, and each option group multiplies the partial texts by its
 * alternatives. Texts are concatenated directly, so characters such as "$" or
 * "\" in an alternative are kept verbatim.
 *
 * Result order: for every option group the alternatives vary slowest and the
 * texts built so far vary faster, i.e. later groups are the outer sort key.
 *
 * @param array|string $spinning_tree Result of parse_spinning_string(): a list
 *                                    of TextItem / OptionsItem nodes, or the
 *                                    plain string for a template without
 *                                    brace groups.
 *
 * @return array List of all variant strings; array('') for an empty tree.
 */
function getAllText($spinning_tree)
{
    // A template without groups is parsed to a plain string: one variant.
    if (is_string($spinning_tree)) {
        return array($spinning_tree);
    }

    $texte = array('');

    foreach ($spinning_tree as $item) {
        if ($item instanceof TextItem) {
            foreach ($texte as $index => $prefix) {
                $texte[$index] = $prefix . $item->content;
            }
            continue;
        }

        if (!($item instanceof OptionsItem)) {
            continue;
        }

        $expanded = array();
        foreach ($item->content as $option) {
            if (is_string($option)) {
                $variants = array($option);
            } elseif (is_array($option)) {
                // Expand a nested template once, not once per partial text.
                $variants = getAllText($option);
            } else {
                continue; // neither text nor a node list: contributes nothing
            }

            foreach ($texte as $prefix) {
                foreach ($variants as $variant) {
                    $expanded[] = $prefix . $variant;
                }
            }
        }
        $texte = $expanded;
    }

    return $texte;
}
// Alternative sample template, kept for reference:
//$string = "Ein {schöner|toller|netter|interessanter} {Punkt|Fakt|Vorteil|Vorzug|Nutzen} ist das {ein|der} Wintergarten in ###STADT### nicht nur {gut aussieht|toll ausschaut}, sondern auch {den Wert|die Wertigkeit} {Ihrer Immobilie| Ihres Hauses| Ihrer Wohnung} {steigert|erhöht|vermehrt}.";
$string = "
{Wenn du|Du bist} {auf der Suche|im Findungsprozess} nach {einem|dem richtigen|dem perfekten} {Ausbildungsplatz|Ausbildungsstelle}
in ~~~CITY~~~ bist, dann gehörst du zu den {glücklichen|glückseligen|fröhlichen} die sich {unter|aus} ~~~CITY-COUNT~~~
{Metallbaufirmen|Metallfirmen|Metall Firmen|Metallbauunternehmen|Metall Unternehmen|Metallverarbeitenden Unternhemen| Metallverarbeitenden Firmen}
aus ~~~CITY~~~ {eine Firma|ein Unternehmen|einen Laden|eine Geschäftsstelle|einen Betrieb} {aussuchen|auswählen} kannst.
";

// Collapse each run of line breaks into a single space so the template is one line.
$string = trim(preg_replace("/[\n\r]+/", " ", $string));

$spinning_tree = parse_spinning_string($string);
$texts = getAllText($spinning_tree);
$final_texts = array();

// One set of ~~~NAME~~~ values per text to produce.
$placeholder = array(
    array('CITY' => 'Bielefeld', 'CITY-COUNT' => '234'),
    array('CITY' => 'Berlin', 'CITY-COUNT' => '73'),
    array('CITY' => 'Köln', 'CITY-COUNT' => '23'),
    array('CITY' => 'Hamburg', 'CITY-COUNT' => '410')
);
$placeholder_pos = 0;

// Take variants from the end of the list and keep one per placeholder set,
// skipping variants that overlap too much with a text already kept.
while (count($texts) > 0) {
    $text = array_pop($texts);

    foreach ($placeholder[$placeholder_pos] as $key => $value) {
        // Plain string replacement: neither the key nor the value is
        // interpreted as a regular expression or backreference.
        $text = str_replace('~~~' . $key . '~~~', $value, $text);
    }

    // Highest overlap score against the texts kept so far.
    $diff = 0;
    foreach ($final_texts as $final_text) {
        $d = getDifferency($text, $final_text);
        if ($d > $diff) {
            $diff = $d;
        }
    }

    if ($diff < 30) {
        $final_texts[] = $text;
        $placeholder_pos++;
        if ($placeholder_pos >= count($placeholder)) {
            break;
        }
    }
}

print_r($final_texts);
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment