Skip to content

Instantly share code, notes, and snippets.

@renepenner
Created July 4, 2013 07:31
Show Gist options
  • Save renepenner/7279041f7fe2ef708a30 to your computer and use it in GitHub Desktop.
Save renepenner/7279041f7fe2ef708a30 to your computer and use it in GitHub Desktop.
<?php
/**
 * Base node of a parsed spinning-text tree.
 *
 * A node only carries a payload in the public $content property; the
 * subclasses TextItem and OptionsItem exist to tell the payload kinds apart.
 */
class SpinningItem{
    /**
     * Payload of the node, stored exactly as handed to the constructor.
     *
     * Declared explicitly because assigning an undeclared (dynamic) property
     * is deprecated as of PHP 8.2.
     *
     * @var mixed
     */
    public $content;

    /**
     * @param mixed $content Payload to keep on the node.
     */
    public function __construct($content){
        $this->content = $content;
    }
}
/**
 * Tree node for a run of literal text.
 *
 * Adds nothing of its own: the constructor and the $content property are
 * inherited unchanged from SpinningItem.
 */
class TextItem extends SpinningItem
{
}
/**
 * Tree node for one {a|b|c} choice group.
 *
 * Adds nothing of its own: the constructor and the $content property are
 * inherited unchanged from SpinningItem. The parser stores the list of
 * alternatives in $content.
 */
class OptionsItem extends SpinningItem
{
}
/**
 * Split the inside of one {...} group into its alternatives.
 *
 * Only a "|" at brace depth zero separates alternatives; a "|" inside a nested
 * {...} group belongs to that group and stays in place. The scan is byte-wise,
 * which is safe for UTF-8 because "{", "}" and "|" are single ASCII bytes.
 *
 * @param string $inner Group content without the surrounding braces.
 * @return array List of alternative strings (always at least one element).
 */
function split_spinning_options($inner){
    $options = array();
    $current = '';
    $depth = 0;
    $length = strlen($inner);
    for($i = 0; $i < $length; $i++){
        $char = $inner[$i];
        if($char === '|' && $depth === 0){
            $options[] = $current;
            $current = '';
            continue;
        }
        if($char === '{'){
            $depth++;
        }elseif($char === '}'){
            $depth--;
        }
        $current .= $char;
    }
    $options[] = $current;
    return $options;
}

/**
 * Parse a spinning-text template such as "Hello {world|{dear|kind} reader}".
 *
 * Balanced {...} groups are located by their byte offsets, so the text between
 * them is cut out directly. No marker tokens are written into the string,
 * which means literal text can contain any character sequence without being
 * mistaken for a marker.
 *
 * @param string $string Template text.
 * @return array|string The unchanged input string when it contains no {...}
 *                      group; otherwise a list of TextItem and OptionsItem
 *                      nodes in source order. Each entry of an OptionsItem's
 *                      content is itself the result of parsing that
 *                      alternative (a string, or a nested node list).
 */
function parse_spinning_string($string){
    // Recursive pattern: one balanced {...} group, nested groups included.
    $group_pattern = '/\{((?>[^{}]+)|(?R))*\}/';
    if(!preg_match_all($group_pattern, $string, $groups, PREG_OFFSET_CAPTURE)){
        return $string;
    }

    $out = array();
    $cursor = 0; // byte offset of the first character not yet consumed
    foreach($groups[0] as $group){
        list($group_text, $group_start) = $group;

        // Literal text between the previous group and this one.
        if($group_start > $cursor){
            $out[] = new TextItem(substr($string, $cursor, $group_start - $cursor));
        }

        // Cast guards against substr() returning false for "{}" on PHP < 8.
        $options = split_spinning_options((string)substr($group_text, 1, -1));
        foreach($options as $index => $option){
            $options[$index] = parse_spinning_string($option);
        }
        $out[] = new OptionsItem($options);

        $cursor = $group_start + strlen($group_text);
    }

    // Literal text after the last group.
    if($cursor < strlen($string)){
        $out[] = new TextItem(substr($string, $cursor));
    }
    return $out;
}
/**
 * Render one randomly chosen variant of a parsed spinning tree.
 *
 * @param array|string $spinning_tree Result of parse_spinning_string(): either
 *                                    a node list or, for input without any
 *                                    {...} group, the plain string itself.
 * @return string The text with one alternative picked for every choice group.
 */
function getShuffleText($spinning_tree)
{
    // A template without groups is parsed to a bare string; it is its own
    // (only) variant. Without this guard the foreach below would be handed a
    // string and the function would return ''.
    if(is_string($spinning_tree)){
        return $spinning_tree;
    }

    $text = '';
    foreach($spinning_tree as $node){
        if($node instanceof TextItem){
            $text .= $node->content;
        }elseif($node instanceof OptionsItem){
            $choice = $node->content[rand(0, count($node->content) - 1)];
            // The chosen alternative is a string or a nested node list; the
            // recursive call handles both.
            $text .= getShuffleText($choice);
        }
    }
    return $text;
}
/**
 * Collect the word shingles of a text, keyed by their MD5 hash.
 *
 * The text is split on single spaces; every run of $m consecutive tokens is
 * joined back with single spaces to form one shingle. Identical shingles
 * collapse into a single entry because they share a hash key.
 *
 * @param string $text Text to cut into shingles.
 * @param int    $m    Number of tokens per shingle.
 * @return array Map of md5(shingle) => shingle, in order of first occurrence;
 *               empty when the text has fewer than $m tokens.
 */
function getshingleHashes($text, $m){
    $words = explode(' ', $text);
    // Exclusive upper bound for the index of a shingle's first word.
    $limit = count($words) - $m + 1;

    $by_hash = array();
    for($start = 0; $start < $limit; $start++){
        $shingle = implode(" ", array_slice($words, $start, $m));
        $by_hash[md5($shingle)] = $shingle;
    }
    return $by_hash;
}
/**
 * Measure how much two texts overlap, as a percentage of shared shingles.
 *
 * Despite the name, a HIGHER value means the texts are MORE alike: the result
 * is the number of distinct shingles of $s1 that also occur in $s2, relative
 * to the larger of the two shingle sets.
 *
 * @param string $s1 First text.
 * @param string $s2 Second text.
 * @param int    $m  Shingle size in words (default 3).
 * @return int|float Overlap in the range 0..100; 0 when neither text is long
 *                   enough to yield a single shingle.
 */
function getDifferency($s1, $s2, $m=3){
    $shingles_s1 = getshingleHashes($s1, $m);
    $shingles_s2 = getshingleHashes($s2, $m);

    // Normalise by the larger set so a short text contained in a long one
    // does not score 100.
    $larger = max(count($shingles_s1), count($shingles_s2));
    if($larger === 0){
        // No shingles at all: nothing to compare and nothing to divide by.
        return 0;
    }

    $shared = 0;
    foreach($shingles_s1 as $hash => $shingle){
        if(isset($shingles_s2[$hash])){
            $shared++;
        }
    }
    return ($shared*100)/$larger;
}
/**
 * Expand a parsed spinning tree into every text it can produce.
 *
 * The result is built left to right: literal text is appended to every
 * partial result, and a choice group multiplies the partial results by its
 * alternatives. Texts are joined by plain concatenation, so alternatives may
 * contain any characters (including "$1" or "\0") without being reinterpreted.
 *
 * The number of results is the product of the alternative counts of all
 * groups, so large templates grow quickly.
 *
 * @param array|string $spinning_tree Result of parse_spinning_string(): a node
 *                                    list, or the plain string for input
 *                                    without any {...} group.
 * @return array List of all variant strings. Within one choice group the
 *               order is: alternative by alternative, and for each alternative
 *               every partial text built so far.
 */
function getAllText($spinning_tree){
    // A template without groups is parsed to a bare string: one variant.
    if(is_string($spinning_tree)){
        return array($spinning_tree);
    }

    $texts = array('');
    foreach($spinning_tree as $item){
        if($item instanceof TextItem){
            foreach($texts as $t_id => $t){
                $texts[$t_id] = $t.$item->content;
            }
        }elseif($item instanceof OptionsItem){
            $expanded = array();
            foreach($item->content as $option){
                if(is_string($option)){
                    $variants = array($option);
                }elseif(is_array($option)){
                    // Nested node list: expand it into its own variants first.
                    $variants = getAllText($option);
                }else{
                    continue;
                }
                foreach($texts as $t){
                    foreach($variants as $variant){
                        $expanded[] = $t.$variant;
                    }
                }
            }
            $texts = $expanded;
        }
    }
    return $texts;
}
// Alternative sample template, kept for manual experiments:
//$string = "Ein {schöner|toller|netter|interessanter} {Punkt|Fakt|Vorteil|Vorzug|Nutzen} ist das {ein|der} Wintergarten in ###STADT### nicht nur {gut aussieht|toll ausschaut}, sondern auch {den Wert|die Wertigkeit} {Ihrer Immobilie| Ihres Hauses| Ihrer Wohnung} {steigert|erhöht|vermehrt}.";

// Demo template: {a|b} marks a choice group, ~~~NAME~~~ marks a value that is
// filled in per generated text from $placeholder below.
$string = "
{Wenn du|Du bist} {auf der Suche|im Findungsprozess} nach {einem|dem richtigen|dem perfekten} {Ausbildungsplatz|Ausbildungsstelle}
in ~~~CITY~~~ bist, dann gehörst du zu den {glücklichen|glückseligen|fröhlichen} die sich {unter|aus} ~~~CITY-COUNT~~~
{Metallbaufirmen|Metallfirmen|Metall Firmen|Metallbauunternehmen|Metall Unternehmen|Metallverarbeitenden Unternhemen| Metallverarbeitenden Firmen}
aus ~~~CITY~~~ {eine Firma|ein Unternehmen|einen Laden|eine Geschäftsstelle|einen Betrieb} {aussuchen|auswählen} kannst.
";
// Fold the line breaks into spaces so the template becomes a single line.
$string = trim(preg_replace("/[\n\r]+/", " ", $string));

$spinning_tree = parse_spinning_string($string);
$texts = getAllText($spinning_tree);
$final_texts = array();

// One value set per text to generate; the run stops once each set is used.
$placeholder = array(
    array('CITY' => 'Bielefeld', 'CITY-COUNT' => '234'),
    array('CITY' => 'Berlin', 'CITY-COUNT' => '73'),
    array('CITY' => 'Köln', 'CITY-COUNT' => '23'),
    array('CITY' => 'Hamburg', 'CITY-COUNT' => '410')
);
$placeholder_pos = 0;

// Take candidates from the end of the variant list until every value set has
// produced one accepted text or the candidates run out.
while(count($texts) > 0 && $placeholder_pos < count($placeholder)){
    $text = array_pop($texts);

    // Literal substitution: neither the key nor the value is a regex, so
    // str_replace avoids any pattern / back-reference interpretation.
    foreach($placeholder[$placeholder_pos] as $key => $value){
        $text = str_replace('~~~'.$key.'~~~', $value, $text);
    }

    // Reject the candidate as soon as it shares 30% or more of its shingles
    // with any text that was already accepted.
    $too_similar = false;
    foreach($final_texts as $final_text){
        if(getDifferency($text, $final_text) >= 30){
            $too_similar = true;
            break;
        }
    }

    if(!$too_similar){
        $final_texts[] = $text;
        $placeholder_pos++;
    }
}
print_r($final_texts);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment