ace-dent · January 2, 2016 15:49
diff --git a/string_pack.php b/string_pack.php
 <?php
 	/**
 	  * MAGICSTRINGPACKER
 	  *
 	  * Pack small strings or Codes (e.g. 2 letter ISO country codes), into a compact 'magic string' minimized by:
 	  * 1> Running Codes into each other (eliminating matching first and last letters) 'AB'+'BA' => 'ABA' ;
 	  * 2> Avoiding spaces / code break characters, by joining Codes 'AA,BB' => 'AABB';
 	  * Only where 1&2 generates allowable sequences (unique, with no collisions).
 	  * This is useful for matching data against a compact reference (magic string), where hashing isn't an option.
 	  * E.g. Checking country codes in a spreadsheet against the list of EU countries.
 	  *
 	  * This software is provided 'as-is', without any express or implied warranty. 
 	  *		
 	  * @author Andrew C.E. Dent
 	  *
 	  * @license GPL
 	  * @license http://opensource.org/licenses/gpl-license.php GNU Public License
 	  * @version 1.2.1
 	  */

 	// The_Beginning:
 	echo "Start... \n";
 	$verbose = true; // Provide useful feedback for testing. Set to False for batch runs.
 	
 	
 	// Small strings -or codes- that will be packed
 	$codes_to_pack = array( 
 		// Country codes for EU VAT
 		"AT","BE","BG","CY","CZ","DE","DK","EE","ES",
 		"FI","FR","\GB","EL","HU","IE","IT","LT","LU", // VAT codes. Greece uses language code EL not country code GR
 		"LV","MT","NL","PL","PT","RO","SE","SI","SK",
 		//"EU", // General non-domicile EU registration
 		"HR", // Croatia joined EU 1st July 2013
 		"UK","IM","GR", // Extra country codes for checking Shipping destination against 
 		"MC" //  Monaco, French dependency
 	);
 	// To select the most commonly occuring code and fix its position at the start of the magic string
 	// prefix with a slash ('GB' > '\GB'). This hack prevents string matches and may reduce compression.
 	// Any slashes are ignored for statistics and stripped out at the end of the process.
 	// This is useful in practice when matching data against the magic string, for early matching and escape.
 	
 		
 	// Strings that are allowable in resulting compact form (unused Codes)
 	// These codes may be generated when we remove the break character between two input codes
 	// e.g. AA+BB > AABB, as 'AB' substring does not collide with another valid code.
 	$codes_allowed = array(
 		// Subset of codes -not- in use from ISO 3166-1:2006.
 		// Selection based on EU countries. Some commented out for smaller array and speed gain.
 		// "AB","AC","AH","AJ","AK","AP",//"AV","AY",
 		"BC","BK","BL","BP","BQ","BU",//"BX",
 		"CB","CE","CJ","CP","CQ","CT",//"CW",
 		"EA","EB","ED","EF","EI","EJ","EK",/*"EL" - Greece*/ "EM","EN","EO","EP","EQ",//"EV","EW","EX","EY","EZ",
 		"GC","GJ","GK","GO",//"GV","GX","GZ",
 		"IA","IB","IC","IF","IG","IH","II","IJ","IK","IP","IU",//"IV","IW","IX","IY","IZ",
 		"KA","KB","KC","KD","KF","KJ","KK","KL","KO","KQ","KS","KT","KU",//"KV","KX",
 		"LD","LE","LF","LG","LH","LJ","LL","LM","LN","LO","LP","LQ",//"LW","LX","LZ",
 		"MB","MF","MI","MJ",
 		"OA","OB","OC","OD","OE","OF","OG","OH","OI","OJ","OK","OL","ON","OP","OQ","OR","OS","OT","OU",//"OV","OW","OX","OY","OZ",
 		//"QB", "QC", "QD", "QE", "QF", "QG", "QH", "QI", "QJ", "QK", "QL"
 		"RA","RB","RC","RD","RF","RG","RH","RI","RJ","RK","RL","RM","RN","RP","RQ","RR","RT",//"RV","RX","RY","RZ",
 		"SF","SP","SQ","SS","SU",//"SW","SX",
 		"TA","TB","TE","TI","TQ","TS","TU",//"TX","TY",
 		"UB","UC","UD","UE","UF","UH","UI","UJ","UK","UL","UN","UO","UP","UQ","UR","UT","UU",//"UV","UW","UX",
 		"VB","VD","VF","VH","VJ","VK","VL","VM","VO","VP","VQ","VR","VS","VT",//"VV","VW","VX","VY","VZ",
 		"YA","YB","YC","YD","YF","YG","YH","YI","YJ","YK","YL","YM","YN","YO","YP","YQ","YR","YS",//"YV","YW","YX","YY","YZ",
 		"ZB","ZC","ZD","ZE","ZF","ZG","ZH","ZI","ZJ","ZK","ZL","ZN","ZO","ZP","ZQ","ZR","ZS","ZT","ZU",//"ZV","ZX","ZY"
 		
 		// Include the following historically 'reserved' and private codes, as we know they wont appear in our data set.
 		// Some commented out for smaller array and speed gain.
 		//"AA", "OO"
 		"CS","TP","YU","EU"
 		//"QM", "QN", "QO", "QP", "QQ", "QR", "QS", "QT", "QU", // "QV", "QW", "QX", "QY", "QZ",
 		//"XA", "XB", "XC", "XD", "XE", "XF", "XG", "XH", "XI", "XJ", "XK", "XL", "XM", "XN", 
 		//"XO", "XP", "XQ", "XR", "XS", "XT", "XU", // "XV", "XW", "XX", "XY", "XZ",
 		//"ZZ"
 	);
 	
 	// Add in the Codes to Pack as Allowable duplicates in the final magic string.
 	// (Remove following line if each code can only occur once).
 	$codes_allowed = array_merge($codes_allowed, $codes_to_pack);
 	
 	
 	// Benchmarking - Starting details
 	$start_codes = implode(".", $codes_to_pack); // Starting string with delimited Codes
 	$start_length = strlen(stripslashes($start_codes)); // Ignore slashes for Stats- stripped out at the end
 	$total_codes = count($codes_to_pack);
 	if ($verbose) {
 		echo "\n";
 		echo $total_codes . " codes using " . $start_length . " chars (~" . round(8*$start_length/$total_codes,1) . " bits/code) -> \n";
 		echo $start_codes . "\n";
 	}

 	//	@TODO: We should scan and clean up arrays to remove duplicates.

 	// Remove Allowed codes that cannot be matched against Codes to Pack - faster later
 	if ($verbose) echo "\nReduce list of Allowed Codes that cannot be matched: ";
 	foreach ($codes_allowed as &$code) {
 		$last = substr($code,-1,1) ; // Last letter of string
 		$check = false;
 		foreach ($codes_to_pack as $code_check) {
 			$first_check = substr($code_check,0,1) ; // First letter of string
 			if ($last == $first_check) $check = true; // The code was matched
 		}
 		unset($code_check); // Break the reference with the last element
 		if ($check == false) {
 			if ($verbose) echo $code. " - ";
 			$code = null;
 		}
 	}
 	unset($code); // Break the reference with the last element
 	
 	
 	// Remove Codes that cannot be merged in Round 1- faster matching later
 	/* Currently this approach creates local areas of low compression in the final string.
 	It could be possible to transform these strings, substituting characters.
 	Then requires code overhead to decode.
 	*/
 	
 	if ($verbose) echo "\n\nIgnore codes with no matching letters for first round: ";
 	$removed_codes = array (); // We will store removed codes to add back to the final output
 	foreach ($codes_to_pack as &$code) {
 		$first = substr($code,0,1) ; // First letter of string
 		$last = substr($code,-1,1) ; // Last letter of string
 		$check = false;
 		foreach ($codes_to_pack as $code_check) {
 			$first_check = substr($code_check,0,1) ; // First letter of string
 			$last_check = substr($code_check,-1,1) ; // Last letter of string
 			if ($first == $last_check || $last == $first_check) $check = true; // The code was matched
 		}
 		unset($code_check); // Break the reference with the last element
 		if ($check == false) {
 			if ($verbose) echo $code. " (". $first . $last . ") - ";
 			$removed_codes[] = $code;
 			$code = null;
 		}
 	}
 	unset($code); // Break the reference with the last element
 	if ($verbose) echo "\n";
 	
 	// Round 1 - Merge strings by matching (and combining) first and last letters
 	/* Current algorithm is quite lazy. A 'greedy' approach may yield higher local compression:
 	e.g. 	IESEE > IE ES SE EE > 10bits/code
 			SIEESELV > SI IE EE ES SE EL LV > 9.14bits/code
 			BGBEESIMCZ > BG GB BE EE ES SI IM MC CZ > 8.89bits/code 
 			
 	 In testing, where the numbers of codes to compact are small, a greedy approach yields more 
 	 orphan codes that are uncompacted, balancing out any gains of the few long runs. Every merge saves 2 bytes,
 	 whereas in Round 2, each join only saves 1 byte. It seems to be a better strategy to be lazy...
 	*/
 	
 	$input = $codes_to_pack;
 	for ($i = 1; $i <= 5; $i++) {
 		shuffle($input); // Randomize order of strings in each sweep
 		$result = array();
 		
 		foreach ($input as &$code) {
 			$first = substr($code,0,1) ; // First letter of string
 			$last = substr($code,-1,1) ; // Last letter of string
 			foreach ($input as &$code_check) {
 				$first_check = substr($code_check,0,1) ; // First letter of string
 				$last_check = substr($code_check,-1,1) ; // Last letter of string
 				if ($last == $first_check && $code != $code_check ) {
 					// Check this match first, to prefer 'GB+BG>GBG' vs 'GB+BG>BGB'
 					$result[] = $code . substr($code_check,1);
 					$code = null; 
 					$code_check = null;
 					break;
 				} else if ($first == $last_check && $code != $code_check ) {
 					$result[] = $code_check . substr($code,1);
 					$code = null; 
 					$code_check = null;
 					break;
 				}
 			}
 			unset($code_check); // Break the reference with the last element
 		}
 		unset($code); // Break the reference with the last element
 		
 		// Tidy up - Find any codes that weren't matched and add to the $results array for another pass
 		foreach ($input as &$code) {
 			if ($code != null) {
 				$result[] = $code;
 				$code = null; 
 			}
 		}

 		if ($verbose) {
 			echo "\n Run-".$i." : ";
 			echo implode(".", $result);
 		}
 		
 		unset($input);
 		$input = $result;
 		unset($result);
 	}
 	
 	// Add back in the unmatching codes removed at the start
 	$codes_to_pack = array_merge($codes_to_pack, $removed_codes); // Fix the original array
 	$result = array_merge($input, $removed_codes); // Working array
 	unset($input);
 	unset($removed_codes);
 	
 	
 	if ($verbose) echo "\n *** \n";
 	// Last round - remove space between codes, if consecutive letters are in Allowed list

 	$input = $result;
 	
 	for ($i = 1; $i <= 20; $i++) {
 		shuffle($input);
 		$result = array();
 		
 		foreach ($input as &$code) {
 			$last = substr($code,-1,1) ; // Last letter of string
 			foreach ($input as &$code_check) {
 				$first_check = substr($code_check,0,1) ; // First letter of string
 				foreach ($codes_allowed as $code_available) {
 					$combo = $last . $first_check ;
 					if ($combo == $code_available && $code != $code_check) {
 						
 						// @TODO :
 						// Test if joining of the two strings duplicates a code
 						// that may be replaced elsewhere (although unlikely with few codes)
 						// Find the original, existing string e.g. ES
 							// If string length = 2, delete ES -> - 
 							// Check start of strings ESAA -> -SAA
 							// Check end of strings AAES -> AAE-
 							// Else we cannot remove AESA -> AESA
 						// Only bother to test for one letter, as two is very unlikely for code length=2...
 						
 						// Finally combine the two strings
 						
 						// @TODO :
 						// Redundant check to see if first / last letter is equal then compress instead of concatenate
 						// IE + EE > IEE ... or AA + AA = AA
 						// This shouldn't happen after the 1st pass!... Check we don't overly compress e.g. 'EE'
 						
 						$result[] = $code . $code_check; // Concatenate strings						
 						$code = null; 
 						$code_check = null;
 						break;
 					}
 				}
 				unset($code_available); // Break the reference with the last element
 			}
 			unset($code_check); // Break the reference with the last element
 		}
 		unset($code); // Break the reference with the last element
 		
 		// Tidy up - Find any codes that weren't matched and add to the $results array for another pass
 		foreach ($input as &$code) {
 			if ($code != null) {
 				$result[] = $code;
 				$code = null; 
 			}
 		}

 		if ($verbose) {
 			echo "\n Run-".$i." : ";
 			echo implode(".", $result);
 		}
 		
 		unset($input);
 		$input = $result;
 		unset($result);
 	}
 	
 	
 	
 	// Finish up
 	$result = $input;
 	unset($input);
 	if ($verbose) echo "\n";
 	
 	
 	$magic_string = stripslashes( implode(".", $result) ); // Remove any slashes
 	unset($result);
 	$magic_length = strlen($magic_string);

 	
 	// Test for duplicates
 	/*
 	Currently we don't do anything about this. Best is to tackle at the point repeated strings are generated.
 	Also edge case of repeated runs of letters, 
 		e.g. AA+AB BA+AA > AAB BAA, AAB+BAA > AABAA
 	*/
 	
 	if ($verbose)  {
 		$result = array (); // Array to store codes for duplication check
 		$result_inter_codes = array (); // Array to store 'inter-codes' (adjacent non-coding letters)
 		// TODO - This currently only works for fixed 2 character codes
 		// Should be rewritten for variable input code lengths...
 		for ($i = 1; $i < $magic_length; $i++) {
 			$code = substr($magic_string,$i-1,2) ; // Grab possible 2 character code
 			$check = false;
 			foreach ($codes_to_pack as $code_check) {
 				if (stripslashes($code_check) == $code) {
 					$check = true; // The code was matched
 					$result[] = $code; // Codes get logged
 				}
 			}
 			unset($code_check); // Break the reference with the last element
 			if ($check == false) $result_inter_codes[] = $code; // Log inter-code
 		}
 		asort($result);
 		asort($result_inter_codes);
 		// If we have more Codes than we started with, duplicates are present...
 		if (count($result) != $total_codes) echo "\nWARNING: Possible duplicate codes... ";
 		echo "\nCodes, from adjacent letters: \n ";
 		echo implode("-", $result);
 		echo " ". count($result)." codes \n\n";
 		echo "Non-matching codes, from adjacent letters: \n ";
 		echo implode("-", $result_inter_codes);
 		echo " ". count($result_inter_codes)." codes \n\n";
 	}


 	if ($verbose) {
 		echo $magic_string . " <- " . $magic_length . " chars (~" . round(8*$magic_length/$total_codes,1) . " bits/code).\n";
 		echo "Compressed: " . round(($start_length - $magic_length) / $start_length * 100) . "% of original. \n";
 		echo round((800*$magic_length)/($total_codes*10),0) . "% Theoretical limit (10 bits/code). \n";
 		// Theoretical limit applies to country codes
 	}

 	// Set here conditions to Save the packed magic string generated
 	// For our data, we want the most common code first 'GB' for early matching and escape.
 	if ($magic_length < 49 || $magic_length < 50 && substr($magic_string,0,2) == "GB" ) {

 		$filename = 'magic_string.txt';
 		$somecontent = "\n" . $magic_length . " - ". $magic_string;

 		// Let's make sure the file exists and is writable first.
 		if (is_writable($filename)) {

 		    // We're opening $filename in append mode.
 		    // The file pointer is at the bottom of the file hence
 		    // that's where $somecontent will go when we fwrite() it.
 		    if (!$handle = fopen($filename, 'a')) {
 		         echo "Cannot open file ($filename)";
 		         exit;
 		    }

 		    // Write $somecontent to our opened file.
 		    if (fwrite($handle, $somecontent) === FALSE) {
 		        echo "Cannot write to file ($filename)";
 		        exit;
 		    }

 		    echo "Success, wrote ( $somecontent ) to file ($filename)";

 		    fclose($handle);

 		} else {
 		    echo "The file $filename is not writable";
 		}

 	}
 	
 	echo " ...End \n";
 	
 	// Cleanup and unset all variables
 	$keys = array();
 	foreach($GLOBALS as $k => $v){
 		$keys[] = $k;
 	}
 	for($t=1;$keys[$t];$t++){
 		unset($$keys[$t]);
 	}
 	unset($k); unset($v); unset($t);
 	
 ?>
	<?php
	/**
	* MAGICSTRINGPACKER
	*
	* Pack small strings or Codes (e.g. 2 letter ISO country codes), into a compact 'magic string' minimized by:
	* 1> Running Codes into each other (eliminating matching first and last letters) 'AB'+'BA' => 'ABA' ;
	* 2> Avoiding spaces / code break characters, by joining Codes 'AA,BB' => 'AABB';
	* Only where 1&2 generates allowable sequences (unique, with no collisions).
	* This is useful for matching data against a compact reference (magic string), where hashing isn't an option.
	* E.g. Checking country codes in a spreadsheet against the list of EU countries.
	*
	* This software is provided 'as-is', without any express or implied warranty.
	*
	* @author Andrew C.E. Dent
	*
	* @license GPL
	* @license http://opensource.org/licenses/gpl-license.php GNU Public License
	* @version 1.2.1
	*/

	// The_Beginning:
	echo "Start... \n";
	$verbose = true; // Provide useful feedback for testing. Set to False for batch runs.


	// Small strings -or codes- that will be packed
	$codes_to_pack = array(
	// Country codes for EU VAT
	"AT","BE","BG","CY","CZ","DE","DK","EE","ES",
	"FI","FR","\GB","EL","HU","IE","IT","LT","LU", // VAT codes. Greece uses language code EL not country code GR
	"LV","MT","NL","PL","PT","RO","SE","SI","SK",
	//"EU", // General non-domicile EU registration
	"HR", // Croatia joined EU 1st July 2013
	"UK","IM","GR", // Extra country codes for checking Shipping destination against
	"MC" // Monaco, French dependency
	);
	// To select the most commonly occuring code and fix its position at the start of the magic string
	// prefix with a slash ('GB' > '\GB'). This hack prevents string matches and may reduce compression.
	// Any slashes are ignored for statistics and stripped out at the end of the process.
	// This is useful in practice when matching data against the magic string, for early matching and escape.


	// Strings that are allowable in resulting compact form (unused Codes)
	// These codes may be generated when we remove the break character between two input codes
	// e.g. AA+BB > AABB, as 'AB' substring does not collide with another valid code.
	$codes_allowed = array(
	// Subset of codes -not- in use from ISO 3166-1:2006.
	// Selection based on EU countries. Some commented out for smaller array and speed gain.
	// "AB","AC","AH","AJ","AK","AP",//"AV","AY",
	"BC","BK","BL","BP","BQ","BU",//"BX",
	"CB","CE","CJ","CP","CQ","CT",//"CW",
	"EA","EB","ED","EF","EI","EJ","EK",/"EL" - Greece/ "EM","EN","EO","EP","EQ",//"EV","EW","EX","EY","EZ",
	"GC","GJ","GK","GO",//"GV","GX","GZ",
	"IA","IB","IC","IF","IG","IH","II","IJ","IK","IP","IU",//"IV","IW","IX","IY","IZ",
	"KA","KB","KC","KD","KF","KJ","KK","KL","KO","KQ","KS","KT","KU",//"KV","KX",
	"LD","LE","LF","LG","LH","LJ","LL","LM","LN","LO","LP","LQ",//"LW","LX","LZ",
	"MB","MF","MI","MJ",
	"OA","OB","OC","OD","OE","OF","OG","OH","OI","OJ","OK","OL","ON","OP","OQ","OR","OS","OT","OU",//"OV","OW","OX","OY","OZ",
	//"QB", "QC", "QD", "QE", "QF", "QG", "QH", "QI", "QJ", "QK", "QL"
	"RA","RB","RC","RD","RF","RG","RH","RI","RJ","RK","RL","RM","RN","RP","RQ","RR","RT",//"RV","RX","RY","RZ",
	"SF","SP","SQ","SS","SU",//"SW","SX",
	"TA","TB","TE","TI","TQ","TS","TU",//"TX","TY",
	"UB","UC","UD","UE","UF","UH","UI","UJ","UK","UL","UN","UO","UP","UQ","UR","UT","UU",//"UV","UW","UX",
	"VB","VD","VF","VH","VJ","VK","VL","VM","VO","VP","VQ","VR","VS","VT",//"VV","VW","VX","VY","VZ",
	"YA","YB","YC","YD","YF","YG","YH","YI","YJ","YK","YL","YM","YN","YO","YP","YQ","YR","YS",//"YV","YW","YX","YY","YZ",
	"ZB","ZC","ZD","ZE","ZF","ZG","ZH","ZI","ZJ","ZK","ZL","ZN","ZO","ZP","ZQ","ZR","ZS","ZT","ZU",//"ZV","ZX","ZY"

	// Include the following historically 'reserved' and private codes, as we know they wont appear in our data set.
	// Some commented out for smaller array and speed gain.
	//"AA", "OO"
	"CS","TP","YU","EU"
	//"QM", "QN", "QO", "QP", "QQ", "QR", "QS", "QT", "QU", // "QV", "QW", "QX", "QY", "QZ",
	//"XA", "XB", "XC", "XD", "XE", "XF", "XG", "XH", "XI", "XJ", "XK", "XL", "XM", "XN",
	//"XO", "XP", "XQ", "XR", "XS", "XT", "XU", // "XV", "XW", "XX", "XY", "XZ",
	//"ZZ"
	);

	// Add in the Codes to Pack as Allowable duplicates in the final magic string.
	// (Remove following line if each code can only occur once).
	$codes_allowed = array_merge($codes_allowed, $codes_to_pack);


	// Benchmarking - Starting details
	$start_codes = implode(".", $codes_to_pack); // Starting string with delimited Codes
	$start_length = strlen(stripslashes($start_codes)); // Ignore slashes for Stats- stripped out at the end
	$total_codes = count($codes_to_pack);
	if ($verbose) {
	echo "\n";
	echo $total_codes . " codes using " . $start_length . " chars (~" . round(8*$start_length/$total_codes,1) . " bits/code) -> \n";
	echo $start_codes . "\n";
	}

	// @TODO: We should scan and clean up arrays to remove duplicates.

	// Remove Allowed codes that cannot be matched against Codes to Pack - faster later
	if ($verbose) echo "\nReduce list of Allowed Codes that cannot be matched: ";
	foreach ($codes_allowed as &$code) {
	$last = substr($code,-1,1) ; // Last letter of string
	$check = false;
	foreach ($codes_to_pack as $code_check) {
	$first_check = substr($code_check,0,1) ; // First letter of string
	if ($last == $first_check) $check = true; // The code was matched
	}
	unset($code_check); // Break the reference with the last element
	if ($check == false) {
	if ($verbose) echo $code. " - ";
	$code = null;
	}
	}
	unset($code); // Break the reference with the last element


	// Remove Codes that cannot be merged in Round 1- faster matching later
	/* Currently this approach creates local areas of low compression in the final string.
	It could be possible to transform these strings, substituting characters.
	Then requires code overhead to decode.
	*/

	if ($verbose) echo "\n\nIgnore codes with no matching letters for first round: ";
	$removed_codes = array (); // We will store removed codes to add back to the final output
	foreach ($codes_to_pack as &$code) {
	$first = substr($code,0,1) ; // First letter of string
	$last = substr($code,-1,1) ; // Last letter of string
	$check = false;
	foreach ($codes_to_pack as $code_check) {
	$first_check = substr($code_check,0,1) ; // First letter of string
	$last_check = substr($code_check,-1,1) ; // Last letter of string
	if ($first == $last_check \|\| $last == $first_check) $check = true; // The code was matched
	}
	unset($code_check); // Break the reference with the last element
	if ($check == false) {
	if ($verbose) echo $code. " (". $first . $last . ") - ";
	$removed_codes[] = $code;
	$code = null;
	}
	}
	unset($code); // Break the reference with the last element
	if ($verbose) echo "\n";

	// Round 1 - Merge strings by matching (and combining) first and last letters
	/* Current algorithm is quite lazy. A 'greedy' approach may yield higher local compression:
	e.g. IESEE > IE ES SE EE > 10bits/code
	SIEESELV > SI IE EE ES SE EL LV > 9.14bits/code
	BGBEESIMCZ > BG GB BE EE ES SI IM MC CZ > 8.89bits/code

	In testing, where the numbers of codes to compact are small, a greedy approach yields more
	orphan codes that are uncompacted, balancing out any gains of the few long runs. Every merge saves 2 bytes,
	whereas in Round 2, each join only saves 1 byte. It seems to be a better strategy to be lazy...
	*/

	$input = $codes_to_pack;
	for ($i = 1; $i <= 5; $i++) {
	shuffle($input); // Randomize order of strings in each sweep
	$result = array();

	foreach ($input as &$code) {
	$first = substr($code,0,1) ; // First letter of string
	$last = substr($code,-1,1) ; // Last letter of string
	foreach ($input as &$code_check) {
	$first_check = substr($code_check,0,1) ; // First letter of string
	$last_check = substr($code_check,-1,1) ; // Last letter of string
	if ($last == $first_check && $code != $code_check ) {
	// Check this match first, to prefer 'GB+BG>GBG' vs 'GB+BG>BGB'
	$result[] = $code . substr($code_check,1);
	$code = null;
	$code_check = null;
	break;
	} else if ($first == $last_check && $code != $code_check ) {
	$result[] = $code_check . substr($code,1);
	$code = null;
	$code_check = null;
	break;
	}
	}
	unset($code_check); // Break the reference with the last element
	}
	unset($code); // Break the reference with the last element

	// Tidy up - Find any codes that weren't matched and add to the $results array for another pass
	foreach ($input as &$code) {
	if ($code != null) {
	$result[] = $code;
	$code = null;
	}
	}

	if ($verbose) {
	echo "\n Run-".$i." : ";
	echo implode(".", $result);
	}

	unset($input);
	$input = $result;
	unset($result);
	}

	// Add back in the unmatching codes removed at the start
	$codes_to_pack = array_merge($codes_to_pack, $removed_codes); // Fix the original array
	$result = array_merge($input, $removed_codes); // Working array
	unset($input);
	unset($removed_codes);


	if ($verbose) echo "\n *** \n";
	// Last round - remove space between codes, if consecutive letters are in Allowed list

	$input = $result;

	for ($i = 1; $i <= 20; $i++) {
	shuffle($input);
	$result = array();

	foreach ($input as &$code) {
	$last = substr($code,-1,1) ; // Last letter of string
	foreach ($input as &$code_check) {
	$first_check = substr($code_check,0,1) ; // First letter of string
	foreach ($codes_allowed as $code_available) {
	$combo = $last . $first_check ;
	if ($combo == $code_available && $code != $code_check) {

	// @TODO :
	// Test if joining of the two strings duplicates a code
	// that may be replaced elsewhere (although unlikely with few codes)
	// Find the original, existing string e.g. ES
	// If string length = 2, delete ES -> -
	// Check start of strings ESAA -> -SAA
	// Check end of strings AAES -> AAE-
	// Else we cannot remove AESA -> AESA
	// Only bother to test for one letter, as two is very unlikely for code length=2...

	// Finally combine the two strings

	// @TODO :
	// Redundant check to see if first / last letter is equal then compress instead of concatenate
	// IE + EE > IEE ... or AA + AA = AA
	// This shouldn't happen after the 1st pass!... Check we don't overly compress e.g. 'EE'

	$result[] = $code . $code_check; // Concatenate strings
	$code = null;
	$code_check = null;
	break;
	}
	}
	unset($code_available); // Break the reference with the last element
	}
	unset($code_check); // Break the reference with the last element
	}
	unset($code); // Break the reference with the last element

	// Tidy up - Find any codes that weren't matched and add to the $results array for another pass
	foreach ($input as &$code) {
	if ($code != null) {
	$result[] = $code;
	$code = null;
	}
	}

	if ($verbose) {
	echo "\n Run-".$i." : ";
	echo implode(".", $result);
	}

	unset($input);
	$input = $result;
	unset($result);
	}



	// Finish up
	$result = $input;
	unset($input);
	if ($verbose) echo "\n";


	$magic_string = stripslashes( implode(".", $result) ); // Remove any slashes
	unset($result);
	$magic_length = strlen($magic_string);


	// Test for duplicates
	/*
	Currently we don't do anything about this. Best is to tackle at the point repeated strings are generated.
	Also edge case of repeated runs of letters,
	e.g. AA+AB BA+AA > AAB BAA, AAB+BAA > AABAA
	*/

	if ($verbose) {
	$result = array (); // Array to store codes for duplication check
	$result_inter_codes = array (); // Array to store 'inter-codes' (adjacent non-coding letters)
	// TODO - This currently only works for fixed 2 character codes
	// Should be rewritten for variable input code lengths...
	for ($i = 1; $i < $magic_length; $i++) {
	$code = substr($magic_string,$i-1,2) ; // Grab possible 2 character code
	$check = false;
	foreach ($codes_to_pack as $code_check) {
	if (stripslashes($code_check) == $code) {
	$check = true; // The code was matched
	$result[] = $code; // Codes get logged
	}
	}
	unset($code_check); // Break the reference with the last element
	if ($check == false) $result_inter_codes[] = $code; // Log inter-code
	}
	asort($result);
	asort($result_inter_codes);
	// If we have more Codes than we started with, duplicates are present...
	if (count($result) != $total_codes) echo "\nWARNING: Possible duplicate codes... ";
	echo "\nCodes, from adjacent letters: \n ";
	echo implode("-", $result);
	echo " ". count($result)." codes \n\n";
	echo "Non-matching codes, from adjacent letters: \n ";
	echo implode("-", $result_inter_codes);
	echo " ". count($result_inter_codes)." codes \n\n";
	}


	if ($verbose) {
	echo $magic_string . " <- " . $magic_length . " chars (~" . round(8*$magic_length/$total_codes,1) . " bits/code).\n";
	echo "Compressed: " . round(($start_length - $magic_length) / $start_length * 100) . "% of original. \n";
	echo round((800$magic_length)/($total_codes10),0) . "% Theoretical limit (10 bits/code). \n";
	// Theoretical limit applies to country codes
	}

	// Set here conditions to Save the packed magic string generated
	// For our data, we want the most common code first 'GB' for early matching and escape.
	if ($magic_length < 49 \|\| $magic_length < 50 && substr($magic_string,0,2) == "GB" ) {

	$filename = 'magic_string.txt';
	$somecontent = "\n" . $magic_length . " - ". $magic_string;

	// Let's make sure the file exists and is writable first.
	if (is_writable($filename)) {

	// We're opening $filename in append mode.
	// The file pointer is at the bottom of the file hence
	// that's where $somecontent will go when we fwrite() it.
	if (!$handle = fopen($filename, 'a')) {
	echo "Cannot open file ($filename)";
	exit;
	}

	// Write $somecontent to our opened file.
	if (fwrite($handle, $somecontent) === FALSE) {
	echo "Cannot write to file ($filename)";
	exit;
	}

	echo "Success, wrote ( $somecontent ) to file ($filename)";

	fclose($handle);

	} else {
	echo "The file $filename is not writable";
	}

	}

	echo " ...End \n";

	// Cleanup and unset all variables
	$keys = array();
	foreach($GLOBALS as $k => $v){
	$keys[] = $k;
	}
	for($t=1;$keys[$t];$t++){
	unset($$keys[$t]);
	}
	unset($k); unset($v); unset($t);

	?>
No results found