Created
March 12, 2015 14:53
-
-
Save MattiJarvinen-BA/ac86592c2cfc2aa34501 to your computer and use it in GitHub Desktop.
Reading ZIP content file names as UTF-8 for saving into DB or something
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
/**
 * Example for reading zip filenames as
 * UTF-8 strings regardless of original
 * filename encoding.
 *
 * Expects $filename to already hold the path of the archive to read.
 * Requires the zip and mbstring extensions.
 */
$Zip = new ZipArchive();
$open = $Zip->open($filename, ZipArchive::CHECKCONS);
if ($open !== true) {
    // open() returns true on success and an error code otherwise
    die('Invalid zip');
}

$length = $Zip->numFiles;
for ($i = 0; $i < $length; $i++) {
    $importName = $Zip->getNameIndex($i, ZipArchive::FL_UNCHANGED);
    if ($importName === false) {
        // entry name could not be read, nothing to convert
        continue;
    }

    // The encoding of the raw name is not known yet, so look at the last
    // byte instead of decoding it as UTF-8. '/' and '\' are single bytes
    // in both UTF-8 and CP850 and never occur inside a UTF-8 multi-byte
    // sequence, which makes the byte-wise check safe.
    $lastChar = substr($importName, -1);
    if ($lastChar === '/' || $lastChar === '\\') {
        // skip dirs
        // if you want to create directories
        // here is a good place for that
        continue;
    }

    // get file info
    $info = pathinfo($importName);

    // some encodings do funny things
    // with filenames
    // we want to get UTF-8 filenames
    // so there are few that must be fixed
    $fixedName = $info['basename'];

    // if the name is not valid UTF-8
    // file name encoding is likely CP850
    if (!mb_check_encoding($fixedName, 'UTF-8')) {
        // convert from CP850 to UTF-8
        $fixedName = mb_convert_encoding($fixedName, 'UTF-8', 'CP850');
        // $fixedName should now be a valid UTF-8 string
    }

    // do what ever you are trying to do
}

// release the archive handle once every entry has been processed
$Zip->close();
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment