Forked from Congee/macosx_unzip_multi_charset.rb
Last active
August 29, 2015 14:22
-
-
Save zwf/155348978878dd9a8100 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'formula' | |
class Unzip < Formula | |
homepage 'http://www.info-zip.org/pub/infozip/UnZip.html' | |
url 'https://downloads.sourceforge.net/project/infozip/UnZip%206.x%20%28latest%29/UnZip%206.0/unzip60.tar.gz' | |
version '6.0' | |
sha1 'abf7de8a4018a983590ed6f5cbd990d4740f8a22' | |
keg_only :provided_by_osx | |
# Add -O & -I option for multi-charset support | |
patch :DATA | |
def install | |
system "make", "-f", "unix/Makefile", "macosx" | |
system "make", "prefix=#{prefix}", "MANDIR=#{man}", "install" | |
end | |
test do | |
system "#{bin}/unzip", "--help" | |
end | |
end | |
__END__ | |
--- a/unix/Makefile 2015-02-12 22:17:02.000000000 +0800 | |
+++ b/unix/Makefile 2015-02-12 22:17:16.000000000 +0800 | |
@@ -872,7 +872,7 @@ | |
# Macintosh MacOS X (Unix-compatible enviroment), using standard compiler | |
macosx: unix_make | |
- $(MAKE) unzips CFLAGS="-O3 -Wall -DBSD" LF2="" | |
+ $(MAKE) unzips CFLAGS="-O3 -Wall -DBSD" LF2="-liconv" | |
$(STRIP) $(UNZIPS) | |
# Macintosh MacOS X (Unix-compatible enviroment), using gcc | |
diff -r e812cb68e51d unix/unix.c | |
--- a/unix/unix.c Tue Jun 23 23:08:25 2009 -0500 | |
+++ b/unix/unix.c Thu Jun 25 00:10:29 2009 -0500 | |
@@ -30,6 +30,9 @@ | |
#define UNZIP_INTERNAL | |
#include "unzip.h" | |
+#include <iconv.h> | |
+#include <langinfo.h> | |
+ | |
#ifdef SCO_XENIX | |
# define SYSNDIR | |
#else /* SCO Unix, AIX, DNIX, TI SysV, Coherent 4.x, ... */ | |
@@ -1874,3 +1877,90 @@ | |
} | |
} | |
#endif /* QLZIP */ | |
+ | |
+ | |
+typedef struct { | |
+    const char *local_charset;   /* codeset name as reported for the locale */ | |
+    const char *archive_charset; /* DOS code page assumed for that codeset */ | |
+} CHARSET_MAP; | |
+ | |
+/* A mapping of local <-> archive charsets used by default to convert filenames | |
+ * of DOS/Windows Zip archives. Currently very basic. Read-only lookup table. */ | |
+static const CHARSET_MAP dos_charset_map[] = { | |
+    { "ANSI_X3.4-1968", "CP850" }, | |
+    { "ISO-8859-1", "CP850" }, | |
+    { "CP1252", "CP850" }, | |
+    { "UTF-8", "CP866" }, | |
+    { "KOI8-R", "CP866" }, | |
+    { "KOI8-U", "CP866" }, | |
+    { "ISO-8859-5", "CP866" } | |
+}; | |
+ | |
+char OEM_CP[MAX_CP_NAME] = ""; /* charset handed to oem_intern(); set by -O or guessed from the locale */ | |
+char ISO_CP[MAX_CP_NAME] = ""; /* charset handed to iso_intern(); set by -I, empty means no conversion */ | |
+ | |
+/* Guess a default for OEM_CP by looking the current locale's codeset up in | |
+ * dos_charset_map.  An OEM_CP that is already set is kept; ISO_CP is left alone. */ | |
+void init_conversion_charsets(void) | |
+{ | |
+    const char *local_charset; | |
+    size_t i; | |
+ | |
+    if(*OEM_CP == '\0') {   /* Make a guess only if OEM_CP not already set. */ | |
+        local_charset = nl_langinfo(CODESET); | |
+        for(i = 0; i < sizeof(dos_charset_map)/sizeof(dos_charset_map[0]); i++) | |
+            if(!strcasecmp(local_charset, dos_charset_map[i].local_charset)) { | |
+                /* Copy at most size-1 bytes: the still-zero last byte terminates. */ | |
+                strncpy(OEM_CP, dos_charset_map[i].archive_charset, | |
+                        sizeof(OEM_CP) - 1); | |
+                break; | |
+            } | |
+    } | |
+} | |
+ | |
+/* Convert a string in place from from_charset to the current locale charset | |
+ * using iconv().  Best effort: on any failure (unknown charset, allocation or | |
+ * conversion error, result too long) the string is left intact. | |
+ * NOTE(review): assumes callers pass G.filename (FILNAMSIZ bytes) or G.outbuf | |
+ * (OUTBUFSIZ + 1 bytes), as in the CVE-2015-1315 fix -- confirm in fileio.c. */ | |
+static void charset_to_intern(char *string, char *from_charset) | |
+{ | |
+    iconv_t cd; | |
+    char *s, *d, *buf = NULL; | |
+    size_t slen, dlen, buflen, outlen; | |
+ | |
+    if(*from_charset == '\0') | |
+        return; | |
+    if((cd = iconv_open(nl_langinfo(CODESET), from_charset)) == (iconv_t)-1) | |
+        return; | |
+ | |
+    /* Never produce more than the smallest destination buffer can hold. */ | |
+    buflen = (OUTBUFSIZ + 1 < FILNAMSIZ) ? OUTBUFSIZ + 1 : FILNAMSIZ; | |
+    slen = strlen(string); | |
+    s = string; | |
+    d = buf = malloc(buflen); | |
+    if(!buf) | |
+        goto cleanup; | |
+    dlen = buflen - 1;                /* keep room for the terminating NUL */ | |
+    if(iconv(cd, &s, &slen, &d, &dlen) == (size_t)-1) | |
+        goto cleanup;                 /* conversion failed: keep the original */ | |
+    outlen = (size_t)(d - buf); | |
+    memcpy(string, buf, outlen);      /* copy only the bytes iconv produced */ | |
+    string[outlen] = '\0'; | |
+ | |
+ cleanup: | |
+    free(buf); | |
+    iconv_close(cd); | |
+} | |
+ | |
+/* Convert string in place from OEM_CP to the locale charset (external: reached via _OEM_INTERN). */ | |
+void oem_intern(char *string) | |
+{ | |
+    charset_to_intern(string, OEM_CP); | |
+} | |
+ | |
+/* Convert string in place from ISO_CP to the locale charset (external: reached via _ISO_INTERN). */ | |
+void iso_intern(char *string) | |
+{ | |
+    charset_to_intern(string, ISO_CP); | |
+} | |
diff -r e812cb68e51d unix/unxcfg.h | |
--- a/unix/unxcfg.h Tue Jun 23 23:08:25 2009 -0500 | |
+++ b/unix/unxcfg.h Thu Jun 25 00:10:29 2009 -0500 | |
@@ -227,4 +227,30 @@ | |
/* wild_dir, dirname, wildname, matchname[], dirnamelen, have_dirname, */ | |
/* and notfirstcall are used by do_wild(). */ | |
+ | |
+#define MAX_CP_NAME 25 /* size of the OEM_CP and ISO_CP charset-name buffers */ | |
+/* Route SETLOCALE() to the C library's setlocale(), replacing any earlier definition. */ | |
+#ifdef SETLOCALE | |
+# undef SETLOCALE | |
+#endif | |
+#define SETLOCALE(category, locale) setlocale(category, locale) | |
+#include <locale.h> | |
+/* Replace any earlier _ISO_INTERN with a call to iso_intern(). */ | |
+#ifdef _ISO_INTERN | |
+# undef _ISO_INTERN | |
+#endif | |
+#define _ISO_INTERN(str1) iso_intern(str1) | |
+/* Likewise route _OEM_INTERN to oem_intern(); IZ_OEM2ISO_ARRAY is defined if it was not already. */ | |
+#ifdef _OEM_INTERN | |
+# undef _OEM_INTERN | |
+#endif | |
+#ifndef IZ_OEM2ISO_ARRAY | |
+# define IZ_OEM2ISO_ARRAY | |
+#endif | |
+#define _OEM_INTERN(str1) oem_intern(str1) | |
+/* Conversion helpers named by the macros above, plus the OEM_CP guessing initializer. */ | |
+void iso_intern(char *); | |
+void oem_intern(char *); | |
+void init_conversion_charsets(void); | |
+ | |
#endif /* !__unxcfg_h */ | |
diff -r e812cb68e51d unzip.c | |
--- a/unzip.c Tue Jun 23 23:08:25 2009 -0500 | |
+++ b/unzip.c Thu Jun 25 00:10:29 2009 -0500 | |
@@ -327,11 +327,21 @@ | |
-2 just filenames but allow -h/-t/-z -l long Unix \"ls -l\" format\n\ | |
-v verbose, multi-page format\n"; | |
+#ifndef __APPLE__ | |
static ZCONST char Far ZipInfoUsageLine3[] = "miscellaneous options:\n\ | |
-h print header line -t print totals for listed files or for all\n\ | |
-z print zipfile comment -T print file times in sortable decimal format\ | |
\n -C be case-insensitive %s\ | |
-x exclude filenames that follow from listing\n"; | |
+#else /* __APPLE__: takes the same single %s argument as the variant above, plus -O/-I help */ | |
+static ZCONST char Far ZipInfoUsageLine3[] = "miscellaneous options:\n\ | |
+ -h print header line -t print totals for listed files or for all\n\ | |
+ -z print zipfile comment  -T  print file times in sortable decimal format\ | |
+\n  -C  be case-insensitive %s\ | |
+ -x exclude filenames that follow from listing\n\ | |
+ -O CHARSET specify a character encoding for DOS, Windows and OS/2 archives\n\ | |
+ -I CHARSET specify a character encoding for MAC OS X and other archives\n"; | |
+#endif /* !__APPLE__ */ | |
#ifdef MORE | |
static ZCONST char Far ZipInfoUsageLine4[] = | |
" -M page output through built-in \"more\"\n"; | |
@@ -665,6 +675,17 @@ | |
-U use escapes for all non-ASCII Unicode -UU ignore any Unicode fields\n\ | |
-C match filenames case-insensitively -L make (some) names \ | |
lowercase\n %-42s -V retain VMS version numbers\n%s"; | |
+#elif (defined __APPLE__) /* same %-42s and %s arguments as the default text, plus -O/-I help */ | |
+static ZCONST char Far UnzipUsageLine4[] = "\ | |
+modifiers:\n\ | |
+ -n never overwrite existing files -q quiet mode (-qq => quieter)\n\ | |
+ -o overwrite files WITHOUT prompting -a auto-convert any text files\n\ | |
+ -j junk paths (do not make directories) -aa treat ALL files as text\n\ | |
+ -U use escapes for all non-ASCII Unicode -UU ignore any Unicode fields\n\ | |
+ -C match filenames case-insensitively -L make (some) names \ | |
+lowercase\n %-42s -V retain VMS version numbers\n%s\ | |
+ -O CHARSET specify a character encoding for DOS, Windows and OS/2 archives\n\ | |
+ -I CHARSET specify a character encoding for MAC OS X and other archives\n\n"; | |
#else /* !VMS */ | |
static ZCONST char Far UnzipUsageLine4[] = "\ | |
modifiers:\n\ | |
@@ -803,6 +824,10 @@ | |
#endif /* UNICODE_SUPPORT */ | |
+#ifdef __APPLE__ | |
+ init_conversion_charsets(); /* guess a default OEM_CP from the locale codeset */ | |
+#endif | |
+ | |
#if (defined(__IBMC__) && defined(__DEBUG_ALLOC__)) | |
extern void DebugMalloc(void); | |
@@ -1336,6 +1361,11 @@ | |
argc = *pargc; | |
argv = *pargv; | |
+#ifdef __APPLE__ | |
+ extern char OEM_CP[MAX_CP_NAME]; /* defined in unix/unix.c; filled in by the -O option below */ | |
+ extern char ISO_CP[MAX_CP_NAME]; /* defined in unix/unix.c; filled in by the -I option below */ | |
+#endif | |
+ | |
while (++argv, (--argc > 0 && *argv != NULL && **argv == '-')) { | |
s = *argv + 1; | |
while ((c = *s++) != 0) { /* "!= 0": prevent Turbo C warning */ | |
@@ -1517,6 +1547,35 @@ | |
} | |
break; | |
#endif /* MACOS */ | |
+#ifdef __APPLE__ | |
+        case ('I'): /* -I CHARSET: store the name in ISO_CP (used by iso_intern) */ | |
+            if (negative) { | |
+                Info(slide, 0x401, ((char *)slide, | |
+                  "error: encodings can't be negated")); | |
+                return(PK_PARAM); | |
+            } else { | |
+                if(*s) { /* Handle the -Icharset case */ | |
+                    /* Assume that charsets can't start with a dash to spot arguments misuse */ | |
+                    if(*s == '-') { | |
+                        Info(slide, 0x401, ((char *)slide, | |
+                          "error: a valid character encoding should follow the -I argument")); | |
+                        return(PK_PARAM); | |
+                    } | |
+                } else { /* -I charset: the name is the next argument */ | |
+                    ++argv; /* a missing, empty or dash-led name is rejected */ | |
+                    if(!(--argc > 0 && *argv != NULL && **argv != '\0' && **argv != '-')) { | |
+                        Info(slide, 0x401, ((char *)slide, | |
+                          "error: a valid character encoding should follow the -I argument")); | |
+                        return(PK_PARAM); | |
+                    } | |
+                    s = *argv; | |
+                } | |
+                strncpy(ISO_CP, s, sizeof(ISO_CP) - 1); /* bounded copy ... */ | |
+                ISO_CP[sizeof(ISO_CP) - 1] = '\0';      /* ... always NUL-terminated */ | |
+                while(*s) ++s; /* No params straight after charset name */ | |
+            } | |
+            break; | |
+#endif /* ?__APPLE__ */ | |
case ('j'): /* junk pathnames/directory structure */ | |
if (negative) | |
uO.jflag = FALSE, negative = 0; | |
@@ -1592,6 +1651,35 @@ | |
} else | |
++uO.overwrite_all; | |
break; | |
+#ifdef __APPLE__ | |
+        case ('O'): /* -O CHARSET: store the name in OEM_CP (used by oem_intern) */ | |
+            if (negative) { | |
+                Info(slide, 0x401, ((char *)slide, | |
+                  "error: encodings can't be negated")); | |
+                return(PK_PARAM); | |
+            } else { | |
+                if(*s) { /* Handle the -Ocharset case */ | |
+                    /* Assume that charsets can't start with a dash to spot arguments misuse */ | |
+                    if(*s == '-') { | |
+                        Info(slide, 0x401, ((char *)slide, | |
+                          "error: a valid character encoding should follow the -O argument")); | |
+                        return(PK_PARAM); | |
+                    } | |
+                } else { /* -O charset: the name is the next argument */ | |
+                    ++argv; /* a missing, empty or dash-led name is rejected */ | |
+                    if(!(--argc > 0 && *argv != NULL && **argv != '\0' && **argv != '-')) { | |
+                        Info(slide, 0x401, ((char *)slide, | |
+                          "error: a valid character encoding should follow the -O argument")); | |
+                        return(PK_PARAM); | |
+                    } | |
+                    s = *argv; | |
+                } | |
+                strncpy(OEM_CP, s, sizeof(OEM_CP) - 1); /* bounded copy ... */ | |
+                OEM_CP[sizeof(OEM_CP) - 1] = '\0';      /* ... always NUL-terminated */ | |
+                while(*s) ++s; /* No params straight after charset name */ | |
+            } | |
+            break; | |
+#endif /* ?__APPLE__ */ | |
case ('p'): /* pipes: extract to stdout, no messages */ | |
if (negative) { | |
uO.cflag = FALSE; | |
diff -r e812cb68e51d unzpriv.h | |
--- a/unzpriv.h Tue Jun 23 23:08:25 2009 -0500 | |
+++ b/unzpriv.h Thu Jun 25 00:10:29 2009 -0500 | |
@@ -3008,7 +3008,7 @@ | |
!(((islochdr) || (isuxatt)) && \ | |
((hostver) == 25 || (hostver) == 26 || (hostver) == 40))) || \ | |
(hostnum) == FS_HPFS_ || \ | |
- ((hostnum) == FS_NTFS_ && (hostver) == 50)) { \ | |
+ ((hostnum) == FS_NTFS_ /* && (hostver) == 50 */ )) { \ | |
_OEM_INTERN((string)); \ | |
} else { \ | |
_ISO_INTERN((string)); \ | |
diff -r e812cb68e51d zipinfo.c | |
--- a/zipinfo.c Tue Jun 23 23:08:25 2009 -0500 | |
+++ b/zipinfo.c Thu Jun 25 00:10:29 2009 -0500 | |
@@ -457,6 +457,10 @@ | |
int tflag_slm=TRUE, tflag_2v=FALSE; | |
int explicit_h=FALSE, explicit_t=FALSE; | |
+#ifdef __APPLE__ | |
+ extern char OEM_CP[MAX_CP_NAME]; /* defined in unix/unix.c; filled in by the -O option below */ | |
+ extern char ISO_CP[MAX_CP_NAME]; /* defined in unix/unix.c; filled in by the -I option below */ | |
+#endif | |
#ifdef MACOS | |
uO.lflag = LFLAG; /* reset default on each call */ | |
@@ -501,6 +505,35 @@ | |
uO.lflag = 0; | |
} | |
break; | |
+#ifdef __APPLE__ | |
+            case ('I'): /* -I CHARSET: store the name in ISO_CP (used by iso_intern) */ | |
+                if (negative) { | |
+                    Info(slide, 0x401, ((char *)slide, | |
+                      "error: encodings can't be negated")); | |
+                    return(PK_PARAM); | |
+                } else { | |
+                    if(*s) { /* Handle the -Icharset case */ | |
+                        /* Assume that charsets can't start with a dash to spot arguments misuse */ | |
+                        if(*s == '-') { | |
+                            Info(slide, 0x401, ((char *)slide, | |
+                              "error: a valid character encoding should follow the -I argument")); | |
+                            return(PK_PARAM); | |
+                        } | |
+                    } else { /* -I charset: the name is the next argument */ | |
+                        ++argv; /* a missing, empty or dash-led name is rejected */ | |
+                        if(!(--argc > 0 && *argv != NULL && **argv != '\0' && **argv != '-')) { | |
+                            Info(slide, 0x401, ((char *)slide, | |
+                              "error: a valid character encoding should follow the -I argument")); | |
+                            return(PK_PARAM); | |
+                        } | |
+                        s = *argv; | |
+                    } | |
+                    strncpy(ISO_CP, s, sizeof(ISO_CP) - 1); /* bounded copy ... */ | |
+                    ISO_CP[sizeof(ISO_CP) - 1] = '\0';      /* ... always NUL-terminated */ | |
+                    while(*s) ++s; /* No params straight after charset name */ | |
+                } | |
+                break; | |
+#endif /* ?__APPLE__ */ | |
case 'l': /* longer form of "ls -l" type listing */ | |
if (negative) | |
uO.lflag = -2, negative = 0; | |
@@ -521,6 +554,35 @@ | |
G.M_flag = TRUE; | |
break; | |
#endif | |
+#ifdef __APPLE__ | |
+            case ('O'): /* -O CHARSET: store the name in OEM_CP (used by oem_intern) */ | |
+                if (negative) { | |
+                    Info(slide, 0x401, ((char *)slide, | |
+                      "error: encodings can't be negated")); | |
+                    return(PK_PARAM); | |
+                } else { | |
+                    if(*s) { /* Handle the -Ocharset case */ | |
+                        /* Assume that charsets can't start with a dash to spot arguments misuse */ | |
+                        if(*s == '-') { | |
+                            Info(slide, 0x401, ((char *)slide, | |
+                              "error: a valid character encoding should follow the -O argument")); | |
+                            return(PK_PARAM); | |
+                        } | |
+                    } else { /* -O charset: the name is the next argument */ | |
+                        ++argv; /* a missing, empty or dash-led name is rejected */ | |
+                        if(!(--argc > 0 && *argv != NULL && **argv != '\0' && **argv != '-')) { | |
+                            Info(slide, 0x401, ((char *)slide, | |
+                              "error: a valid character encoding should follow the -O argument")); | |
+                            return(PK_PARAM); | |
+                        } | |
+                        s = *argv; | |
+                    } | |
+                    strncpy(OEM_CP, s, sizeof(OEM_CP) - 1); /* bounded copy ... */ | |
+                    OEM_CP[sizeof(OEM_CP) - 1] = '\0';      /* ... always NUL-terminated */ | |
+                    while(*s) ++s; /* No params straight after charset name */ | |
+                } | |
+                break; | |
+#endif /* ?__APPLE__ */ | |
case 's': /* default: shorter "ls -l" type listing */ | |
if (negative) | |
uO.lflag = -2, negative = 0; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment