Last active
September 27, 2024 08:49
-
-
Save josephg/5e134adf70760ee7e49d to your computer and use it in GitHub Desktop.
Apple dictionaries
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Thanks to commenters for providing the base of this much nicer implementation!
# Save and run with $ python 0dedict.py
# You may need to hunt down the dictionary files yourself and change the awful path string below.
# This works for me on macOS 10.14 Mojave
from struct import unpack | |
from zlib import decompress | |
import re | |
# Location of the dictionary body to read. The asset directory name is
# machine-specific, so this path usually has to be edited by hand.
filename = (
    '/System/Library/Assets/com_apple_MobileAsset_DictionaryServices_dictionaryOSX/'
    '9f5862030e8f00af171924ebbc23ebfd6e91af78.asset/AssetData/'
    'Oxford Dictionary of English.dictionary/Contents/Resources/Body.data'
)
# Binary handle shared with gen_entry(); it stays open for the life of the script.
f = open(filename, mode='rb')
def gen_entry(fileobj=None):
    """Yield ``(title, entry)`` for every entry stored in a ``Body.data`` file.

    Layout, as read by this function:

    * offset 0x40: 4-byte little-endian length; reading stops once the file
      position reaches ``0x40 + length``.
    * offset 0x60 onwards: a sequence of blocks. Each block is a 4-byte
      size followed by that many bytes, of which the first 8 are skipped
      and the remainder is a zlib stream.
    * each inflated block is a sequence of chunks: a 4-byte size followed by
      that many bytes of entry markup.

    Args:
        fileobj: Seekable binary file object to read from. Defaults to the
            module-level ``f`` so existing ``gen_entry()`` calls keep working.

    Yields:
        Tuples ``(title, entry)`` of text strings, where ``title`` is the
        value of the entry's ``d:title="..."`` attribute and ``entry`` is the
        complete entry markup.

    Raises:
        struct.error: If a size field is truncated.
        zlib.error: If a block cannot be inflated.
        AttributeError: If an entry has no ``d:title`` attribute.
    """
    src = f if fileobj is None else fileobj
    title_re = re.compile(r'd:title="(.*?)"')

    src.seek(0x40)
    # '<i' pins the format to 4-byte little-endian regardless of the host CPU.
    limit = 0x40 + unpack('<i', src.read(4))[0]
    src.seek(0x60)
    while src.tell() < limit:
        sz, = unpack('<i', src.read(4))
        # The first 8 bytes of the block precede the zlib stream.
        buf = decompress(src.read(sz)[8:])
        pos = 0
        while pos < len(buf):
            chunksize, = unpack('<i', buf[pos:pos + 4])
            pos += 4
            # Decode before matching: a text pattern cannot be applied to
            # bytes on Python 3. Entries are assumed to be UTF-8 markup.
            entry = buf[pos:pos + chunksize].decode('utf-8')
            title = title_re.search(entry).group(1)
            yield title, entry
            pos += chunksize
# Walk every entry and print just its headword, one per line; the entry
# body is unpacked but not used.
for word, definition in gen_entry():
    print(word)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// *** Old code - not needed given the python code above | |
#include <stdio.h> | |
#include <stdlib.h> | |
#include <string.h> | |
#include <assert.h> | |
#include "zlib.h" | |
#define CHUNK 16384 | |
/* | |
40 Length of the zlib stream | |
4c 0020 | |
54 0275 number of blocks | |
60 808c pointer to the next block | |
64 8088 length of the first block | |
68 047a4a length of the unpacked block | |
6c start of the zlib stream | |
80fc second block | |
13cd134 | |
13cd174 | |
*/ | |
int unpack(unsigned char *in, int len) | |
{ | |
int ret,outed=0; | |
unsigned have; | |
z_stream strm; | |
unsigned char out[CHUNK]; | |
strm.zalloc = Z_NULL; | |
strm.zfree = Z_NULL; | |
strm.opaque = Z_NULL; | |
strm.avail_in = 0; | |
strm.next_in = Z_NULL; | |
ret = inflateInit(&strm); | |
if (ret != Z_OK) | |
return ret; | |
strm.avail_in = len; | |
strm.next_in = in; | |
do { | |
strm.avail_out = CHUNK; | |
strm.next_out = out; | |
ret = inflate(&strm, Z_NO_FLUSH); | |
assert(ret != Z_STREAM_ERROR); /* state not clobbered */ | |
switch (ret) { | |
case Z_NEED_DICT: | |
ret = Z_DATA_ERROR; /* and fall through */ | |
case Z_DATA_ERROR: | |
case Z_MEM_ERROR: | |
(void)inflateEnd(&strm); | |
return ret; | |
} | |
// printf("%lx %x\n",strm.next_in-in,strm.avail_in); | |
have = CHUNK - strm.avail_out /* - (outed?0:4)*/; | |
int off = 0; | |
/* | |
while (have - off > 3 && out[off] != '<' && out[1+off] != 'd' && out[2+off] != ':') { | |
++off; | |
}*/ | |
if (have - off <= 3) { | |
fprintf(stderr, "could not find entry\n"); | |
} | |
if (fwrite(out + off/*+(outed?0:4)*/, have - off, 1, stdout) != 1 || ferror(stdout)) { | |
(void)inflateEnd(&strm); | |
return Z_ERRNO; | |
} | |
//exit(0); | |
outed+=have; | |
} while (strm.avail_out == 0); | |
printf("%06x\n",outed); | |
(void)inflateEnd(&strm); | |
return ret == Z_STREAM_END ? Z_OK : Z_DATA_ERROR; | |
} | |
/* Path of the Body.data file being read; filled in by main(). */
char filename[256];

/*
 * Usage: <program> <dictionary name>
 *
 * Opens /Library/Dictionaries/<name>.dictionary/Contents/Body.data, reads the
 * 4-byte length stored at offset 0x40, then walks the blocks that start at
 * offset 0x60. Each block is a 4-byte size followed by that many bytes; the
 * first 8 of those bytes are skipped and the rest is passed to unpack(),
 * which writes the inflated data to stdout.
 *
 * Returns 0 on success and 1 if the arguments are invalid, the file cannot
 * be opened or read, memory runs out, or any block fails to inflate.
 */
int main(int argc, char **argv) {
    FILE *fin;
    int limit, blen = 0, p, l = 0, n, status = 0;
    unsigned char *buf = NULL;

    if (argc < 2) {
        fprintf(stderr, "usage: %s <dictionary name>\n",
                argc > 0 ? argv[0] : "dedict");
        return 1;
    }

    /* Bounded formatting: argv[1] is arbitrary user input. */
    n = snprintf(filename, sizeof filename,
                 "/Library/Dictionaries/%s.dictionary/Contents/Body.data",
                 argv[1]);
    if (n < 0 || (size_t)n >= sizeof filename) {
        fprintf(stderr, "dictionary name too long\n");
        return 1;
    }

    fin = fopen(filename, "rb");
    if (fin == NULL) {
        perror(filename);
        return 1;
    }

    if (fseek(fin, 0x40, SEEK_SET) != 0 || fread(&l, 1, 4, fin) != 4) {
        fprintf(stderr, "%s: cannot read header\n", filename);
        fclose(fin);
        return 1;
    }
    limit = 0x40 + l;
    p = 0x60;

    do {
        l = 0;
        if (fseek(fin, p, SEEK_SET) != 0 || fread(&l, 1, 4, fin) != 4) {
            fprintf(stderr, "%s: cannot read block size at offset %x\n", filename, p);
            status = 1;
            break;
        }
        /* The zlib stream starts 8 bytes into the block. */
        if (l < 8) {
            fprintf(stderr, "%s: bad block size %d at offset %x\n", filename, l, p);
            status = 1;
            break;
        }
        /* Grow the reusable buffer only when this block is larger than any before. */
        if (blen < l) {
            free(buf);
            buf = (unsigned char *)malloc((size_t)l);
            if (buf == NULL) {
                fprintf(stderr, "out of memory\n");
                blen = 0;
                status = 1;
                break;
            }
            blen = l;
        }
        if (fread(buf, 1, (size_t)l, fin) != (size_t)l) {
            fprintf(stderr, "%s: truncated block at offset %x\n", filename, p);
            status = 1;
            break;
        }
        /* Keep going after a bad block, but remember the failure. */
        if (unpack(buf + 8, l - 8) != 0) {
            fprintf(stderr, "%s: failed to inflate block at offset %x\n", filename, p);
            status = 1;
        }
        p += 4 + l;
    } while (p < limit);

    free(buf);
    fclose(fin);
    return status;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// This program strips the first 4 characters from each line in the input | |
#include <stdio.h> | |
/*
 * Copy stdin to stdout, dropping the first 4 bytes of each line.
 * Lines whose length, as reported by fgetln(), is 4 or less produce no
 * output at all.
 */
int main() {
    for (;;) {
        size_t length = 0;
        char *text;

        /* Stop as soon as the stream is in an error or end-of-file state. */
        if (ferror(stdin) || feof(stdin))
            break;

        text = fgetln(stdin, &length);
        if (text == NULL)
            break;

        if (length <= 4)
            continue;
        fwrite(text + 4, 1, length - 4, stdout);
    }
    return 0;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
@tsunho12 you will get much better conversion results if you use https://github.com/ilius/pyglossary