-
-
Save joemck/816115 to your computer and use it in GitHub Desktop.
#!/usr/bin/python | |
# Copyright 2007 by Tobia Conforto <[email protected]> | |
# | |
# This program is free software; you can redistribute it and/or modify it under the terms of the GNU General | |
# Public License as published by the Free Software Foundation; either version 2 of the License, or (at your | |
# option) any later version. | |
# | |
# This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the | |
# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
# for more details. | |
# | |
# You should have received a copy of the GNU General Public License along with this program. | |
# If not, see http://www.gnu.org/licenses/ | |
# Use this program to extract files from a disk image from a Creative Zen Xtra or Zen Vision M player. | |
# Unlike the zenrecover.py this is based on (https://gist.github.com/483969), this also finds deleted files | |
# and can extract most files after the player has been formatted. However, it doesn't distinguish between | |
# "songs" and "archives" areas on the player, and also extracts a few player system files. | |
# | |
# NOTE: Make sure it's set for the right player before you run it. Otherwise it won't find anything. | |
# See the "ZENVISION=..." line just after the comments. | |
# Versions: 0.1 2007-08-13 Initial release | |
# 0.2 2008-05-12 Small fixes for Zen Xtra models | |
# 0.3 2009-02-23 Zen Vision M compatible version (Leho Kraav <[email protected]>) | |
# 0.2a 2010-07-20 Undid most of 0.3's changes to make it Zen Xtra-compatible again | |
# (because Tobia Conforto's site no longer has it and 0.3 is the only version of this | |
# script I can find anywhere) | |
# Modified it to use LRUCache from http://evan.prodromou.name/Software/Python/LRUCache | |
# because I couldn't find the module LRU that it used before. | |
# Added code to reencode path and filenames to UTF-8 since certain "weird" characters in | |
# filenames appearing on the device caused an unhandled exception. | |
# (Joe McKenzie <[email protected]>) | |
# 0.2b 2010-07-21 Fixed problem when the filename field contains a path. | |
# Fixed problem with files added by XNJB for Mac OS X | |
# Fixed a serious bug in the previous revision of this on Gist that created insane dirs | |
# Combined Xtra and Vision M tag checks to make it a bit easier to convert the script | |
# Removed use of LRUCache because: | |
# a. Most sectors are accessed exactly once, making the cache pointless. | |
# b. On my Slackware 13.1 AMD64 box with Python 2.6, | |
# with LRUCache: 2.5 MB/s | |
# without cache: 80 MB/s (d'oh...) | |
# (AMD Sempron 140, 2 GB RAM, 40 GB Zen Xtra disk image stored on one WD20EARS | |
# [2 TB WD Green drive], recovering to a directory on another WD20EARS) | |
# **** Cleaned up code and comments a bit, for instructions to make it work with a **** | |
# **** Zen Vision M, search this file for "visionm" **** | |
# (Joe McKenzie <[email protected]>) | |
# 0.4 2011-02-07 Changed it to scan for anything that looks like an inode and recover it. | |
# Basically works like an "undelete" program now. It'll find some files even after you | |
# format your Zen. NOTE: May not recover ALL files after a format!! | |
# (Joe McKenzie <[email protected]>) | |
# 0.5 2011-08-02 Removed parsing of directories in CFSInode.__init__ so it ignores dirs | |
# with errors in them. Now it should work better on damaged filesystems. | |
from __future__ import division

import array
import codecs
import getopt
import operator
import os
import re
import sys
import time
# visionm: set ZENVISION to True for a Zen Vision M, or to False for a Zen Xtra.
# The flag selects the cluster size and the on-disk byte order used by the
# rest of this script, so a wrong value means no inode is ever recognised.
ZENVISION = True
class CFS:
    '''Read-only, cluster-addressed view of a CFS filesystem in a disk image.

    Indexing an instance (cfs[n] or cfs[a:b]) returns the raw bytes of the
    requested cluster(s); get_byteswapped_data() returns one cluster as an
    array of 16-bit words ready to be written to a recovered file.
    '''

    # Cluster size in bytes; it depends on the player model (see ZENVISION).
    if ZENVISION:
        clusterSize = 0x8000
    else:
        clusterSize = 0x2000

    def __init__(self, filename, offset=0):
        '''Filename and optional offset where the CFS filesystem begins
        (offset of cluster -1, the one filled with 0xff)'''
        # Binary mode is essential: in text mode Windows translates line
        # endings and stops reading at the first Ctrl-Z byte, which would
        # silently corrupt or truncate the image data.
        self.image = open(filename, 'rb')
        self.offset = offset

    def _cluster_position(self, cluster):
        '''Return the byte position of the given cluster inside the image.'''
        # Cluster -1 sits at self.offset, hence the "+ 1".
        return self.offset + (cluster + 1) * self.clusterSize

    def __getitem__(self, key):
        '''Get the nth CFS cluster from the image (nothing is cached).
        Accepts simple slices of clusters with explicit start and stop;
        negative indices get no special treatment.
        In any case it returns the requested data as a byte string, which is
        shorter than expected (possibly empty) past the end of the image.'''
        if isinstance(key, slice):
            cstart, cstop = key.start, key.stop
        else:
            cstart, cstop = key, key + 1
        chunks = []
        for cluster in range(cstart, cstop):
            self.image.seek(self._cluster_position(cluster))
            chunks.append(self.image.read(self.clusterSize))
        return b''.join(chunks)

    def get_byteswapped_data(self, cluster):
        '''Get the nth CFS cluster from the image as an array of 16-bit words.
        On the Zen Xtra (ZENVISION false) the two bytes of every word are
        swapped; on the Zen Vision M the data is returned as stored.
        This method is designed for bulk file retrieving.
        array.fromfile raises EOFError when a whole cluster cannot be read.'''
        a = array.array('H')
        self.image.seek(self._cluster_position(cluster))
        a.fromfile(self.image, self.clusterSize // 2)
        if not ZENVISION:
            a.byteswap()
        return a

    def inode(self, cluster):
        '''Parse the inode stored in the given cluster and return a CFSInode.'''
        return CFSInode(self, cluster)
def pdp_uint32_xtra(data, offset=0):
    '''Decode the Zen Xtra 32-bit unsigned integer at data[offset:offset + 4].

    The value is stored as two 16-bit words, most significant word first,
    with the two bytes inside each word swapped (low byte first).

    data may be any bytes-like buffer (Python 2 str, bytes, bytearray).
    Raises ValueError when fewer than four bytes are available at offset.
    '''
    # bytearray yields integer byte values for every bytes-like input, and
    # the 4-way unpacking keeps raising ValueError on a short read, which
    # the scanning loop uses to detect the end of the image.
    o2, o1, o4, o3 = bytearray(data[offset:offset + 4])
    return (o1 << 24) | (o2 << 16) | (o3 << 8) | o4
def pdp_uint32_vision(data, offset=0):
    '''Decode the Zen Vision M 32-bit unsigned integer at data[offset:offset + 4].

    The four bytes are stored least significant byte first (little-endian).

    data may be any bytes-like buffer (Python 2 str, bytes, bytearray).
    Raises ValueError when fewer than four bytes are available at offset.
    '''
    # bytearray yields integer byte values for every bytes-like input, and
    # the 4-way unpacking keeps raising ValueError on a short read, which
    # the scanning loop uses to detect the end of the image.
    o4, o3, o2, o1 = bytearray(data[offset:offset + 4])
    return (o1 << 24) | (o2 << 16) | (o3 << 8) | o4
def pdp_uint32(data, offset=0):
    '''Decode the 32-bit unsigned integer at data[offset:offset + 4], using
    the byte order of the player model selected by ZENVISION.'''
    decode = pdp_uint32_vision if ZENVISION else pdp_uint32_xtra
    return decode(data, offset)
def pdp_uint16(data, offset=0):
    '''Decode the little-endian 16-bit unsigned integer at data[offset:offset + 2].

    data may be any bytes-like buffer (Python 2 str, bytes, bytearray).
    Raises ValueError when fewer than two bytes are available at offset.
    '''
    # bytearray yields integer byte values for every bytes-like input while
    # the unpacking still raises ValueError on a short read.
    o2, o1 = bytearray(data[offset:offset + 2])
    return (o1 << 8) | o2
def ucs2string(data, offset, length):  # length in bytes
    '''Decode length bytes of data, starting at offset, as UTF-16 little-endian
    text and return the resulting unicode string.'''
    raw = data[offset:offset + length]
    # utf_16_le_decode returns (text, bytes_consumed); only the text matters.
    text, _consumed = codecs.utf_16_le_decode(raw)
    return text
def pdp_getbit(bitmap, bit_no):
    '''Return bit number bit_no (0 or 1) of a bitmap made of 32-bit words
    stored in the player's byte order.'''
    word_index, bit_index = divmod(bit_no, 32)
    word = pdp_uint32(bitmap, word_index * 4)
    return (word >> bit_index) & 1
class CFSInode:
    '''One CFS inode: its metadata tags plus the flat list of data clusters.

    Attributes set by __init__:
      cluster      cluster number the inode was read from
      cfs          the CFS object used to read clusters
      serial       32-bit value stored at offset 0x78 of the inode
      metadata     dict mapping each 2-character tag to its raw payload bytes
      filename     UTF-8 encoded file name ('(no filename)' if no tag gave one)
      path         list of UTF-8 encoded directory names (may be empty)
      filesize     file size in bytes (0 if no tag gave one)
      dataclusters cluster numbers holding the file contents, in order

    Indexing an instance returns bytes of the file contents.
    '''

    # Class-level fallbacks; __init__ always sets per-instance values.
    filename = '(no filename)'
    filesize = 0
    path = []

    # Pointer value marking an unused slot / the end of a pointer list.
    _NO_CLUSTER = 0xFFFFFFFF

    def __init__(self, cfs, cluster):
        '''Read and parse the inode stored in the given cluster of cfs.

        Raises AssertionError when the cluster does not look like a valid
        inode (wrong self-reference or malformed metadata record), and
        ValueError when a field lies beyond the data that could be read.
        '''
        self.filename = '(no filename)'
        self.filesize = 0
        self.path = []
        self.cluster = cluster
        self.cfs = cfs
        inode = cfs[cluster]
        # reading misc flags and values
        assert pdp_uint32(inode[4:8]) == cluster  # self-reference
        self.serial = pdp_uint32(inode, 0x78)
        self._read_metadata(inode)
        self._collect_dataclusters(inode)

    def _read_metadata(self, inode):
        '''Parse the metadata records and fill metadata/filename/path/filesize.'''
        count_metadata = pdp_uint32(inode, 0x7c)
        offset = 0x80
        self.metadata = {}
        for _ in range(count_metadata):
            # Record layout: uint16 (always 3), uint16 payload length, a
            # 2-character UCS-2 tag at +4, then the payload at +10.
            assert pdp_uint16(inode, offset) == 3
            length = pdp_uint16(inode, offset + 2)
            tag = ucs2string(inode, offset + 4, 4)
            self.metadata[tag] = inode[offset + 10 : offset + 10 + length]
            # byte reordering issue, 07 -> 70, 0= -> =0, 0> -> >0
            # Both the Xtra and Vision M tags are checked: the first one
            # listed is for Zen Xtra, the others are for Zen Vision M.
            # Remove the ones for the player you don't have if the extra
            # checks cause problems.
            if tag in ('07', '70'):
                # length - 2 presumably drops a trailing terminator
                # character -- TODO confirm against the on-disk format.
                name = ucs2string(inode, offset + 10, length - 2)
                self.filename = name.strip('\\/').encode("utf-8")
            # Untested on a Vision M: 51 or =0 might be the right tag there.
            elif tag in ('0=', '51', '=0'):
                # re-encode to UTF-8, then split on both \ and /
                folder = ucs2string(inode, offset + 10, length - 2).encode("utf-8")
                self.path = re.split(r"[\\/]+", folder.strip('\\/'))
            elif tag in ('0>', '>0'):
                self.filesize = pdp_uint32(inode, offset + 10)
            offset += 10 + length
        # if filename has / or \ in it, split and append those to the path
        # (XNJB puts whole path in filename and leaves path blank)
        if '/' in self.filename or '\\' in self.filename:
            self.path.extend(re.split(r"[\\/]+", self.filename))
            self.filename = self.path.pop()
            print('adjusted filename: %s' % self.filename)
            print('adjusted path: %s' % repr(self.path))

    def _read_pointer_cluster(self, cluster):
        '''Return the cluster numbers listed in a pointer cluster, stopping
        at the first unused entry.'''
        # Read the cluster once instead of once per 4-byte entry.
        block = self.cfs[cluster]
        entries = []
        for off in range(0, self.cfs.clusterSize, 4):
            c = pdp_uint32(block, off)
            if c == self._NO_CLUSTER:
                break
            entries.append(c)
        return entries

    def _collect_dataclusters(self, inode):
        '''Build self.dataclusters, the flat list of data clusters.'''
        self.dataclusters = []
        pointerclusters = []
        # direct data cluster pointers stored in the inode itself
        for off in range(0x20, 0x4c + 1, 4):
            c = pdp_uint32(inode, off)
            if c != self._NO_CLUSTER:
                self.dataclusters.append(c)
        # one cluster full of data cluster pointers
        second_class_chain = pdp_uint32(inode, 0x58)
        if second_class_chain != self._NO_CLUSTER:
            pointerclusters.append(second_class_chain)
        # one cluster full of pointers to further pointer clusters
        third_class_chain = pdp_uint32(inode, 0x64)
        if third_class_chain != self._NO_CLUSTER:
            pointerclusters.extend(self._read_pointer_cluster(third_class_chain))
        for pnt in pointerclusters:
            self.dataclusters.extend(self._read_pointer_cluster(pnt))

    def __getitem__(self, key):
        '''Returns the given byte (or byte slice) from the file contents.
        Slices need explicit start and stop values.'''
        if isinstance(key, slice):
            bstart, bstop = key.start, key.stop
        else:
            bstart, bstop = key, key + 1
        cs = self.cfs.clusterSize
        # only read the clusters that overlap the requested byte range
        cstart = bstart // cs
        cstop = (bstop - 1) // cs + 1
        data = b''.join([self.cfs[x] for x in self.dataclusters[cstart:cstop]])
        return data[bstart - cs * cstart : bstop - cs * cstart]
class CFSDirEntry:
    '''One directory entry: inode cluster, name length and short name.'''

    def __init__(self, cfs, entrydata):
        '''Parse the raw bytes of a directory entry.

        cfs is accepted for symmetry with the other classes but is not used.
        '''
        # cluster no. of the inode this entry points to
        self.cluster = pdp_uint32(entrydata)
        # length of the full filename, in characters
        self.len_filename = pdp_uint16(entrydata, 4)
        # only the first 15 characters (30 bytes) are stored in the entry
        shortname_bytes = min(30, self.len_filename * 2)
        self.shortname = ucs2string(entrydata, 8, shortname_bytes)
def _extract_file(cfs, inode, outdir):
    '''Write the contents of inode to a file below outdir.

    The directory given by inode.path is created if needed. Returns the
    number of bytes that could NOT be recovered (0 for a complete file).
    '''
    path = os.path.join(outdir, *inode.path)
    if not os.path.isdir(path):
        os.makedirs(path)
    remaining = inode.filesize
    # Binary mode: text mode would mangle the recovered data on Windows.
    with open(os.path.join(path, inode.filename), 'wb') as f:
        for c in inode.dataclusters:
            if remaining <= 0:
                break
            try:
                chunk = cfs.get_byteswapped_data(c)
            except EOFError:
                # the data cluster lies beyond the end of the image
                break
            if remaining >= cfs.clusterSize:
                chunk.tofile(f)
            else:
                # last cluster: keep only the bytes belonging to the file
                f.write(chunk.tostring()[:remaining])
            remaining -= min(cfs.clusterSize, remaining)
    return remaining


if __name__ == '__main__':
    # commandline arguments
    optlist, args = getopt.gnu_getopt(sys.argv[1:], 'o:')
    opts = dict(optlist)
    offset = int(opts.get('-o', 20 * 2**20))
    if len(args) != 2:
        print('Usage: zenrecover.py [-o OFFSET] DISK_OR_IMAGE OUTPUT_DIR')
        print('DISK_OR_IMAGE is the disk containing the filesystem, or an image thereof')
        print('OFFSET is the offset at which the filesystem starts (in bytes, default 20M)')
        print('OUTPUT_DIR is the directory in which to place the recovered files')
        print('***NOTICE***: This version of zenrecover extracts everything it can find,')
        print('no matter what section it\'s in. This can and will extract songs, archives,')
        print('Zen system files like "sg00.log", and probably even some deleted stuff!')
        sys.exit(1)
    cfs = CFS(args[0], offset)
    outdir = args[1]
    try:
        os.makedirs(outdir)
    except OSError:
        print("Can't create output directory \""+outdir+"\" -- make sure it doesn't already exist")
        sys.exit(1)  # this is a failure, so report a non-zero status

    # timing of latest few files recovered (size in bytes, time in secs)
    lastfiles = [(1, 1)]
    clust = 4
    # Scan every cluster for the inode signature, like an undelete tool.
    while True:
        try:
            isAnInode = pdp_uint32(cfs[clust][:4]) == 0x3bbe0ad9
        except ValueError:
            # reading past the end of the image yields no bytes to decode
            print("Hit end of disk image, DONE.")
            print("cluster # " + str(clust))
            break
        if isAnInode:
            print("\n\nFound inode at cluster 0x%x" % clust)
            try:
                inode = cfs.inode(clust)
            except (AssertionError, ValueError):
                # A damaged or false-positive inode must not abort the scan.
                inode = None
                print(" skipping inode that could not be parsed")
            if inode is None:
                pass
            elif (not inode.metadata) or inode.filesize == 0:
                print(" ignoring directory inode or empty file")
            else:
                # recover this inode as a single file
                t0 = time.time()
                m = inode.metadata
                print(repr(m))
                for j in m:
                    if len(m[j]) == 4:
                        print('%s %s' % (repr(j), pdp_uint32(m[j])))
                    else:
                        # show every other byte: readable for UCS-2 text
                        print('%s %s' % (repr(j), repr(m[j][::2])))
                total_bytes, total_secs = map(sum, zip(*lastfiles))
                if total_secs:
                    rate = operator.truediv(total_bytes, total_secs)
                else:
                    rate = 0.0  # coarse timers can report zero elapsed time
                sys.stdout.write('\r%.1fMB/s "%s" (%.1fMB)\033[K' % (
                    rate / 2**20,
                    inode.filename[:50],
                    inode.filesize / float(2**20)))
                sys.stdout.flush()
                missing = _extract_file(cfs, inode, outdir)
                if missing:
                    print("\n WARNING: file is incomplete, %d bytes could not be recovered" % missing)
                if len(lastfiles) >= 32:  # transfer speed is calculated on latest 32 files
                    lastfiles.pop(0)
                lastfiles.append((inode.filesize, time.time() - t0))
        clust += 1
I have come back to it and see that my disk is "not Initialized" in windows. What should I do? I'm hesitant to initialize it as I imagine I may be able to recover less of my music. Is this the case?
Damn! I might be the last person on Earth who needs this, but thanks for providing it! I have an old Nomad Jukebox 3 with a bunch of live recordings I made 10+ years ago, in WAV format, on it. I wanted to make sure I had copies of all of them before I throw this thing in the trash. Looks like it's copying successfully :)
Now I might be the last person who needs this.
Now I might be the last person who needs this. This is my story, I got a disk from Creative Nomad Extra from my friend in 2006, who dropped it and the disk was broken. I took the hard drive to a local disk recovery company. After examining it they called me that the hard drive is unrepairable and they cannot give the hard drive back to me, because it's lost. Couple days ago (August 2018), I met a father of my friend and he by some coincidence started working at this recovery company, and he told me that police seized the disk with many others because the company was doing some frauds. However, he found the hard drive and was able to recover the data. And this script works perfectly! Thank you for it!
Now I might be last person who used this - what I did:
- Got the HDD out of old Creative Nomad Zen Xtra
- Plugged it into an IDE/SATA-to-USB adapter
- Made an image: sudo dd if=/dev/sda of=./nomad.img
- Changed the ZENVISION variable to False as written in this script (zenrecover.py)
- Run the script python zenrecover.py nomad.img /home/nomad_content with Python 2.7.18
Worked like charm! Thanks!
Now I might be last person who used this - what I did: exactly follow the steps @llansky3 and it worked perfectly! Thanks so much everyone involved creating the script and the steps to use it 🥇
I used this today on a Creative Nomad Jukebox 3 hard drive image following the steps @llansky3 posted above with a Python 2 install in the Ubuntu terminal environment Windows Subsystem for Linux and it worked perfectly. Thanks to everyone that created the script and the instructions for its use. The hardest part for me was getting the drive image as my computer was seeing the IDE/SATA TO USB adapter connected Jukebox 3 drive in Disk Management but would not allow operations on the drive without initializing it (and it sounded like from the discussion above that this was something to avoid). I ended up using OSFClone to create a bootable USB memory stick which allowed for the detection of the Jukebox 3 drive and the use of dd to make the drive image. I also changed the ZENVISION variable to False in the zenrecover.py file as described above. Thanks again!
Firstly, thanks for building/sharing this code, it looks like exactly the thing I need. I had a hard-drive failure with my music collection and the only backup I have is on my Creative Zen Nomad 30Gb player.
I need to run this on Windows 7. Which version of Python do you recommend?
Also I am new to Python, thus what would be the command line including directory structure eg ($ python e:\zenrecover\zenrecover.py e:\music i:) ?
Regards
Mark