Skip to content

Instantly share code, notes, and snippets.

@egguy
Created May 25, 2012 16:04
Show Gist options
  • Save egguy/2788955 to your computer and use it in GitHub Desktop.
Save egguy/2788955 to your computer and use it in GitHub Desktop.
Recover deleted data from a MongoDB database
"""A little script to recover deleted recording of a mongoDB db file
It is not optimized, but it works and has saved me.
"""
import struct
import bson
import pymongo
import sys
def decode_chunck(chunck, required_fields=('field_1', 'field_2', 'field_3')):
    """Try to decode a candidate chunk as a single BSON document.

    Args:
        chunck: Raw bytes that may contain one BSON document.
        required_fields: Keys that must all be present in the decoded
            document for it to count as a recovered record. The default
            keeps the placeholder names the script shipped with; pass the
            field names of the collection you are trying to recover.

    Returns:
        The decoded document, or None when the bytes do not decode, decode
        to an empty document, or lack one of the required fields.
    """
    try:
        result = bson.decode_all(chunck)[0]
    except Exception:
        # Deliberately broad: the input is arbitrary bytes scraped from a
        # data file, so any decoding failure just means "not a document".
        return None
    if not result:
        return None
    # Only keep documents that carry every searched field.
    if all(field in result for field in required_fields):
        return result
    return None
def generate_chunck(data, pos=0, max_size=2 * 1024):
    """Yield every byte range of a file that could be a BSON document.

    The file is scanned one byte at a time. At each offset the next four
    bytes are read as a little-endian int32 document length; the range is
    yielded when that length is plausible and the range ends with the
    "\\x00" terminator that closes a BSON document.

    Args:
        data: Path of the file to scan.
        pos: Byte offset at which to start scanning.
        max_size: Largest document length (in bytes) accepted as a
            candidate. Defaults to 2 KB, the limit the script was written
            with.

    Yields:
        Byte strings of candidate documents; they still have to be
        validated by actually decoding them.
    """
    print("open at: %s" % pos)
    # The whole file is read into memory; the handle is closed right away.
    with open(data, 'rb') as f:
        a = f.read()
    size = len(a)
    # Smallest possible document: the int32 length plus the terminator.
    min_size = 5
    # Stop once fewer than min_size bytes remain, so the length prefix can
    # always be read in full.
    while pos + min_size <= size:
        # Progress indicator
        if pos % 1024 == 0:
            print(pos)
        # Determine the size of the possible BSON encoded data
        bson_size = struct.unpack_from("<I", a, pos)[0]
        # Too small to be a document, or bigger than the accepted maximum:
        # keep searching in the file
        if bson_size < min_size or bson_size > max_size:
            pos += 1
            continue
        # If the document would run past the end of the file, reject it
        end = pos + bson_size
        if end > size:
            pos += 1
            continue
        # The length includes the trailing \x00, so the terminator is the
        # LAST byte of the range (https://bsonspec.org/spec.html).
        # A one-byte slice is compared so this works on Python 2 and 3.
        if a[end - 1:end] != b'\x00':
            pos += 1
            continue
        # Chunk it
        chunck = a[pos:end]
        pos += 1
        yield chunck
# argv[1] = the file to recover
# argv[2] = where to start in the file (byte offset, optional, default 0)
if len(sys.argv) < 2:
    sys.exit("usage: %s <file-to-recover> [start-offset]" % sys.argv[0])
source_path = sys.argv[1]
start_offset = int(sys.argv[2]) if len(sys.argv) > 2 else 0
# Create the connection. pymongo.Connection was removed in PyMongo 3.0;
# MongoClient is its replacement.
connection = pymongo.MongoClient('localhost', 27017)
# Connect to MongoDB in order to reinsert the data
db = connection.recover_db
collection = db.recover_collection
for chunck in generate_chunck(source_path, start_offset):
    result = decode_chunck(chunck)
    if result:
        print("insert")
        try:
            # Collection.insert was removed in PyMongo 4.0.
            collection.insert_one(result)
        except pymongo.errors.DuplicateKeyError:
            # A document with this _id was already reinserted; skip it
            # instead of aborting the recovery.
            print("duplicate, skipped")
@atul9911
Copy link

atul9911 commented Jan 7, 2017

Showing : ./recover.py: line 8: syntax error near unexpected token (' ./recover.py: line 8: def decode_chunck(chunck):

@arihantdaga
Copy link

@egguy can you please give an example for argv[1] and argv[2]? I don't know which file we have to feed it.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment