-
-
Save vithalreddy/6cce09a2b63ae30de34de9f788977c4d to your computer and use it in GitHub Desktop.
Recover deleted data from a MongoDB database
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""A little script to recover deleted recording of a mongoDB db file | |
It is not optimized, but it works and has saved me.
"""
import struct
import sys

import bson
import pymongo
def decode_chunck(chunck):
    """Decode a candidate chunk and keep it only if it is a wanted document.

    ``chunck`` is handed to ``bson.decode_all`` and the first decoded
    document is inspected.  The document is returned when it is non-empty
    and contains all of the keys ``field_1``, ``field_2`` and ``field_3``;
    in every other case, including any exception raised while decoding,
    ``None`` is returned.
    """
    wanted_keys = ('field_1', 'field_2', 'field_3')
    try:
        document = bson.decode_all(chunck)[0]
        # Only a non-empty document carrying every searched field qualifies
        if document and all(key in document for key in wanted_keys):
            return document
    except Exception:
        # Deliberately broad: the input is arbitrary bytes cut out of a data
        # file, so failing to decode is the normal case, not an error.
        pass
    return None
def generate_chunck(data, pos=0, max_size=2 * 1024):
    """Yield every byte range of a file that is shaped like a BSON document.

    The whole file is read into memory and scanned one byte at a time.  At
    each offset the next four bytes are interpreted as a little-endian
    unsigned 32-bit length; the range ``[offset, offset + length)`` is
    yielded when it fits in the file and its last byte is ``\\x00``, which
    is how a BSON document is framed (see http://bsonspec.org/).  The
    yielded bytes are only candidates: the caller still has to decode them.

    Args:
        data: Path of the file to scan.
        pos: Offset at which the scan starts.
        max_size: Largest candidate length, in bytes, that is accepted.
            Larger lengths are skipped to keep false positives down.

    Yields:
        The raw bytes of each candidate document, in file order.

    Raises:
        IOError / OSError: if the file cannot be opened or read.
    """
    # Smallest possible BSON document: 4-byte length + terminating \x00
    min_size = 5

    print("open at: %s" % pos)
    # ``with`` makes sure the handle is released once the content is loaded
    with open(data, 'rb') as f:
        a = f.read()
    size = len(a)

    # Stop as soon as fewer than 4 bytes remain: no length prefix can be
    # read there, and unpacking a short slice would raise struct.error.
    while pos + 4 <= size:
        start = pos
        # Always advance by a single byte so overlapping candidates are seen
        pos += 1

        # Progress indicator
        if start % 1024 == 0:
            print(start)

        # Determine the size of the possible bson encoded data
        bson_size = struct.unpack_from("<I", a, start)[0]

        # Reject lengths that cannot be a document or exceed the caller's cap
        if bson_size < min_size or bson_size > max_size:
            continue

        # Reject candidates running past the end of the file; a document
        # ending exactly on the last byte is still valid.
        end = start + bson_size
        if end > size:
            continue

        # The length includes the trailing \x00, so the terminator is the
        # LAST byte of the candidate.  Comparing a one-byte slice (instead
        # of an indexed element) behaves the same on Python 2 and 3.
        if a[end - 1:end] != b'\x00':
            continue

        yield a[start:end]
# Command line:
#   argv[1] = the file to recover
#   argv[2] = where to start in the file (optional, defaults to 0)
if len(sys.argv) < 2:
    sys.exit("usage: %s <data file> [start offset]" % sys.argv[0])
data_file = sys.argv[1]
start_pos = int(sys.argv[2]) if len(sys.argv) > 2 else 0

# Create the connection. pymongo.Connection no longer exists in current
# PyMongo releases; MongoClient is its replacement.
connection = pymongo.MongoClient('localhost', 27017)
# Connect to MongoDB in order to reinsert the data
db = connection.recover_db
collection = db.recover_collection

for chunck in generate_chunck(data_file, start_pos):
    result = decode_chunck(chunck)
    if not result:
        continue
    print("insert")
    try:
        # Collection.insert was removed from PyMongo; insert_one replaces it
        collection.insert_one(result)
    except pymongo.errors.DuplicateKeyError:
        # MongoClient acknowledges writes, so a document whose _id is already
        # in the collection (e.g. on a re-run) raises here. Skip it rather
        # than abort the whole recovery.
        print("duplicate _id, skipped")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment