Skip to content

Instantly share code, notes, and snippets.

@ciudilo
Forked from achille/telescoping_delete.js
Created January 16, 2017 15:19
Show Gist options
  • Save ciudilo/99adb63d6d0617a0ba97282da6ff0343 to your computer and use it in GitHub Desktop.
Save ciudilo/99adb63d6d0617a0ba97282da6ff0343 to your computer and use it in GitHub Desktop.
/*
* Auto-tuning delete that allows for removal of large amounts of data
* without impacting performance. Configurable to a target load amount.
*
* How it works:
* TL;DR: Delete a small slice every second; Vary the size of each slice
* based on how long the previous delete took; sleep; repeat.
*
* Intuition: If target is 100ms but delete took 50ms then
* we'd like to double the window size.
*
* Find the oldest record, delete all records whose timestamp is older than
* oldest_record + window_size_ms. Measure how long the delete took and
* compare it against the target to create a ratio.
*
* The window shrinks or grows automatically until reaching stability.
* If load on the system increases (e.g. during the daily peak) then the
* window size will shrink dynamically to adjust.
*/
/**
 * Delete documents from a collection in small, time-ordered slices, tuning the
 * slice width so that each delete takes roughly `target_millis` milliseconds.
 *
 * Runs in the mongo shell: relies on the shell globals `db`, `print` and `sleep`.
 *
 * @param {string} coll_name  Name of the collection on the current `db`.
 * @param {string} date_field Name of the date field used to order and slice documents.
 * @param {Date}   stop_time  Upper bound; no document with a `date_field` value
 *                            later than this is removed.
 * @throws {Error} If the first document in ascending `date_field` order does not
 *                 hold a date in that field.
 */
var telescoping_delete = function(coll_name, date_field, stop_time) {
    var target_millis = 200;         // desired time spent deleting per cycle, anywhere between 1 and 999 ms
    var window_size_ms = 50;         // initial window size estimate
    var window_size_max = 1000 * 60; // safety switch: maximum window size, defaults to a 1 minute window
    var cycle_millis = 1000;         // one delete + sleep cycle lasts about this long
    var stop_millis = stop_time.getTime();

    // Find the oldest document (ascending sort on the date field).
    var sort_spec = {};
    sort_spec[date_field] = 1;
    var oldest = db[coll_name].find().sort(sort_spec).limit(1).toArray();
    if (oldest.length === 0) {
        // Nothing to slice through; indexing the empty result would throw.
        print("Nothing to delete: collection '" + coll_name + "' is empty");
        return;
    }
    var last_doc_date = oldest[0][date_field];
    if (!last_doc_date || typeof last_doc_date.getTime !== "function") {
        throw new Error("Field '" + date_field + "' of the first document in '" +
            coll_name + "' is not a date");
    }

    print("Starting deletion...");
    while (last_doc_date.getTime() < stop_millis) {
        // Never let the boundary run past stop_time, otherwise the last slice
        // would remove documents newer than the caller asked for.
        var next_time_millis = Math.min(last_doc_date.getTime() + window_size_ms, stop_millis);
        var next_date = new Date(next_time_millis);
        print("\nDeletion boundary: " + next_date);

        var remove_query = {};
        remove_query[date_field] = {$lte: next_date};

        var start = new Date().getTime(); // start timing
        db[coll_name].remove(remove_query);
        if (typeof db.getLastErrorObj === "function") {
            // Ensure GLE in case we're in a legacyWriteOp shell; shells that no
            // longer provide this helper simply skip it.
            db.getLastErrorObj();
        }
        var end = new Date().getTime();   // end timing
        var time_taken = end - start;

        // Adjust the window size: blend 80% of the old size with 20% of the size
        // scaled by target/actual. A 0 ms measurement is treated as 1 ms so the
        // ratio stays finite.
        var ratio = target_millis / Math.max(time_taken, 1);
        window_size_ms = 0.8 * window_size_ms + 0.2 * (window_size_ms * ratio);
        // Safety check: window size must stay between 1 ms and window_size_max.
        window_size_ms = Math.min(Math.max(window_size_ms, 1), window_size_max);

        print("Time taken (ms) : " + time_taken);
        print("Next window size : " + window_size_ms);

        // Sleep for the rest of the cycle; never pass a negative duration when
        // the delete itself took longer than the whole cycle.
        sleep(Math.max(0, cycle_millis - time_taken));

        // Advance the lower edge of the window and repeat.
        last_doc_date = next_date;
    }
};
// Example: insert 1M sample records, each stamped with its insertion time.
// The loop counter is declared so it does not become an implicit global.
for (var i = 0; i < 1000000; i++) {
    db.sample_coll.insert({_id: i, create_date: new Date()});
}
// Index the date field that the sliced deletes filter and sort on.
db.sample_coll.createIndex({create_date: 1});
// Delete everything in slices: stop_time is "now", i.e. after the last insert above.
telescoping_delete("sample_coll", "create_date", new Date());
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment