-
-
Save ciudilo/99adb63d6d0617a0ba97282da6ff0343 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
 * Auto-tuning delete that allows for removal of large amounts of data
 * without impacting performance. Configurable to a target load amount.
 *
 * How it works:
 * TL;DR: Delete a small slice every second; vary the size of each slice
 * based on how long the previous delete took; sleep; repeat.
 *
 * Intuition: If the target is 100ms but the delete took 50ms, then
 * we'd like to double the window size. (The adjustment is smoothed: the
 * next window is 80% of the current window plus 20% of the corrected one.)
 *
 * Find the oldest record, then delete all records whose timestamp is older
 * than oldest_record + window_size_ms. Measure how long the delete took and
 * compare it against the target to create a ratio.
 *
 * The window shrinks or grows automatically until reaching stability.
 * If load on the system increases (e.g. during the daily peak), then the
 * window size will shrink dynamically to adjust.
 */
var telescoping_delete = function(coll_name, date_field, stop_time) { | |
target_millis = 200 // desired time spent deleting anywhere between 1 to 999 milliseconds | |
window_size_ms = 50 // initial window size estimate | |
window_size_max= 1000 * 60 // Safety switch, maximum window size, default to 1 min window | |
// get date of last document | |
sort = {} | |
sort[date_field] = 1 | |
last_doc_date = db[coll_name].find().sort(sort).limit(1)[0][date_field] | |
//Loop through | |
print("Starting deletion...") | |
while(last_doc_date.getTime() < stop_time.getTime()){ | |
next_time_millis = last_doc_date.getTime() + window_size_ms | |
next_date = ISODate() | |
next_date.setTime(next_time_millis) | |
print("\nDeletion boundary: "+ next_date) | |
remove_query = {} | |
remove_query[date_field] = {$lte:next_date} | |
//safety count, expected remove | |
//count=db[coll_name].count(remove_query) | |
//print("Expected count: " + count) | |
var start = new Date().getTime(); //Start timing | |
db[coll_name].remove(remove_query) | |
tt=db.getLastErrorObj() //ensure GLE in case we're in a legacyWriteOp shell | |
var end = new Date().getTime(); //End timing | |
var time_taken = end-start | |
//Adjust window size | |
window_size_ms = 0.8 * window_size_ms + 0.2*(window_size_ms * (target_millis/time_taken)) | |
//Safety check, time_delta must be between 1 and 1000 | |
window_size_ms = Math.min(Math.max(window_size_ms, 1), window_size_max); | |
print("Time taken (ms) : " + time_taken) | |
print("Next window size : " + window_size_ms) | |
sleep(1000-time_taken) // Sleep for the rest of the second.... | |
//reset date and repeat | |
last_doc_date = next_date | |
} | |
} | |
//Insert 1M records Example | |
for(i=0;i<1000000;i++){db.sample_coll.insert({_id:i,create_date:new Date()})} | |
db.sample_coll.createIndex({create_date:1}) | |
//Delete everything in slices | |
telescoping_delete("sample_coll","create_date",new Date()) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment