/*
 * Auto-tuning delete that allows for removal of large amounts of data
 * without impacting performance. Configurable to a target load amount.
 *
 * How it works:
 * TL;DR: Delete a small slice every second; vary the size of each slice
 * based on how long the previous delete took; sleep; repeat.
 *
 * TODO: Modify this to allow for deletion based on the ObjectId's date,
 * which is embedded in the first four bytes.
 *
 * Intuition: If the target is 100ms but the delete took 50ms then
 * we'd like to grow the window (towards double its size).
 *
 * Find the oldest record, delete all records whose timestamp is at or before
 * oldest_record + window_size_ms. Measure how long the delete took and
 * compare it against the target to create a ratio. The window is then moved
 * 20% of the way towards (window * ratio), which smooths out noisy timings.
 *
 * The window shrinks or grows automatically until reaching stability.
 * If load on the system increases (e.g. during the daily peak) then the
 * window size will shrink dynamically to adjust.
 */

/**
 * Delete documents from a collection in small, time-bounded slices.
 *
 * Relies on the mongo shell globals `db`, `print` and `sleep`.
 *
 * @param {string} coll_name  Name of the collection to delete from.
 * @param {string} date_field Name of the Date field used to slice the data.
 * @param {Date}   stop_time  Upper bound; documents dated after this are
 *                            never deleted.
 * @param {Object} [options]  Optional tuning overrides:
 *   - target_millis   desired time spent deleting per second, anywhere
 *                     between 1 and 999 milliseconds (default 200)
 *   - window_size_ms  initial window size estimate (default 50)
 *   - window_size_max safety switch, maximum window size in ms
 *                     (default 1 minute)
 * @throws {Error} If the oldest document's `date_field` is not a Date.
 */
var telescoping_delete = function(coll_name, date_field, stop_time, options) {
    var opts = options || {};
    var target_millis = opts.target_millis !== undefined ? opts.target_millis : 200;
    var window_size_ms = opts.window_size_ms !== undefined ? opts.window_size_ms : 50;
    var window_size_max = opts.window_size_max !== undefined ? opts.window_size_max : 1000 * 60;

    // Get the date of the oldest document (ascending sort on the date field).
    var sort = {};
    sort[date_field] = 1;
    var oldest_cursor = db[coll_name].find().sort(sort).limit(1);
    if (!oldest_cursor.hasNext()) {
        print("Nothing to delete: collection '" + coll_name + "' is empty");
        return;
    }
    var last_doc_date = oldest_cursor.next()[date_field];
    // Documents missing the field sort first in ascending order, so fail
    // with a clear message instead of an opaque TypeError further down.
    if (!last_doc_date || typeof last_doc_date.getTime !== "function") {
        throw new Error("Field '" + date_field + "' of the oldest document in '" +
                        coll_name + "' is not a Date");
    }

    var stop_millis = stop_time.getTime();

    // Loop through
    print("Starting deletion...");
    while (last_doc_date.getTime() < stop_millis) {
        // Never let the boundary move past stop_time, otherwise the final
        // slice would delete documents newer than the caller asked for.
        var next_time_millis = Math.min(last_doc_date.getTime() + window_size_ms, stop_millis);
        var next_date = new Date(next_time_millis);
        print("\nDeletion boundary: " + next_date);

        var remove_query = {};
        remove_query[date_field] = {$lte: next_date};

        // Safety count, expected remove:
        //   print("Expected count: " + db[coll_name].count(remove_query))

        var start = new Date().getTime(); // Start timing
        db[coll_name].remove(remove_query);
        db.getLastErrorObj(); // ensure GLE in case we're in a legacyWriteOp shell
        var end = new Date().getTime(); // End timing
        var time_taken = end - start;

        // Adjust window size. The measured time is floored at 1ms so that an
        // instantaneous delete cannot produce an infinite ratio and jump the
        // window straight to its maximum.
        var ratio = target_millis / Math.max(time_taken, 1);
        window_size_ms = 0.8 * window_size_ms + 0.2 * (window_size_ms * ratio);

        // Safety check, window size must be between 1 and window_size_max
        window_size_ms = Math.min(Math.max(window_size_ms, 1), window_size_max);

        print("Time taken (ms) : " + time_taken);
        print("Next window size : " + window_size_ms);

        // Sleep for the rest of the second (no sleep if the delete overran it).
        sleep(Math.max(0, 1000 - time_taken));

        // Reset date and repeat
        last_doc_date = next_date;
    }
};

// Insert 1M records Example
for (var i = 0; i < 1000000; i++) {
    db.sample_coll.insert({_id: i, create_date: new Date()});
}
db.sample_coll.createIndex({create_date: 1});

// Delete everything in slices
telescoping_delete("sample_coll", "create_date", new Date());