/* 
 * Auto-tuning delete that allows for removal of large amounts of data 
 * without impacting performance. Configurable to a target load amount.
 *
 * How it works: 
 * TL;DR: Delete a small slice every second; Vary the size of each slice 
 *        based on how long the previous delete took; sleep; repeat. 
 *
 * TODO: Modify this to allow for deletion based on objectid's date 
 *       which is embedded in the first four bytes.
 *
 * Intuition: If target is 100ms but delete took 50ms then
 * we'd like to double the window size.  
 *
 * Find the oldest record, delete all records whose timestamp is older than 
 * oldest_record + window_size_ms. Measure how long the delete took and 
 * compare it against the target to create a ratio.  
 *
 * The window shrinks or grows automatically until reaching stability.
 * If load on the system increases (e.g. during the daily peak) then the 
 * window size will shrink dynamically to adjust. 
*/
/**
 * Deletes documents from a collection in small, time-ordered slices, pausing
 * between slices and resizing each slice based on how long the previous
 * delete took.
 *
 * Starting from the oldest document (by `date_field`), each cycle removes all
 * documents whose `date_field` is <= the current boundary, then advances the
 * boundary by the current window size. The boundary never exceeds `stop_time`.
 *
 * @param {string} coll_name  Name of the collection on the shell's current `db`.
 * @param {string} date_field Name of the date field used to order and slice documents.
 * @param {Date}   stop_time  Upper bound of the deletion; documents dated after it are kept.
 * @throws {TypeError} If the oldest document's `date_field` is not a Date.
 */
var telescoping_delete = function(coll_name, date_field, stop_time) {
    var target_millis   = 200;        // desired time spent deleting per cycle, between 1 and 999 ms
    var window_size_ms  = 50;         // initial window size estimate
    var window_size_min = 1;          // never let the window collapse to zero
    var window_size_max = 1000 * 60;  // safety switch: maximum window size, 1 minute
    var cycle_millis    = 1000;       // one delete + sleep cycle lasts about this long

    var coll = db[coll_name];

    // Find the date of the oldest document (ascending sort on the date field).
    var sort = {};
    sort[date_field] = 1;
    var oldest_cursor = coll.find().sort(sort).limit(1);
    if (!oldest_cursor.hasNext()) {
        print("Nothing to delete: collection is empty");
        return;
    }
    var last_doc_date = oldest_cursor.next()[date_field];
    if (last_doc_date == null || typeof last_doc_date.getTime !== "function") {
        throw new TypeError("Field '" + date_field + "' of the oldest document is not a Date");
    }

    var stop_millis = stop_time.getTime();

    print("Starting deletion...");
    while (last_doc_date.getTime() < stop_millis) {
        // Clamp the boundary so a large window can never delete past stop_time.
        var next_time_millis = Math.min(last_doc_date.getTime() + window_size_ms, stop_millis);
        var next_date = new Date(next_time_millis);

        print("\nDeletion boundary: " + next_date);

        var remove_query = {};
        remove_query[date_field] = {$lte: next_date};

        var start = new Date().getTime(); // Start timing
        coll.remove(remove_query);
        db.getLastErrorObj();             // ensure GLE in case we're in a legacyWriteOp shell
        var end = new Date().getTime();   // End timing
        var time_taken = end - start;

        // Adjust window size: blend 80% of the old window with 20% of the window
        // scaled by target/actual. A sub-millisecond delete is counted as 1 ms to
        // avoid dividing by zero.
        var ratio = target_millis / Math.max(time_taken, 1);
        window_size_ms = 0.8 * window_size_ms + 0.2 * (window_size_ms * ratio);

        // Safety check: keep the window between window_size_min and window_size_max.
        window_size_ms = Math.min(Math.max(window_size_ms, window_size_min), window_size_max);

        print("Time taken (ms)  : " + time_taken);
        print("Next window size : " + window_size_ms);

        // Sleep for the rest of the cycle; never pass a negative duration when the
        // delete itself took longer than the cycle.
        sleep(Math.max(0, cycle_millis - time_taken));

        // Advance the boundary and repeat.
        last_doc_date = next_date;
    }
};


// Example: populate sample_coll with 1M documents, each stamped with its
// insertion time.
i = 0
while (i < 1000000) {
    db.sample_coll.insert({_id: i, create_date: new Date()})
    i++
}

// Index the date field that telescoping_delete sorts and filters on.
db.sample_coll.createIndex({create_date: 1})

// Delete everything up to "now" in slices.
telescoping_delete("sample_coll", "create_date", new Date())