Skip to content

Instantly share code, notes, and snippets.

@BrandonLWhite
Created May 6, 2020 21:27
Show Gist options
  • Save BrandonLWhite/2fddbd3b565779b421ddc5569d8845b4 to your computer and use it in GitHub Desktop.
Save BrandonLWhite/2fddbd3b565779b421ddc5569d8845b4 to your computer and use it in GitHub Desktop.
C# .NET Core S3 Object Listing Performance
using System;
using System.Collections.Generic;
using System.Linq;
using System.Threading.Tasks;
using Amazon.Lambda.Core;
using Amazon.S3;
using Amazon.S3.Model;
// Assembly attribute to enable the Lambda function's JSON input to be converted into a .NET class.
[assembly: LambdaSerializer(typeof(Amazon.Lambda.Serialization.SystemTextJson.DefaultLambdaJsonSerializer))]
namespace temp_di_1514_3
{
/// <summary>
/// AWS Lambda entry point that pages through the S3 object listing under a fixed
/// bucket/prefix and logs the oldest <c>LastModified</c> timestamp it encounters.
/// Used as a rough probe of S3 listing performance.
/// </summary>
/// <remarks>
/// The Lambda runtime may reuse a single <see cref="Function"/> instance for many
/// invocations, so every piece of per-invocation state (key count, oldest timestamp,
/// continuation token) is kept in locals of <see cref="FunctionHandler"/> instead of
/// instance fields. Nothing carries over from one invocation to the next.
/// </remarks>
public class Function
{
    const string MergeBucket = "prod-transactions-merge.upside-services.com";
    const string MergePendingPrefix = "pending";

    // Listing stops once at least this many keys have been counted (checked per page).
    const int DefaultMaxKeys = 10000;

    /// <summary>
    /// Lists objects page by page until the listing is exhausted or the key budget is
    /// reached, writing the running key count after each page and the oldest
    /// <c>LastModified</c> value at the end.
    /// </summary>
    /// <param name="input">
    /// Optional key budget as an integer string. Null, empty, or whitespace selects
    /// <see cref="DefaultMaxKeys"/>. A non-numeric value makes <c>int.Parse</c> throw.
    /// </param>
    /// <param name="context">Lambda invocation context (not used).</param>
    /// <returns><paramref name="input"/> upper-cased, or null when it is null.</returns>
    public async Task<string> FunctionHandler(string input, ILambdaContext context)
    {
        int maxKeys = string.IsNullOrWhiteSpace(input)
            ? DefaultMaxKeys
            : int.Parse(input, System.Globalization.CultureInfo.InvariantCulture);

        int keyCount = 0;
        DateTime oldestTimestamp = DateTime.MaxValue;

        // The client is created per invocation (as before) but is now disposed on exit.
        using (var s3Client = new AmazonS3Client())
        {
            Task<ListObjectsV2Response> pendingPage = ListPageAsync(s3Client, null);
            while (pendingPage != null)
            {
                ListObjectsV2Response currentPage = await pendingPage;
                keyCount += currentPage.KeyCount;

                // Start the next request before scanning this page so the network
                // round-trip overlaps the scan. The decision uses only values from the
                // page that was just awaited, so it cannot race with the in-flight request.
                pendingPage = currentPage.IsTruncated && keyCount < maxKeys
                    ? ListPageAsync(s3Client, currentPage.NextContinuationToken)
                    : null;

                oldestTimestamp = FindOldest(currentPage.S3Objects, oldestTimestamp);
                Console.WriteLine($"{keyCount}");
            }
        }

        Console.WriteLine($"Oldest LastModifed: {oldestTimestamp.ToString("o")}");
        return input?.ToUpper();
    }

    /// <summary>
    /// Requests one page of the listing for the configured bucket and prefix.
    /// </summary>
    /// <param name="s3Client">Client used to issue the request.</param>
    /// <param name="continuationToken">
    /// Token from the previous page, or null to request the first page.
    /// </param>
    /// <returns>The listing response for that page.</returns>
    static Task<ListObjectsV2Response> ListPageAsync(IAmazonS3 s3Client, string continuationToken)
    {
        return s3Client.ListObjectsV2Async(new ListObjectsV2Request
        {
            BucketName = MergeBucket,
            Prefix = MergePendingPrefix,
            ContinuationToken = continuationToken
        });
    }

    /// <summary>
    /// Returns the earlier of <paramref name="oldestSoFar"/> and the smallest
    /// <c>LastModified</c> value among <paramref name="s3Objects"/>.
    /// </summary>
    /// <param name="s3Objects">Objects from one listing page; null is treated as empty.</param>
    /// <param name="oldestSoFar">Oldest timestamp seen on earlier pages.</param>
    static DateTime FindOldest(IEnumerable<S3Object> s3Objects, DateTime oldestSoFar)
    {
        // NOTE(review): defensive guard in case the SDK hands back a null collection
        // for an empty page; confirm against the SDK version in use.
        if (s3Objects == null)
        {
            return oldestSoFar;
        }

        foreach (var objectInfo in s3Objects)
        {
            if (objectInfo.LastModified < oldestSoFar)
            {
                oldestSoFar = objectInfo.LastModified;
            }
        }

        return oldestSoFar;
    }
}
}
@BrandonLWhite
Copy link
Author

C# performance with S3 file listing isn't bad.
10k files: 1.4s
188,368 files: 25.9s

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment