Created
May 6, 2020 21:27
-
-
Save BrandonLWhite/2fddbd3b565779b421ddc5569d8845b4 to your computer and use it in GitHub Desktop.
C# .NET Core S3 Object Listing Performance
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System; | |
using System.Collections.Generic; | |
using System.Linq; | |
using System.Threading.Tasks; | |
using Amazon.Lambda.Core; | |
using Amazon.S3; | |
using Amazon.S3.Model; | |
// Assembly attribute to enable the Lambda function's JSON input to be converted into a .NET class. | |
[assembly: LambdaSerializer(typeof(Amazon.Lambda.Serialization.SystemTextJson.DefaultLambdaJsonSerializer))] | |
namespace temp_di_1514_3
{
    /// <summary>
    /// Lambda entry point that pages through the pending-merge S3 prefix, counts the
    /// listed keys and reports the oldest <c>LastModified</c> timestamp it saw.
    /// </summary>
    /// <remarks>
    /// The same <see cref="Function"/> instance can be reused for several invocations,
    /// so everything that belongs to a single invocation (continuation token, key count,
    /// oldest timestamp) lives in locals of <see cref="FunctionHandler"/> rather than in
    /// instance fields. Only the S3 client, which is safe and cheaper to share, is kept
    /// on the instance.
    /// </remarks>
    public class Function
    {
        const string S3_MERGE_BUCKET = "prod-transactions-merge.upside-services.com";
        const string S3_MERGE_PENDING_PREFIX = "pending";
        const int DEFAULT_MAX_KEYS = 10000;

        // Created on first use and reused afterwards instead of building (and leaking)
        // a new client for every invocation.
        IAmazonS3 _s3Client;

        /// <summary>
        /// Lists objects under the pending prefix until the listing is exhausted or the
        /// key limit is reached, writing the running key count and finally the oldest
        /// <c>LastModified</c> timestamp to the console.
        /// </summary>
        /// <param name="input">
        /// Optional maximum number of keys to list, as an integer string. Null, empty or
        /// whitespace selects <c>DEFAULT_MAX_KEYS</c>. The limit is checked once per page,
        /// so the final count can exceed it by up to one page.
        /// </param>
        /// <param name="context">Lambda invocation context (unused).</param>
        /// <returns>
        /// <paramref name="input"/> upper-cased, or null when it is null. Kept unchanged
        /// for compatibility with existing callers.
        /// </returns>
        /// <exception cref="FormatException"><paramref name="input"/> is not a valid integer.</exception>
        public async Task<string> FunctionHandler(string input, ILambdaContext context)
        {
            int maxKeys = string.IsNullOrWhiteSpace(input) ? DEFAULT_MAX_KEYS : int.Parse(input);

            if (_s3Client == null)
            {
                _s3Client = new AmazonS3Client();
            }

            // Per-invocation state: starts fresh on every call.
            int keyCount = 0;
            DateTime oldestTimestamp = DateTime.MaxValue;

            Task<ListObjectsV2Response> pendingPage = GetPageAsync(null);
            while (pendingPage != null)
            {
                ListObjectsV2Response currentPage = await pendingPage;
                keyCount += currentPage.KeyCount;

                // Decide exactly once whether another page is needed, then start fetching
                // it before scanning the current one so the request overlaps the scan.
                bool hasMore = currentPage.IsTruncated && keyCount < maxKeys;
                pendingPage = hasMore ? GetPageAsync(currentPage.NextContinuationToken) : null;

                oldestTimestamp = FindOldest(currentPage.S3Objects, oldestTimestamp);
                System.Console.WriteLine($"{keyCount}");
            }

            System.Console.WriteLine($"Oldest LastModifed: {oldestTimestamp.ToString("o")}");
            return input?.ToUpper();
        }

        /// <summary>
        /// Requests one page of the object listing for the pending prefix.
        /// </summary>
        /// <param name="continuationToken">
        /// Token returned by the previous page, or null to request the first page.
        /// </param>
        /// <returns>The listing response for that page.</returns>
        Task<ListObjectsV2Response> GetPageAsync(string continuationToken)
        {
            return _s3Client.ListObjectsV2Async(new ListObjectsV2Request
            {
                BucketName = S3_MERGE_BUCKET,
                Prefix = S3_MERGE_PENDING_PREFIX,
                ContinuationToken = continuationToken
            });
        }

        /// <summary>
        /// Returns the earliest <c>LastModified</c> among <paramref name="s3Objects"/>
        /// and <paramref name="oldestSoFar"/>.
        /// </summary>
        /// <param name="s3Objects">Objects from one listing page; null is treated as empty.</param>
        /// <param name="oldestSoFar">Earliest timestamp found on previous pages.</param>
        /// <returns>The earliest timestamp seen so far.</returns>
        static DateTime FindOldest(IEnumerable<S3Object> s3Objects, DateTime oldestSoFar)
        {
            if (s3Objects == null)
            {
                return oldestSoFar;
            }

            foreach (var objectInfo in s3Objects)
            {
                if (objectInfo.LastModified < oldestSoFar)
                {
                    oldestSoFar = objectInfo.LastModified;
                }
            }

            return oldestSoFar;
        }
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
C# performance with S3 file listing isn't bad.
10k files: 1.4s
188,368 files: 25.9s