Created
October 15, 2023 03:09
-
-
Save run-dlang/6d603756c64b37aec370d215692ac930 to your computer and use it in GitHub Desktop.
Code shared from run.dlang.io.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import std.stdio : writeln, toFile; | |
import std.datetime.stopwatch : StopWatch, AutoStart; | |
import asdf.serialization : deserialize, serializeToJson; | |
import std.file : readText; | |
/// Number of related posts kept for every post (size of the fixed "top N" arrays).
enum TopN = 5;
/// One input post, as deserialized from the posts JSON file.
struct Post
{
    /// Identifier of the post (JSON field `_id`).
    string _id;
    /// Title of the post.
    string title;
    /// Tags attached to the post; shared tags are what make two posts "related".
    string[] tags;
}
/// Output record for one post: the post's identity plus its TopN related posts.
struct RelatedPosts
{
    /// Identifier of the post this record describes.
    string _id;
    /// Tags of the post this record describes.
    string[] tags;
    /// The selected related posts, stored by value in a fixed-size array of TopN entries.
    Post[TopN] related;
}
/// Candidate entry for the per-post "top N" list: a post index and its shared-tag count.
struct PostIdxAndRelatedCount
{
    /// Index of the candidate post in the posts array (defaults to 0).
    size_t postIdx;
    /// Number of tags the candidate shares with the post being processed (defaults to 0).
    ubyte relatedCount;
}
/// Reads "../posts.json", computes for every post the TopN posts that share the
/// most tags with it, prints the processing time (excluding file I/O and JSON
/// parsing), and writes the result to "../related_posts_d.json".
void main()
{
    auto jsonText = readText("../posts.json");
    auto posts = deserialize!(Post[])(jsonText);

    // The stopwatch starts only after the input has been read and parsed.
    auto sw = StopWatch(AutoStart.yes);

    auto relatedPosts = new RelatedPosts[posts.length];

    // Inverted index: tag -> indices of every post carrying that tag.
    size_t[][string] tagMap;
    foreach (i, ref const post; posts)
        foreach (tag; post.tags)
            tagMap[tag] ~= i;

    // Scratch buffer reused for every post: relatedCounts[j] is the number of
    // tags post j shares with the post currently being processed.
    auto relatedCounts = new ubyte[posts.length];

    foreach (const myPostIdx, ref post; posts)
    {
        relatedCounts[] = 0;
        foreach (tag; post.tags)
            foreach (idx; tagMap[tag])
                relatedCounts[idx]++; // NOTE(review): ubyte counter wraps past 255 shared tags
        relatedCounts[myPostIdx] = 0; // exclude ourselves from consideration

        // topn is kept sorted by descending relatedCount. Slots that are never
        // filled keep their default value (postIdx 0, relatedCount 0).
        PostIdxAndRelatedCount[TopN] topn;
        ubyte minRelatedCount = 0; // count held by the last (weakest) slot

        foreach (postIdx, relatedCount; relatedCounts)
        {
            // Only candidates that beat the current weakest entry are inserted.
            if (relatedCount > minRelatedCount)
            {
                // Find the insertion location, shifting lower values one slot
                // to the right as we go; the previous last entry is dropped.
                size_t loc = topn.length - 1;
                for (; loc > 0 && relatedCount > topn[loc - 1].relatedCount; --loc)
                    topn[loc] = topn[loc - 1];
                topn[loc] = PostIdxAndRelatedCount(postIdx, relatedCount);
                minRelatedCount = topn[$ - 1].relatedCount;
            }
        }

        auto rp = &relatedPosts[myPostIdx];
        rp._id = post._id;
        rp.tags = post.tags;
        // Unfilled topn slots still hold postIdx 0, so they resolve to posts[0].
        foreach (i; 0 .. rp.related.length)
            rp.related[i] = posts[topn[i].postIdx];
    }

    sw.stop();
    writeln("Processing time (w/o IO): ", sw.peek.total!"usecs" * 1.0 / 1000, "ms");
    toFile(serializeToJson(relatedPosts), "../related_posts_d.json");
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment