Created
July 22, 2024 18:33
-
-
Save pjmagee/003b3816fe0372aa4eaa834d2986b051 to your computer and use it in GitHub Desktop.
Get all Pages with Galactic Coordinates
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// <summary>
/// Console program that lists the pages filed under the direct subcategories of
/// "Category:Locations_by_grid_square" on starwars.fandom.com and prints the
/// "{{Infobox ...}}" template text found in each page's wikitext.
/// </summary>
class Program
{
    // MediaWiki API endpoint used by every request in this program.
    private const string ApiUrl = "https://starwars.fandom.com/api.php";

    // Namespace prefix that distinguishes category titles from page titles.
    private const string CategoryPrefix = "Category:";

    // Opening text of the template that ExtractInfobox looks for.
    private const string InfoboxMarker = "{{Infobox";

    // One HttpClient shared by all requests.
    private static readonly HttpClient client = new HttpClient();

    /// <summary>
    /// Entry point: collects the page titles, then prints each title followed by
    /// the infobox text extracted from that page.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    static async Task Main(string[] args)
    {
        string category = "Category:Locations_by_grid_square";
        List<string> allPages = await GetAllPagesInCategoryAndSubcategories(category);

        foreach (string page in allPages)
        {
            Console.WriteLine($"Page: {page}");
            var infobox = await GetInfoboxData(page);
            Console.WriteLine($"Infobox: {infobox}\n");
        }
    }

    /// <summary>
    /// Returns the titles of all non-category pages that are members of the direct
    /// subcategories of <paramref name="category"/>.
    /// </summary>
    /// <remarks>
    /// Only one level is walked: pages filed directly in <paramref name="category"/>
    /// and pages in deeper nested subcategories are not included. A page that is a
    /// member of several subcategories appears once per subcategory.
    /// </remarks>
    /// <param name="category">Full category title, including the "Category:" prefix.</param>
    /// <returns>Page titles in the order the subcategories were listed.</returns>
    static async Task<List<string>> GetAllPagesInCategoryAndSubcategories(string category)
    {
        List<string> allPages = new List<string>();
        List<string> subcategories = await GetSubcategories(category);

        foreach (string subcategory in subcategories)
        {
            List<string> pages = await GetAllCategoryMembers(subcategory);
            Console.WriteLine($"Subcategory: {subcategory}");
            allPages.AddRange(pages);
        }

        return allPages;
    }

    /// <summary>
    /// Returns the titles of the members of <paramref name="category"/> that are
    /// themselves categories (title starts with "Category:").
    /// </summary>
    /// <param name="category">Full category title, including the "Category:" prefix.</param>
    static Task<List<string>> GetSubcategories(string category)
    {
        return CollectMemberTitles(category, true);
    }

    /// <summary>
    /// Returns the titles of the members of <paramref name="category"/> that are
    /// not categories (title does not start with "Category:").
    /// </summary>
    /// <param name="category">Full category title, including the "Category:" prefix.</param>
    static Task<List<string>> GetAllCategoryMembers(string category)
    {
        return CollectMemberTitles(category, false);
    }

    /// <summary>
    /// Pages through every member of <paramref name="category"/> and keeps the titles
    /// whose "is a category" status equals <paramref name="wantCategories"/>.
    /// </summary>
    private static async Task<List<string>> CollectMemberTitles(string category, bool wantCategories)
    {
        List<string> titles = new List<string>();
        string cmcontinue = null;

        do
        {
            JsonElement result = await GetCategoryMembers(category, cmcontinue);
            JsonElement categoryMembers = result.GetProperty("query").GetProperty("categorymembers");

            foreach (JsonElement member in categoryMembers.EnumerateArray())
            {
                string title = member.GetProperty("title").GetString();
                if (title == null)
                {
                    continue;
                }

                // Ordinal: the prefix is a fixed identifier, not linguistic text.
                bool isCategory = title.StartsWith(CategoryPrefix, StringComparison.Ordinal);
                if (isCategory == wantCategories)
                {
                    titles.Add(title);
                }
            }

            // The API includes "continue.cmcontinue" only while more results remain.
            cmcontinue = ReadContinuationToken(result);
        } while (cmcontinue != null);

        return titles;
    }

    /// <summary>
    /// Reads "continue.cmcontinue" from an API response, or returns null when it is absent.
    /// </summary>
    private static string ReadContinuationToken(JsonElement result)
    {
        JsonElement continueElement;
        JsonElement token;

        if (result.ValueKind != JsonValueKind.Object || !result.TryGetProperty("continue", out continueElement))
        {
            return null;
        }

        if (continueElement.ValueKind != JsonValueKind.Object || !continueElement.TryGetProperty("cmcontinue", out token))
        {
            return null;
        }

        return token.GetString();
    }

    /// <summary>
    /// Requests one batch of members of <paramref name="category"/> from the API.
    /// </summary>
    /// <param name="category">Full category title, sent as "cmtitle".</param>
    /// <param name="cmcontinue">Continuation token from the previous batch, or null for the first batch.</param>
    /// <returns>The root element of the JSON response, detached from its document.</returns>
    /// <exception cref="HttpRequestException">The server answered with a non-success status code.</exception>
    static async Task<JsonElement> GetCategoryMembers(string category, string cmcontinue)
    {
        var parameters = new Dictionary<string, string>
        {
            { "action", "query" },
            { "list", "categorymembers" },
            { "cmtitle", category },
            { "cmlimit", "max" }, // Use 'max' to get as many results as possible per request
            { "format", "json" }
        };

        if (cmcontinue != null)
        {
            parameters.Add("cmcontinue", cmcontinue);
        }

        string responseString = await PostToApi(parameters);

        using (JsonDocument jsonDoc = JsonDocument.Parse(responseString))
        {
            // Clone so the returned element stays valid after the document is disposed.
            return jsonDoc.RootElement.Clone();
        }
    }

    /// <summary>
    /// Fetches the wikitext of <paramref name="pageTitle"/> and returns the infobox
    /// text extracted from it.
    /// </summary>
    /// <param name="pageTitle">Title of the page to parse.</param>
    /// <returns>
    /// The result of <see cref="ExtractInfobox"/>, or "No wikitext returned" when the
    /// response carries no "parse.wikitext" value (for example an API error object).
    /// </returns>
    /// <exception cref="HttpRequestException">The server answered with a non-success status code.</exception>
    static async Task<string> GetInfoboxData(string pageTitle)
    {
        var parameters = new Dictionary<string, string>
        {
            { "action", "parse" },
            { "page", pageTitle },
            { "prop", "wikitext" },
            { "format", "json" }
        };

        string responseString = await PostToApi(parameters);

        using (JsonDocument jsonDoc = JsonDocument.Parse(responseString))
        {
            string wikitext = ReadWikitext(jsonDoc.RootElement);
            if (wikitext == null)
            {
                return "No wikitext returned";
            }

            return ExtractInfobox(wikitext);
        }
    }

    /// <summary>
    /// Reads the decoded wikitext string from a parse response. Accepts both shapes:
    /// "wikitext" as a plain string, or as an object holding the text under "*".
    /// Returns null when neither is present.
    /// </summary>
    private static string ReadWikitext(JsonElement root)
    {
        JsonElement parse;
        JsonElement wikitext;
        JsonElement wrappedText;

        if (root.ValueKind != JsonValueKind.Object || !root.TryGetProperty("parse", out parse))
        {
            return null;
        }

        if (parse.ValueKind != JsonValueKind.Object || !parse.TryGetProperty("wikitext", out wikitext))
        {
            return null;
        }

        if (wikitext.ValueKind == JsonValueKind.String)
        {
            return wikitext.GetString();
        }

        if (wikitext.ValueKind == JsonValueKind.Object
            && wikitext.TryGetProperty("*", out wrappedText)
            && wrappedText.ValueKind == JsonValueKind.String)
        {
            // GetString decodes JSON escapes (\n, \"), unlike GetRawText.
            return wrappedText.GetString();
        }

        return null;
    }

    /// <summary>
    /// Posts form-encoded <paramref name="parameters"/> to the API and returns the response body.
    /// </summary>
    /// <exception cref="HttpRequestException">The server answered with a non-success status code.</exception>
    private static async Task<string> PostToApi(Dictionary<string, string> parameters)
    {
        using (var content = new FormUrlEncodedContent(parameters))
        using (HttpResponseMessage response = await client.PostAsync(ApiUrl, content))
        {
            response.EnsureSuccessStatusCode();
            return await response.Content.ReadAsStringAsync();
        }
    }

    /// <summary>
    /// Returns the first "{{Infobox ...}}" template in <paramref name="wikitext"/>,
    /// including any templates nested inside it.
    /// </summary>
    /// <param name="wikitext">Page wikitext to search.</param>
    /// <returns>
    /// The template text from "{{Infobox" through its matching "}}";
    /// "No infobox found" when the marker is absent (or the input is null/empty);
    /// "Incomplete infobox" when the template is never closed.
    /// </returns>
    static string ExtractInfobox(string wikitext)
    {
        if (string.IsNullOrEmpty(wikitext))
        {
            return "No infobox found";
        }

        int infoboxStart = wikitext.IndexOf(InfoboxMarker, StringComparison.Ordinal);
        if (infoboxStart == -1)
        {
            return "No infobox found";
        }

        // Track "{{" / "}}" nesting so an inner template's "}}" does not end the infobox early.
        int depth = 0;
        int position = infoboxStart;

        while (position < wikitext.Length - 1)
        {
            char current = wikitext[position];
            char next = wikitext[position + 1];

            if (current == '{' && next == '{')
            {
                depth++;
                position += 2;
            }
            else if (current == '}' && next == '}')
            {
                depth--;
                position += 2;

                if (depth == 0)
                {
                    return wikitext.Substring(infoboxStart, position - infoboxStart);
                }
            }
            else
            {
                position++;
            }
        }

        return "Incomplete infobox";
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment