Skip to content

Instantly share code, notes, and snippets.

@MerijnHendriks
Last active April 13, 2021 23:48
Show Gist options
  • Save MerijnHendriks/758921d290efa72c63ea2ef1344b08b4 to your computer and use it in GitHub Desktop.
Save MerijnHendriks/758921d290efa72c63ea2ef1344b08b4 to your computer and use it in GitHub Desktop.
Wikimedia API in .NET 5.0

Notes

Goal

  • download data
  • unzip data
  • import data
  • manage data

Info

Request URL

Example: https://dumps.wikimedia.org/nlwiki/latest/nlwiki-latest-pages-articles.xml.bz2

Ideas

using System.Collections.Generic;
using System.Text;
namespace Wikimedia.Api
{
/// <summary>
/// Identifies one of the dump files that can be requested for a wiki.
/// Each member is used as a key into <c>Constants.Files</c>, which maps it
/// to the file-name suffix of the corresponding dump file.
/// </summary>
/// <remarks>
/// Values are assigned sequentially starting at <see cref="Abstract"/> = 0,
/// so the declaration order must not change if the numeric values are relied upon.
/// </remarks>
public enum EFile
{
    Abstract = 0,
    AllTitles,
    Babel,
    Category,
    CategoryLinks,
    ChangeTag,
    ChangeTagDef,
    ExternalLinks,
    GeoTags,
    Image,
    ImageLinks,
    IWLinks,
    LangLinks,
    Md5Sums,
    Page,
    PageProps,
    PageRestrictions,
    PageLinks,
    PageArticles,
    // NOTE: spelled "Loging" (sic); the name is kept as-is because other code references it.
    PageLoging,
    PageMetaCurrent,
    PageMetaHistory,
    ProtectedTitles,
    Redirect,
    Sha1Sums,
    SiteStats,
    SiteInfoNamespaces,
    Sites,
    StubArticles,
    StubMetaCurrent,
    StubMetaHistory,
    TemplateLinks,
    UserFormerGroups,
    UserGroups,
    WbcEntityUsage
}
/// <summary>
/// URLs and file-name lookups used to talk to the Wikimedia endpoints.
/// </summary>
public static class Constants
{
    /// <summary>
    /// MediaWiki API URL requesting the site matrix (<c>action=sitematrix</c>) as JSON.
    /// </summary>
    public const string SiteMatrixUrl = "https://www.mediawiki.org/w/api.php?action=sitematrix&format=json";

    /// <summary>
    /// Composite format string for a dump file URL.
    /// <c>{0}</c> = wiki language code (e.g. <c>nl</c>), <c>{1}</c> = dump version
    /// (e.g. <c>latest</c>), <c>{2}</c> = file-name suffix taken from <see cref="Files"/>.
    /// </summary>
    /// <remarks>
    /// Example result: <c>https://dumps.wikimedia.org/nlwiki/latest/nlwiki-latest-pages-articles.xml.bz2</c>.
    /// The directory segment must carry the same <c>wiki</c> suffix as the file-name prefix;
    /// the previous format omitted it and could not produce a URL of this shape.
    /// The hard-coded <c>wiki</c> suffix means only "*wiki" projects are addressable.
    /// </remarks>
    public const string DumpFileUrlFormat = "https://dumps.wikimedia.org/{0}wiki/{1}/{0}wiki-{1}-{2}";

    /// <summary>
    /// Maps each <see cref="EFile"/> member to the file-name suffix (including extension)
    /// that is substituted for <c>{2}</c> in <see cref="DumpFileUrlFormat"/>.
    /// </summary>
    /// <remarks>
    /// Declared <c>static</c>: a static class cannot contain instance fields, and the
    /// table is shared, initialise-once data.
    /// </remarks>
    public static readonly Dictionary<EFile, string> Files = new Dictionary<EFile, string>()
    {
        { EFile.Abstract, "abstract.xml.gz" },
        { EFile.AllTitles, "all-titles.gz" },
        { EFile.Babel, "babel.sql.gz" },
        { EFile.Category, "category.sql.gz" },
        { EFile.CategoryLinks, "categorylinks.sql.gz" },
        { EFile.ChangeTag, "change_tag.sql.gz" },
        { EFile.ChangeTagDef, "change_tag_def.sql.gz" },
        { EFile.ExternalLinks, "externallinks.sql.gz" },
        { EFile.GeoTags, "geo_tags.sql.gz" },
        { EFile.Image, "image.sql.gz" },
        { EFile.ImageLinks, "imagelinks.sql.gz" },
        { EFile.IWLinks, "iwlinks.sql.gz" },
        { EFile.LangLinks, "langlinks.sql.gz" },
        { EFile.Md5Sums, "md5sums.txt" },
        { EFile.Page, "page.sql.gz" },
        { EFile.PageProps, "page_props.sql.gz" },
        { EFile.PageRestrictions, "page_restrictions.sql.gz" },
        { EFile.PageLinks, "pagelinks.sql.gz" },
        { EFile.PageArticles, "pages-articles.xml.bz2" },
        { EFile.PageLoging, "pages-logging.xml.gz" },
        { EFile.PageMetaCurrent, "pages-meta-current.xml.bz2" },
        { EFile.PageMetaHistory, "pages-meta-history.xml.bz2" },
        { EFile.ProtectedTitles, "protected_titles.sql.gz" },
        { EFile.Redirect, "redirect.sql.gz" },
        { EFile.Sha1Sums, "sha1sums.txt" },
        { EFile.SiteStats, "site_stats.sql.gz" },
        { EFile.SiteInfoNamespaces, "siteinfo-namespaces.json.gz" },
        { EFile.Sites, "sites.sql.gz" },
        { EFile.StubArticles, "stub-articles.xml.gz" },
        { EFile.StubMetaCurrent, "stub-meta-current.xml.gz" },
        { EFile.StubMetaHistory, "stub-meta-history.xml.gz" },
        { EFile.TemplateLinks, "templatelinks.sql.gz" },
        { EFile.UserFormerGroups, "user_former_groups.sql.gz" },
        { EFile.UserGroups, "user_groups.sql.gz" },
        { EFile.WbcEntityUsage, "wbc_entity_usage.sql.gz" }
    };
}
/// <summary>
/// Convenience wrappers that build Wikimedia URLs from <see cref="Constants"/>
/// and fetch them through <c>Request.get</c>.
/// </summary>
/// <remarks>
/// <c>Request.get</c> is declared outside this file; from its use here it takes a URL
/// string and returns the response as a byte array. Its error behaviour is not visible
/// here — TODO confirm how failures surface to callers.
/// </remarks>
public static class RequestHandler
{
    /// <summary>
    /// Fetches <see cref="Constants.SiteMatrixUrl"/> and decodes the response bytes as UTF-8.
    /// </summary>
    /// <returns>The response body as a string.</returns>
    public static string GetSiteMatrix()
    {
        // The URL constant lives on Constants; no WikipediaConstants type exists in this file.
        byte[] payload = Request.get(Constants.SiteMatrixUrl);
        return Encoding.UTF8.GetString(payload);
    }

    /// <summary>
    /// Fetches a dump file for the given wiki.
    /// </summary>
    /// <param name="code">Value substituted for <c>{0}</c> in <see cref="Constants.DumpFileUrlFormat"/>.</param>
    /// <param name="version">Value substituted for <c>{1}</c> (the dump version segment).</param>
    /// <param name="file">Which dump file to fetch; looked up in <see cref="Constants.Files"/>.</param>
    /// <returns>The raw bytes returned by <c>Request.get</c> for the composed URL.</returns>
    /// <exception cref="System.Collections.Generic.KeyNotFoundException">
    /// <paramref name="file"/> has no entry in <see cref="Constants.Files"/>.
    /// </exception>
    public static byte[] GetDump(string code, string version, EFile file)
    {
        // string has no instance Format method; the static string.Format must be used.
        string url = string.Format(Constants.DumpFileUrlFormat, code, version, Constants.Files[file]);
        return Request.get(url);
    }
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment