- download data
- unzip data
- import data
- manage data
Example dump URL (Dutch Wikipedia, latest pages-articles dump): https://dumps.wikimedia.org/nlwiki/latest/nlwiki-latest-pages-articles.xml.bz2
using System;
using System.Collections.Generic;
using System.Text;
namespace Wikimedia.Api
{
    /// <summary>
    /// Identifies one of the files published as part of a Wikimedia dump.
    /// Each member is mapped to its file-name suffix in <see cref="Constants.Files"/>.
    /// </summary>
    public enum EFile
    {
        Abstract = 0,
        AllTitles,
        Babel,
        Category,
        CategoryLinks,
        ChangeTag,
        ChangeTagDef,
        ExternalLinks,
        GeoTags,
        Image,
        ImageLinks,
        IWLinks,
        LangLinks,
        Md5Sums,
        Page,
        PageProps,
        PageRestrictions,
        PageLinks,
        PageArticles,
        // NOTE: spelling ("Loging") is kept as-is because the member name is part of the public API.
        PageLoging,
        PageMetaCurrent,
        PageMetaHistory,
        ProtectedTitles,
        Redirect,
        Sha1Sums,
        SiteStats,
        SiteInfoNamespaces,
        Sites,
        StubArticles,
        StubMetaCurrent,
        StubMetaHistory,
        TemplateLinks,
        UserFormerGroups,
        UserGroups,
        WbcEntityUsage
    }

    /// <summary>
    /// URLs and file-name suffixes used to build Wikimedia requests.
    /// </summary>
    public static class Constants
    {
        /// <summary>
        /// MediaWiki API endpoint requesting the site matrix in JSON format.
        /// </summary>
        public const string SiteMatrixUrl = "https://www.mediawiki.org/w/api.php?action=sitematrix&format=json";

        /// <summary>
        /// Composite format string for a dump file URL:
        /// {0} = wiki language code (e.g. "nl"), {1} = dump version (e.g. "latest"),
        /// {2} = file-name suffix taken from <see cref="Files"/>.
        /// Example result: https://dumps.wikimedia.org/nlwiki/latest/nlwiki-latest-pages-articles.xml.bz2
        /// </summary>
        /// <remarks>
        /// The directory segment carries the same "wiki" suffix as the file name;
        /// without it the generated URL does not match the published dump layout.
        /// </remarks>
        public const string DumpFileUrlFormat = "https://dumps.wikimedia.org/{0}wiki/{1}/{0}wiki-{1}-{2}";

        /// <summary>
        /// Maps every <see cref="EFile"/> member to the file-name suffix that is
        /// substituted for {2} in <see cref="DumpFileUrlFormat"/>.
        /// </summary>
        // Must be static: a static class cannot declare instance members.
        public static readonly Dictionary<EFile, string> Files = new Dictionary<EFile, string>()
        {
            { EFile.Abstract, "abstract.xml.gz" },
            { EFile.AllTitles, "all-titles.gz" },
            { EFile.Babel, "babel.sql.gz" },
            { EFile.Category, "category.sql.gz" },
            { EFile.CategoryLinks, "categorylinks.sql.gz" },
            { EFile.ChangeTag, "change_tag.sql.gz" },
            { EFile.ChangeTagDef, "change_tag_def.sql.gz" },
            { EFile.ExternalLinks, "externallinks.sql.gz" },
            { EFile.GeoTags, "geo_tags.sql.gz" },
            { EFile.Image, "image.sql.gz" },
            { EFile.ImageLinks, "imagelinks.sql.gz" },
            { EFile.IWLinks, "iwlinks.sql.gz" },
            { EFile.LangLinks, "langlinks.sql.gz" },
            { EFile.Md5Sums, "md5sums.txt" },
            { EFile.Page, "page.sql.gz" },
            { EFile.PageProps, "page_props.sql.gz" },
            { EFile.PageRestrictions, "page_restrictions.sql.gz" },
            { EFile.PageLinks, "pagelinks.sql.gz" },
            { EFile.PageArticles, "pages-articles.xml.bz2" },
            { EFile.PageLoging, "pages-logging.xml.gz" },
            { EFile.PageMetaCurrent, "pages-meta-current.xml.bz2" },
            { EFile.PageMetaHistory, "pages-meta-history.xml.bz2" },
            { EFile.ProtectedTitles, "protected_titles.sql.gz" },
            { EFile.Redirect, "redirect.sql.gz" },
            { EFile.Sha1Sums, "sha1sums.txt" },
            { EFile.SiteStats, "site_stats.sql.gz" },
            { EFile.SiteInfoNamespaces, "siteinfo-namespaces.json.gz" },
            { EFile.Sites, "sites.sql.gz" },
            { EFile.StubArticles, "stub-articles.xml.gz" },
            { EFile.StubMetaCurrent, "stub-meta-current.xml.gz" },
            { EFile.StubMetaHistory, "stub-meta-history.xml.gz" },
            { EFile.TemplateLinks, "templatelinks.sql.gz" },
            { EFile.UserFormerGroups, "user_former_groups.sql.gz" },
            { EFile.UserGroups, "user_groups.sql.gz" },
            { EFile.WbcEntityUsage, "wbc_entity_usage.sql.gz" }
        };
    }

    /// <summary>
    /// Thin helpers that build Wikimedia URLs and hand them to <c>Request.get</c>.
    /// </summary>
    /// <remarks>
    /// <c>Request</c> is declared outside this file; it is presumably an HTTP GET
    /// helper returning the response body as bytes - confirm against its definition.
    /// </remarks>
    public static class RequestHandler
    {
        /// <summary>
        /// Requests <see cref="Constants.SiteMatrixUrl"/> and decodes the returned bytes as UTF-8.
        /// </summary>
        /// <returns>The response body as a string.</returns>
        public static string GetSiteMatrix()
        {
            return Encoding.UTF8.GetString(Request.get(Constants.SiteMatrixUrl));
        }

        /// <summary>
        /// Requests a single dump file.
        /// </summary>
        /// <param name="code">Value substituted for {0} in <see cref="Constants.DumpFileUrlFormat"/> (e.g. "nl").</param>
        /// <param name="version">Value substituted for {1} in <see cref="Constants.DumpFileUrlFormat"/> (e.g. "latest").</param>
        /// <param name="file">The dump file to fetch; must have an entry in <see cref="Constants.Files"/>.</param>
        /// <returns>The bytes returned by <c>Request.get</c> for the composed URL.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="code"/> or <paramref name="version"/> is null.</exception>
        /// <exception cref="KeyNotFoundException"><paramref name="file"/> has no entry in <see cref="Constants.Files"/>.</exception>
        public static byte[] GetDump(string code, string version, EFile file)
        {
            // string.Format renders null arguments as empty text, which would
            // silently produce a malformed URL - fail early instead.
            if (code == null)
            {
                throw new ArgumentNullException(nameof(code));
            }

            if (version == null)
            {
                throw new ArgumentNullException(nameof(version));
            }

            string url = string.Format(Constants.DumpFileUrlFormat, code, version, Constants.Files[file]);
            return Request.get(url);
        }
    }
}