Last active
August 29, 2024 18:09
-
-
Save pagetronic/a52fb5d70dcef6d431363b0f431de4e6 to your computer and use it in GitHub Desktop.
Commons wikimedia to Json
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package live.page.wiki; | |
import live.page.hubd.system.json.Json; | |
import java.util.ArrayList; | |
import java.util.Arrays; | |
import java.util.List; | |
public class WikiLocation { | |
public static List<Double> findLocation(Json data) { | |
for (String key : data.keySet()) { | |
if (key.matches("object location|object location dec|location|location dec|camera location|camera location dec")) { | |
List<Double> coordinates = convertCoordinates(data.getList(key)); | |
if (coordinates != null) { | |
return coordinates; | |
} | |
} | |
if (Json.class.isAssignableFrom(data.get(key).getClass())) { | |
List<Double> coordinates = findLocation(data.getJson(key)); | |
if (coordinates != null) { | |
return coordinates; | |
} | |
} | |
if (List.class.isAssignableFrom(data.get(key).getClass()) && | |
!data.getList(key).isEmpty()) { | |
for (Object item : data.getList(key)) { | |
if (item != null && Json.class.isAssignableFrom(item.getClass())) { | |
List<Double> coordinates = findLocation((Json) item); | |
if (coordinates != null) { | |
return coordinates; | |
} | |
} | |
} | |
} | |
} | |
return null; | |
} | |
private static List<Double> convertCoordinates(List<String> coordinates) { | |
if (coordinates == null) { | |
return null; | |
} | |
coordinates = new ArrayList<>(coordinates); | |
for (int key : new int[]{8, 2}) { | |
if (coordinates.size() > key) { | |
for (String start : new String[]{ | |
"source", "alt", "type", | |
"heading", "region", "zoom", "scale", | |
"...", "sl", "dim", "view"}) { | |
if (coordinates.get(key).trim().toLowerCase().startsWith(start) || coordinates.get(key).trim().isEmpty()) { | |
coordinates.remove(key); | |
break; | |
} | |
} | |
} | |
} | |
try { | |
if (coordinates.size() >= 8 && | |
(coordinates.get(3).equals("N") || coordinates.get(3).equals("S")) && | |
(coordinates.get(7).equals("E") || coordinates.get(7).equals("W")) | |
) { | |
return Arrays.asList( | |
convertCoordinates(Double.parseDouble(coordinates.get(0)), Double.parseDouble(coordinates.get(1)), Double.parseDouble(coordinates.get(2)), coordinates.get(3)), | |
convertCoordinates(Double.parseDouble(coordinates.get(4)), Double.parseDouble(coordinates.get(5)), Double.parseDouble(coordinates.get(6)), coordinates.get(7))); | |
} else { | |
return Arrays.asList(Double.parseDouble(coordinates.get(0)), Double.parseDouble(coordinates.get(1))); | |
} | |
} catch (Exception e) { | |
e.printStackTrace(); | |
} | |
return null; | |
} | |
private static double convertCoordinates(double degree, double minute, double second, String heading) { | |
double decimalDegrees = degree + (minute / 60.0) + (second / 3600.0); | |
if ("W".equals(heading) || "S".equals(heading)) { | |
decimalDegrees = -decimalDegrees; | |
} | |
return decimalDegrees; | |
} | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package live.page.wiki; | |
import info.bliki.wiki.filter.PlainTextConverter; | |
import info.bliki.wiki.model.WikiModel; | |
import live.page.hubd.system.json.Json; | |
import live.page.hubd.system.utils.Fx; | |
import org.apache.commons.text.StringEscapeUtils; | |
import java.io.IOException; | |
import java.net.URLEncoder; | |
import java.nio.charset.StandardCharsets; | |
import java.util.List; | |
import java.util.Map; | |
import java.util.regex.Matcher; | |
import java.util.regex.Pattern; | |
public class WikiParser extends WikiModel { | |
final Json data = new Json(); | |
public WikiParser() { | |
super("", ""); | |
} | |
public static Json getInfos(String title, String text) { | |
WikiParser wikiModel = new WikiParser(); | |
try { | |
wikiModel.render(new PlainTextConverter(), text, new StringBuilder(), true, true); | |
} catch (IOException e) { | |
Fx.log("\n#{" + url(title) + "}"); | |
} | |
Json data = new Json(); | |
if (wikiModel.data.containsKey("information")) { | |
Json information = wikiModel.data.getJson("information"); | |
Json description = new Json(); | |
if (information.containsKey("description") && String.class.isAssignableFrom(information.get("description").getClass())) { | |
description.put("int", information.getString("description")); | |
} else { | |
description = information.getJson("description"); | |
if (description != null && description.containsKey("langswitch")) { | |
description = description.getJson("langswitch"); | |
} | |
} | |
data.put("description", description); | |
if (description == null || description.isEmpty()) { | |
Fx.log("\nD{" + url(title) + "}"); | |
data.put("description", StringEscapeUtils.unescapeHtml4(title).split("\\.")[0]); | |
} | |
if (information.containsKey("author")) { | |
data.put("author", information.get("author")); | |
} | |
} | |
List<Double> coordinates = WikiLocation.findLocation(wikiModel.data); | |
if (coordinates == null) { | |
Fx.log("\nL{" + url(title) + "}"); | |
} | |
data.put("coordinates", coordinates); | |
data.put("data", wikiModel.data); | |
return data; | |
} | |
private static String url(String title) { | |
return " https://commons.wikimedia.org/wiki/" + URLEncoder.encode(StringEscapeUtils.unescapeHtml4(title), StandardCharsets.UTF_8).replace("+", "%20") + " "; | |
} | |
@Override | |
public void substituteTemplateCall(String templateName, Map<String, String> parameterMap, Appendable writer) throws IOException { | |
writer.append("@@Template@").append(templateName.toLowerCase().trim()).append("@"); | |
Json params = new Json(); | |
for (String key : parameterMap.keySet()) { | |
if (key.toLowerCase().trim().equals("prec") || key.toLowerCase().trim().equals("wikidata")) { | |
continue; | |
} | |
WikiParser model = new WikiParser(); | |
StringBuilder builder = new StringBuilder(); | |
model.render(new PlainTextConverter(), parameterMap.get(key), builder, true, false); | |
String str = builder.toString().replace("[\r\n ]+", " ").replaceAll(" +", " ").trim(); | |
writer.append(str); | |
Matcher match = Pattern.compile("@@Template@([^@]+)@", Pattern.MULTILINE).matcher(builder.toString()); | |
Json done = new Json(); | |
while (match.find()) { | |
if (data.containsKey(match.group(1).toLowerCase().trim())) { | |
done.put(match.group(1).toLowerCase().trim(), data.get(match.group(1).toLowerCase().trim())); | |
data.remove(match.group(1).toLowerCase().trim()); | |
} | |
} | |
if (!done.isEmpty()) { | |
params.put(key.toLowerCase().trim(), done); | |
} else if (!str.isEmpty()) { | |
params.put(key.toLowerCase().trim(), str); | |
} | |
} | |
if (params.size() == 1 && params.containsKey("1")) { | |
data.put(templateName.toLowerCase().trim(), params.get("1")); | |
} else if (params.keySet().stream().allMatch(name -> name.matches("[0-9]+"))) { | |
data.put(templateName.toLowerCase().trim(), params.values().stream().toList()); | |
} else { | |
data.put(templateName.toLowerCase().trim(), params); | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment