Last active
July 16, 2018 08:34
-
-
Save Romern/58945b0b0f86b15859a6454a39100eb8 to your computer and use it in GitHub Desktop.
OkusonParser for LAInf18. Requires https://jsoup.org/packages/jsoup-1.11.2.jar in the same directory.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Compile OkusonParser against the jsoup jar located in the current directory.
javac -cp .:jsoup-1.11.2.jar OkusonParser.java
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Asks for a matriculation number and password, then downloads sheets 1-9
# of the LAInf18 course as sheet<N>.html into the current directory.

# URL-encode whatever arrives on stdin.
# The value is piped in rather than pasted into the Python source, so quotes
# or backslashes inside it can neither break nor inject Python code.
urlencode() {
    python3 -c 'import sys, urllib.parse; print(urllib.parse.quote_plus(sys.stdin.read()))'
}

echo Matrnr:
# -r keeps backslashes in the input literal.
read -r matrnr
echo Password:
read -rs pass

# printf is a shell builtin, so the secret does not show up as a process argument here.
matrnr=$(printf '%s' "$matrnr" | urlencode)
pass=$(printf '%s' "$pass" | urlencode)

for i in $(seq 1 9); do
    # NOTE(review): the credentials are part of the URL and therefore visible in
    # the wget command line while it runs.
    wget "https://www2.math.rwth-aachen.de/LAInf18/QuerySheet?id=$matrnr&passwd=$pass&format=MathJax&resolution=Standard&sheet=$i" -O "sheet$i.html"
done
exit
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import org.jsoup.Jsoup; | |
import org.jsoup.nodes.Document; | |
import org.jsoup.nodes.Element; | |
import org.jsoup.select.Elements; | |
import java.io.File; | |
import java.nio.charset.StandardCharsets; | |
import java.io.IOException; | |
import java.nio.file.Files; | |
import java.util.*; | |
public class OkusonParser { | |
public static void main(String[] args) throws Exception { | |
if (args.length < 1) { | |
System.out.println("Usage: java OkusonParser [Sheet-Folder-1] ... [Sheet-Folder-n]\n\n" + | |
"In each sheet-folder have to be only files named sheetXX.html, and the same amount in each folder.\n" + | |
"Use downloadskript.sh to download them properly instead of through the browser.\n"); | |
return; | |
} | |
File init = new File(args[0]); | |
if (!init.isDirectory()) { | |
System.err.println(args[0]+" is not a directory!"); | |
return; | |
} | |
String[] initf = init.list(); | |
if (initf == null) { | |
System.err.println("Error reading "+args[0]+"."); | |
return; | |
} | |
// Parse the input folders to Strings: | |
int sheets = initf.length; | |
String input[][] = new String[args.length][sheets]; | |
File[][] files = new File[args.length][sheets]; | |
for (int i = 0; i< args.length; i++) { | |
File cur = new File(args[i]); | |
File[] cure = cur.listFiles(); | |
if (!cur.isDirectory()) { | |
System.err.println(args[i]+" is not a directory!"); | |
return; | |
} | |
if ( cure == null) { | |
System.err.println("Can't list files on folder "+args[i]+"!"); | |
return; | |
} | |
if (cure.length != sheets) { | |
System.err.println("Sheet folder \""+args[i]+"\" has a different amount of files than \""+args[0]+"\"!"); | |
return; | |
} | |
files[i] = cure; | |
Arrays.sort(files[i]); | |
} | |
for (int i = 0; i< args.length; i++) { | |
for(int j = 0; j< sheets; j++) { | |
input[i][j] = preProcessRules(readFile(files[i][j])); | |
} | |
} | |
// Parse the input files into Objects, with no duplicate questions | |
Blatt[] blaetter = new Blatt[sheets]; | |
for(int j = 0; j< sheets; j++) { | |
blaetter[j] = new Blatt(files[0][j].getName().replace("sheet","").replace(".html","")); | |
} | |
for (int i = 0; i< args.length; i++) { | |
for(int j = 0; j< sheets; j++) { | |
Document doc = Jsoup.parse(input[i][j]); | |
Elements elements = doc.body().select("*"); | |
String currentaufgabe = ""; | |
String currentquestion = ""; | |
boolean newquestion = true; | |
for (Element element : elements) { | |
switch(element.className()) { | |
case "hidden": | |
String curhead = element.text().replace("\\(","").replace("\\)",""); | |
if (!header.contains(curhead)) | |
header += "\n"+curhead+"\n"; | |
break; | |
case "number": | |
case "exnr": | |
currentaufgabe = element.text(); | |
if (!blaetter[j].aufgaben.containsKey(currentaufgabe)) { | |
blaetter[j].aufgaben.put(currentaufgabe,new Aufgabe()); | |
} | |
break; | |
case "intro": | |
case "extext": | |
if (blaetter[j].aufgaben.get(currentaufgabe).aufgabentext == null) { | |
blaetter[j].aufgaben.get(currentaufgabe).aufgabentext = element.text(); | |
} | |
break; | |
case "question": | |
if (!blaetter[j].aufgaben.get(currentaufgabe).subaufgaben.containsKey(element.text())) { | |
currentquestion = element.text(); | |
blaetter[j].aufgaben.get(currentaufgabe).subaufgaben.put(currentquestion,""); | |
newquestion = true; | |
} else { | |
newquestion = false; | |
} | |
break; | |
case "erg": | |
if (newquestion) { | |
blaetter[j].aufgaben.get(currentaufgabe).subaufgaben.put(currentquestion,element.text()); | |
} | |
break; | |
} | |
} | |
} | |
} | |
//Parse the objects into a latex file | |
StringBuilder out = new StringBuilder(); | |
out.append(header); | |
out.append(begin); | |
for (Blatt b : blaetter) { | |
out.append(String.format("\\section{Blatt %s}\n",b.blattnummer)); | |
for (String aufgabennummer : b.aufgaben.keySet()) { | |
out.append(String.format("\\subsection{Aufgabe %s}\n",aufgabennummer)); | |
out.append(b.aufgaben.get(aufgabennummer).aufgabentext); | |
out.append("\n\\begin{itemize}\n"); | |
int j = 0; | |
for (String sub : b.aufgaben.get(aufgabennummer).subaufgaben.keySet()) { | |
out.append("\n\\item "); | |
out.append(sub); | |
out.append("\n"); | |
if (j<b.aufgaben.get(aufgabennummer).subaufgaben.size()) | |
out.append(String.format("\\newline\\fbox{\\parbox{0.9\\textwidth}{Solution: %s}}",b.aufgaben.get(aufgabennummer).subaufgaben.get(sub))); | |
} | |
out.append("\n\\end{itemize}\n"); | |
} | |
} | |
out.append("\n\\end{document}"); | |
System.out.println(postProcessRules(out.toString())); | |
} | |
public static class Aufgabe { | |
String aufgabentext; | |
Map<String,String> subaufgaben; | |
public Aufgabe () { | |
subaufgaben = new TreeMap<>(); | |
} | |
} | |
public static class Blatt { | |
String blattnummer; | |
Map<String, Aufgabe> aufgaben; | |
public Blatt (String bn) { | |
blattnummer = bn; | |
aufgaben = new TreeMap<>(); | |
} | |
} | |
private static String readFile(File file) throws IOException { | |
byte[] encoded = Files.readAllBytes(file.toPath()); | |
return new String(encoded, StandardCharsets.UTF_8); | |
} | |
private static String postProcessRules(String prefinal) { | |
return prefinal.replace("verbatim}\\","verbatim}") | |
.replace("\\begin{itemize}\n\n\\end{itemize}",""); | |
} | |
private static String preProcessRules(String init) { | |
return init.replace("</p>\n","</p>\\\\\n") | |
.replace("\\\\%","\\\\") | |
.replace("<code>","\\verb|") | |
.replace("</code>","|") | |
.replace("{};","{ };") | |
.replace("{}","\\{\\}") | |
.replace("<ol>","\\begin{enumerate}[label=(\\alph*)]") | |
.replace("<ul>","\\begin{enumerate}[label=(\\roman*)]") | |
.replace("</ol>","\\end{enumerate}") | |
.replace("</ul>","\\end{enumerate}") | |
.replace("<dt>","\\textbf{") | |
.replace("</dt>","}") | |
.replace("<li>","\\item ") | |
.replace("</li>","") | |
.replace("<em>","\\textit{") | |
.replace("</em>","}") | |
.replace("\\newcommand{\\ExpandedCoefficientMatrix}[2]{<![CDATA[\\left( \\!\\!\\! \\begin{array}{c|c} {#1} & {#2} \\end{array} \\!\\!\\! \\right)]]>}","\\newcommand{\\ExpandedCoefficientMatrix}[2]{\\left( \\!\\!\\! \\begin{array}{c|c} {#1} & {#2} \\end{array} \\!\\!\\! \\right)]}") | |
.replace("<p>[","<p>$[$") | |
.replace("]</p>","$]$</p>") | |
.replaceAll("(?m)%.*$",""); | |
} | |
private static String header = "\\documentclass[12pt,a4paper]{article}\n" | |
+"\\usepackage[utf8]{inputenc}\n" | |
+"\\usepackage[german]{babel}\n" | |
+"\\usepackage{amsmath}\n" | |
+"\\usepackage{amsfonts}\n" | |
+"\\usepackage{amssymb}\n" | |
+"\\usepackage{amsthm}\n" | |
+"\\usepackage{mathtools}\n" | |
+"\\usepackage{enumitem}\n" | |
+"\\usepackage[top=1in, bottom=1.25in, left=1.25in, right=1.25in]{geometry}\n" | |
+"\\usepackage{hyperref}\n" | |
+"\\usepackage{tikz}\n" | |
+"\\usepackage{pb-diagram}\n" | |
+"\\usetikzlibrary{arrows.meta,positioning,calc}\n" | |
+"\\tikzset{graph node/.style={circle,fill=black,draw,minimum size=5pt,inner sep=0pt},node distance=1.5cm and 1.5cm,graph/.style={ }}\n" | |
+"\\newcommand{\\n}{~\\\\}\n"; | |
private static String begin = "\n\\title{TITEL}\n" | |
+"\\author{der boy der g, sick wie leukaemie}\n" | |
+"\\begin{document}\n" | |
+"\\maketitle\n" | |
+"\\setcounter{secnumdepth}{0}\n"; | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Run the parser on the given sheet folders; "$@" is quoted so that folder
# paths containing spaces are passed through as single arguments.
java -cp .:jsoup-1.11.2.jar OkusonParser "$@"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment