Skip to content

Instantly share code, notes, and snippets.

@Eugeny
Created February 4, 2013 20:12
Show Gist options
  • Save Eugeny/4709323 to your computer and use it in GitHub Desktop.
Save Eugeny/4709323 to your computer and use it in GitHub Desktop.
package by.fksis.schedule.parser;
import fksis.domain.entities.ScheduledClass;
import org.htmlparser.Node;
import org.htmlparser.Parser;
import org.htmlparser.filters.CssSelectorNodeFilter;
import org.htmlparser.http.ConnectionManager;
import org.htmlparser.util.NodeList;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
import java.util.Properties;
/**
 * Downloads the timetable page of a student group and converts its table
 * cells into {@link ScheduledClass} entries.
 *
 * <p>Settings are read from the {@code parser.properties} classpath resource:
 * {@code baseUri} (the group name is appended to it), {@code defaultWeeks},
 * {@code defaultSubgroups}, and the optional pair {@code proxyHost} /
 * {@code proxyPort}.
 *
 * <p>The page is read as a flat list of {@code td} cells, consumed in runs of
 * {@value #CELLS_PER_DAY} cells per day. Within a run, cells 1..7 supply the
 * weeks, time, subgroups, class name, type, room and teacher columns; cell 0
 * is not read (presumably the day label - confirm against the page markup).
 */
public class ScheduleParser {
    /** Regular lesson periods; the array index is the value passed to {@code setTimeSlot}. */
    private final static String[] TIMESLOTS = new String[] { "8:00-9:35",
            "9:45-11:20", "11:40-13:15", "13:25-15:00", "15:20-16:55",
            "17:05-18:40", "18:45-20:20", "20:25-22:00" };

    /**
     * Extra non-uniform timeslots. The index is again the slot number; empty
     * strings are placeholders for slots that have no extra variant.
     */
    private final static String[] TIMESLOTS2 = new String[] { "", "",
            "11:40-15:00", "", "", "", "" };

    /** Number of consecutive {@code td} cells that make up one day. */
    private final static int CELLS_PER_DAY = 8;

    /** Name of the classpath resource holding the parser settings. */
    private final static String PROPERTIES_RESOURCE = "parser.properties";

    private final List<ScheduledClass> scheduledClasses = new ArrayList<ScheduledClass>();

    /**
     * Fetches and parses the schedule of the given group.
     *
     * @param group group identifier, appended verbatim to the {@code baseUri} property
     * @return a parser holding the extracted classes, or {@code null} if loading
     *         the settings, fetching the page or parsing it failed (the failure
     *         is printed to standard output)
     */
    public static ScheduleParser parse(String group) {
        ScheduleParser parser = new ScheduleParser();
        try {
            Properties properties = loadProperties();
            ConnectionManager c = new ConnectionManager();
            configureProxy(c, properties);
            Parser htmlParser = new Parser(c.openConnection(properties
                    .getProperty("baseUri") + group));
            NodeList nodes = htmlParser
                    .extractAllNodesThatMatch(new CssSelectorNodeFilter("td"));
            Node[] nodeArray = nodes.toNodeArray();
            String defaultWeeks = properties.getProperty("defaultWeeks");
            String defaultSubgroups = properties.getProperty("defaultSubgroups");

            // Integer division deliberately ignores a trailing incomplete run of cells.
            for (int day = 0; day < nodeArray.length / CELLS_PER_DAY; day++) {
                int base = day * CELLS_PER_DAY;
                Node[] weekNodes = childrenOf(nodeArray[base + 1]);
                Node[] timeNodes = childrenOf(nodeArray[base + 2]);
                Node[] subgrNodes = childrenOf(nodeArray[base + 3]);
                Node[] classNodes = childrenOf(nodeArray[base + 4]);
                Node[] typeNodes = childrenOf(nodeArray[base + 5]);
                Node[] roomNodes = childrenOf(nodeArray[base + 6]);
                Node[] teacherNodes = childrenOf(nodeArray[base + 7]);

                // The class-name column drives the row count; the other columns
                // are read at the same index and yield null when they are shorter.
                for (int idx = 0; idx < classNodes.length; idx++) {
                    ScheduledClass cls = new ScheduledClass();
                    cls.setGroup(group);
                    cls.setDay(day);
                    cls.setName(textAt(classNodes, idx));
                    cls.setTeacher(textAt(teacherNodes, idx));
                    cls.setType(textAt(typeNodes, idx));
                    cls.setRoom(textAt(roomNodes, idx));
                    cls.setWeeks(parseMask(textAt(weekNodes, idx), defaultWeeks));
                    cls.setSubgroups(parseMask(textAt(subgrNodes, idx),
                            defaultSubgroups));
                    int slot = findTimeSlot(textAt(timeNodes, idx));
                    if (slot >= 0) {
                        cls.setTimeSlot(slot);
                    }
                    parser.scheduledClasses.add(cls);
                }
            }
        } catch (Exception e) {
            // Boundary catch: callers rely on the null return to detect failure.
            System.out.println("EXCEPTION IN PARSER");
            System.out.println(e.getMessage());
            e.printStackTrace(System.out);
            return null;
        }
        return parser;
    }

    /**
     * Returns the classes collected by {@link #parse(String)}.
     *
     * @return a newly allocated array; empty when nothing was extracted
     */
    public ScheduledClass[] getScheduledClasses() {
        return scheduledClasses
                .toArray(new ScheduledClass[scheduledClasses.size()]);
    }

    /**
     * Loads the settings resource from the classpath, always closing the stream.
     *
     * @throws IllegalStateException if the resource is not on the classpath
     * @throws java.io.IOException if the resource cannot be read
     */
    private static Properties loadProperties() throws java.io.IOException {
        InputStream inputStream = ScheduleParser.class.getClassLoader()
                .getResourceAsStream(PROPERTIES_RESOURCE);
        if (inputStream == null) {
            throw new IllegalStateException(PROPERTIES_RESOURCE
                    + " not found on the classpath");
        }
        try {
            Properties properties = new Properties();
            properties.load(inputStream);
            return properties;
        } finally {
            inputStream.close();
        }
    }

    /**
     * Applies the proxy settings only when both {@code proxyHost} and
     * {@code proxyPort} are configured; a settings file without them means a
     * direct connection instead of a failed parse.
     */
    private static void configureProxy(ConnectionManager manager,
            Properties properties) {
        String host = properties.getProperty("proxyHost");
        String port = properties.getProperty("proxyPort");
        if (isBlank(host) || isBlank(port)) {
            return;
        }
        manager.setProxyHost(host.trim());
        manager.setProxyPort(Integer.parseInt(port.trim()));
    }

    /** Returns the children of a cell, or an empty array when it has none. */
    private static Node[] childrenOf(Node cell) {
        NodeList children = cell.getChildren();
        return (children == null) ? new Node[0] : children.toNodeArray();
    }

    /** Extracts the text of {@code nodes[idx]}, or null when the index is out of range. */
    private static String textAt(Node[] nodes, int idx) {
        return (idx < nodes.length) ? extractText(nodes[idx]) : null;
    }

    /**
     * Returns the text of the node's first child, provided the node has at
     * least two children; otherwise returns null.
     */
    private static String extractText(Node n) {
        if (n == null) {
            return null;
        }
        NodeList children = n.getChildren();
        if (children != null && children.size() >= 2) {
            return n.getFirstChild().getText();
        }
        return null;
    }

    /**
     * Maps a time range such as {@code "9:45-11:20"} to its slot index.
     *
     * @return the index in {@link #TIMESLOTS} or {@link #TIMESLOTS2}, or -1 when
     *         the text is null, blank or unknown
     */
    private static int findTimeSlot(String time) {
        if (isBlank(time)) {
            // Blank text must not match the "" placeholders in TIMESLOTS2.
            return -1;
        }
        String wanted = time.trim();
        for (int ts = 0; ts < TIMESLOTS.length; ts++) {
            if (TIMESLOTS[ts].equals(wanted)) {
                return ts;
            }
        }
        for (int ts = 0; ts < TIMESLOTS2.length; ts++) {
            if (TIMESLOTS2[ts].equals(wanted)) {
                return ts;
            }
        }
        return -1;
    }

    /**
     * Turns a comma-separated list of numbers (e.g. {@code "1,3"}) into a digit
     * string ({@code "13"}).
     *
     * @param mask comma-separated numbers; may be null or empty
     * @param def value returned when the mask contains no numbers
     * @return the concatenated numbers, or {@code def}
     * @throws NumberFormatException if a non-blank element is not an integer
     */
    private static String parseMask(String mask, String def) {
        if (mask == null) {
            return def;
        }
        StringBuilder res = new StringBuilder();
        for (String elem : mask.split(",")) {
            String token = elem.trim();
            if (token.length() == 0) {
                // An empty cell or stray comma carries no number; skip it so
                // the default can apply instead of failing the whole parse.
                continue;
            }
            res.append(Integer.parseInt(token));
        }
        return (res.length() == 0) ? def : res.toString();
    }

    private static boolean isBlank(String s) {
        return s == null || s.trim().length() == 0;
    }
}
baseUri = http://www.bsuir.by/psched/schedulegroup?group=
defaultSubgroups = 12
defaultWeeks = 1234
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment