Created
February 4, 2013 20:12
-
-
Save Eugeny/4709323 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package by.fksis.schedule.parser; | |
import fksis.domain.entities.ScheduledClass; | |
import org.htmlparser.Node; | |
import org.htmlparser.Parser; | |
import org.htmlparser.filters.CssSelectorNodeFilter; | |
import org.htmlparser.http.ConnectionManager; | |
import org.htmlparser.util.NodeList; | |
import java.io.InputStream; | |
import java.util.ArrayList; | |
import java.util.List; | |
import java.util.Properties; | |
public class ScheduleParser { | |
private final static String[] TIMESLOTS = new String[] { "8:00-9:35", | |
"9:45-11:20", "11:40-13:15", "13:25-15:00", "15:20-16:55", | |
"17:05-18:40", "18:45-20:20", "20:25-22:00" }; | |
private final static String[] TIMESLOTS2 = new String[] { "", "", | |
"11:40-15:00", "", "", "", "" }; // Extra non-uniform timeslots | |
private List<ScheduledClass> scheduledClasses = new ArrayList<ScheduledClass>(); | |
public static ScheduleParser parse(String group) { | |
ScheduleParser parser = new ScheduleParser(); | |
try { | |
InputStream inputStream = parser.getClass().getClassLoader() | |
.getResourceAsStream("parser.properties"); | |
Properties properties = new Properties(); | |
properties.load(inputStream); | |
ConnectionManager c = new ConnectionManager(); | |
c.setProxyHost(properties.getProperty("proxyHost")); | |
c.setProxyPort(Integer.parseInt(properties.getProperty("proxyPort"))); | |
Parser htmlParser = new Parser(c.openConnection(properties | |
.getProperty("baseUri") + group)); | |
NodeList nodes = htmlParser | |
.extractAllNodesThatMatch(new CssSelectorNodeFilter("td")); | |
Node[] nodeArray = nodes.toNodeArray(); | |
for (int day = 0; day < nodeArray.length / 8; day++) { | |
Node[] weekNodes = nodeArray[day * 8 + 1].getChildren() | |
.toNodeArray(); | |
Node[] timeNodes = nodeArray[day * 8 + 2].getChildren() | |
.toNodeArray(); | |
Node[] subgrNodes = nodeArray[day * 8 + 3].getChildren() | |
.toNodeArray(); | |
Node[] classNodes = nodeArray[day * 8 + 4].getChildren() | |
.toNodeArray(); | |
Node[] typeNodes = nodeArray[day * 8 + 5].getChildren() | |
.toNodeArray(); | |
Node[] roomNodes = nodeArray[day * 8 + 6].getChildren() | |
.toNodeArray(); | |
Node[] teacherNodes = nodeArray[day * 8 + 7].getChildren() | |
.toNodeArray(); | |
for (int idx = 0; idx < classNodes.length; idx++) { | |
ScheduledClass cls = new ScheduledClass(); | |
cls.setGroup(group); | |
cls.setDay(day); | |
cls.setName(extractText(classNodes[idx])); | |
cls.setTeacher(extractText(teacherNodes[idx])); | |
cls.setType(extractText(typeNodes[idx])); | |
cls.setRoom(extractText(roomNodes[idx])); | |
cls.setWeeks(parseMask(extractText(weekNodes[idx]), | |
properties.getProperty("defaultWeeks"))); | |
cls.setSubgroups(parseMask(extractText(subgrNodes[idx]), | |
properties.getProperty("defaultSubgroups"))); | |
for (int ts = 0; ts < TIMESLOTS.length; ts++) | |
if (TIMESLOTS[ts].equals(extractText(timeNodes[idx]))) | |
cls.setTimeSlot(ts); | |
for (int ts = 0; ts < TIMESLOTS2.length; ts++) | |
if (TIMESLOTS2[ts].equals(extractText(timeNodes[idx]))) | |
cls.setTimeSlot(ts); | |
parser.scheduledClasses.add(cls); | |
} | |
} | |
} catch (Exception e) { | |
System.out.println("EXCEPTION IN PARSER"); | |
System.out.println(e.getMessage()); | |
e.printStackTrace(System.out); | |
return null; | |
} | |
return parser; | |
} | |
public ScheduledClass[] getScheduledClasses() { | |
return (ScheduledClass[]) scheduledClasses | |
.toArray(new ScheduledClass[] {}); | |
} | |
private static String extractText(Node n) { | |
if (n.getChildren().size() >= 2) { | |
return n.getFirstChild().getText(); | |
} | |
return null; | |
} | |
private static String parseMask(String mask, String def) { | |
String res = ""; | |
if (mask == null) | |
return def; | |
for (String elem : mask.split(",")) | |
res += Integer.parseInt(elem); | |
return (res.length() == 0) ? def : res; | |
} | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
baseUri = http://www.bsuir.by/psched/schedulegroup?group=
defaultSubgroups = 12
defaultWeeks = 1234
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment