Skip to content

Instantly share code, notes, and snippets.

@fiskurgit
Created February 22, 2014 08:49
Show Gist options
  • Save fiskurgit/9150628 to your computer and use it in GitHub Desktop.
Save fiskurgit/9150628 to your computer and use it in GitHub Desktop.
Parse hippocamp.net catalogue from html using JSoup
package hctojson;
import java.io.IOException;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
/**
 * Downloads the hippocamp.net index page and prints, for every element with
 * the CSS class {@code box}, the catalogue number, release title, artist and
 * the text and link of each {@code <li>} entry it contains.
 *
 * <p>The page layout is assumed from the selectors used here (one
 * {@code cellheadlabel} element and one {@code h4} of the form
 * {@code title<br>artist} per box); boxes that do not follow that layout are
 * reported on standard error and skipped instead of aborting the run.
 */
public class hctojson {

    /**
     * Matches any spelling of an HTML line break ({@code <br>}, {@code <br/>},
     * {@code <br />}, any letter case). The exact form returned by
     * {@code Element.html()} depends on the jsoup version and output settings,
     * so a single hard-coded literal is not reliable.
     */
    private static final java.util.regex.Pattern LINE_BREAK =
            java.util.regex.Pattern.compile("<br\\s*/?>", java.util.regex.Pattern.CASE_INSENSITIVE);

    /**
     * Program entry point: fetches the catalogue page and prints its contents.
     *
     * @param args command-line arguments (unused)
     * @throws IOException if the page cannot be fetched
     */
    public static void main(String[] args) throws IOException {
        Document doc = Jsoup.connect("http://hippocamp.net/index.php").get();
        Elements releases = doc.getElementsByClass("box");
        l("Number of releases: " + releases.size());

        for (Element release : releases) {
            // first() returns null when nothing matched, so check before dereferencing.
            Element catLabel = release.getElementsByClass("cellheadlabel").first();
            Element heading = release.getElementsByTag("h4").first();
            if (catLabel == null || heading == null) {
                System.err.println("Skipping box without catalogue number or h4 heading");
                continue;
            }

            // Get the catalogue number:
            String catNum = catLabel.text();

            // Get the release title and artist name: the heading's inner HTML is
            // split at the first line break; without a break the whole heading
            // is taken as the title and the artist is left empty.
            String[] headingParts = LINE_BREAK.split(heading.html(), 2);
            String title = headingParts[0].trim();
            String artist = headingParts.length > 1 ? headingParts[1].trim() : "";
            l("cat: " + catNum + " title: " + title + " artist: " + artist);

            // Get the tracks:
            for (Element track : release.getElementsByTag("li")) {
                String trackTitle = track.text();
                Element link = track.getElementsByTag("a").first();
                // A list item without an anchor yields an empty URL rather than a crash.
                String trackUrl = (link == null) ? "" : link.attr("href");
                l("track: " + trackTitle + " url: " + trackUrl);
            }
        }
    }

    /**
     * Prints a single line to standard output.
     *
     * @param message the text to print
     */
    private static void l(String message) {
        System.out.println(message);
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment