Skip to content

Instantly share code, notes, and snippets.

@osa1
Created July 13, 2013 12:36
Show Gist options
  • Save osa1/5990612 to your computer and use it in GitHub Desktop.
Save osa1/5990612 to your computer and use it in GitHub Desktop.
package net.osa1.WikiaLyricsParser;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.List;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Node;
import org.jsoup.nodes.TextNode;
import org.jsoup.select.Elements;
/**
 * Extracts song lyrics as plain text from an HTML page that keeps them inside an
 * element with the CSS class {@code lyricbox}.
 */
public class LyricsParser {

    /**
     * Returns the text that sits directly inside the page's {@code lyricbox} element,
     * one text node per output line.
     *
     * <p>Only direct child text nodes are collected; child elements (for example
     * {@code <br>} tags) contribute nothing. Each collected node is followed by
     * {@code '\n'}. If the page contains several {@code lyricbox} elements, the first
     * one is used.
     *
     * @param html the full HTML source of the lyrics page
     * @return the decoded lyric text, one line per text node; empty if the box holds no
     *     eligible text nodes
     * @throws IllegalArgumentException if the page has no element with class
     *     {@code lyricbox}
     */
    public static String parseLyrics(String html) {
        Document doc = Jsoup.parse(html);
        Elements lyricboxes = doc.getElementsByClass("lyricbox");
        // Validate explicitly: a plain `assert` is a no-op unless the JVM runs with -ea,
        // which would leave callers with a bare IndexOutOfBoundsException from get(0).
        if (lyricboxes.isEmpty()) {
            throw new IllegalArgumentException(
                    "no element with class \"lyricbox\" found in the given HTML");
        }
        Element lyricbox = lyricboxes.get(0);
        List<Node> childNodes = lyricbox.childNodes();
        StringBuilder builder = new StringBuilder();
        // The first and last child are skipped on purpose, as in the original code.
        // NOTE(review): presumably they are non-lyric wrapper markup on the target
        // pages -- confirm against a live page before changing the bounds.
        for (int i = 1; i < childNodes.size() - 1; i++) {
            Node n = childNodes.get(i);
            if (n instanceof TextNode) {
                // text() yields the decoded characters. Node.toString() would return
                // the node's outer HTML, i.e. with '&', '<', ... re-escaped as entities.
                builder.append(((TextNode) n).text());
                builder.append('\n');
            }
        }
        return builder.toString();
    }

    /**
     * Demo entry point: downloads a fixed lyrics page, prints the parsed lyrics and
     * then a terminating marker line.
     *
     * @param args ignored
     * @throws IOException if the page cannot be opened or read
     */
    public static void main(String[] args) throws IOException {
        URL url = new URL("http://lyrics.wikia.com/Camel:Rajaz");
        StringBuilder builder = new StringBuilder();
        // try-with-resources closes the reader (and the underlying stream) even when
        // readLine() throws. The charset is explicit so decoding does not depend on the
        // platform default.
        // NOTE(review): assumes the page is served as UTF-8 -- confirm.
        try (BufferedReader in = new BufferedReader(
                new InputStreamReader(url.openStream(), StandardCharsets.UTF_8))) {
            String inputLine;
            while ((inputLine = in.readLine()) != null) {
                // readLine() strips the line terminator; put one back so that text on
                // adjacent source lines is not fused together.
                builder.append(inputLine).append('\n');
            }
        }
        System.out.println(parseLyrics(builder.toString()));
        System.out.println("end-of-program");
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment