Skip to content

Instantly share code, notes, and snippets.

@butlermh
Created January 30, 2013 12:38
Show Gist options
  • Save butlermh/4673011 to your computer and use it in GitHub Desktop.
Save butlermh/4673011 to your computer and use it in GitHub Desktop.
Using Apache Commons IO to fix BOM problems when using the Rome RSS parser library
package rss;
import org.xml.sax.InputSource;
import java.io.*;
import java.net.*;
import com.sun.syndication.io.*;
import org.apache.commons.io.IOUtils;
import org.apache.commons.io.input.BOMInputStream;
import org.junit.Test;
/**
 * Demonstrates how a leading byte order mark (BOM) affects the Rome RSS parser and
 * how Apache Commons IO's {@link BOMInputStream} can be used to strip it.
 *
 * <p>Every test downloads the live feed at {@link #url}, so network access is
 * required and the outcome depends on what the remote server returns.
 */
public class RssEncodingTest {

    /**
     * Charset name used for every byte/String conversion in this class.
     *
     * <p>The single-argument {@link BOMInputStream} constructor only strips a UTF-8
     * BOM, so the conversions are pinned to UTF-8 rather than left to the platform
     * default charset, which would make the round trip in {@link #test03()}
     * machine-dependent.
     */
    private static final String FEED_ENCODING = "UTF-8";

    /** Address of the RSS feed exercised by the tests. */
    String url = "http://www.moneydj.com/KMDJ/RssCenter.aspx?svc=NH&fno=1&arg=X0000000";

    /**
     * Parses the feed through an {@link InputSource} built directly on the
     * connection's {@link InputStream}.
     *
     * <p>Per the original author, this works because the parser receives the raw
     * bytes and can deal with the byte order mark itself.
     *
     * @throws IOException   if the feed cannot be downloaded
     * @throws FeedException if Rome cannot parse the feed
     */
    @Test
    public void test01() throws IOException, FeedException {
        try (InputStream is = new URL(url).openConnection().getInputStream()) {
            InputSource source = new InputSource(is);
            System.out.println("description: "
                    + new SyndFeedInput().build(source).getDescription());
        }
    }

    /**
     * Parses the feed from a String that still contains the byte order mark.
     *
     * <p>Per the original author, this is the failing case: a String input breaks
     * because of the byte order mark, which is handed to the parser as ordinary
     * content via the {@link Reader}.
     *
     * @throws IOException   if the feed cannot be downloaded
     * @throws FeedException if Rome cannot parse the feed
     */
    @Test
    public void test02() throws IOException, FeedException {
        String html = fetchFeedAsString();
        Reader reader = new StringReader(html);
        System.out.println("description: "
                + new SyndFeedInput().build(reader).getDescription());
    }

    /**
     * Parses the feed from a String after removing the byte order mark with
     * Apache Commons IO's {@link BOMInputStream}.
     *
     * @throws IOException   if the feed cannot be downloaded or re-read
     * @throws FeedException if Rome cannot parse the feed
     */
    @Test
    public void test03() throws IOException, FeedException {
        String html = fetchFeedAsString();
        // Re-encode with the same charset used for decoding so the BOM is turned
        // back into the exact byte sequence that BOMInputStream recognises.
        try (BOMInputStream bomIn = new BOMInputStream(
                IOUtils.toInputStream(html, FEED_ENCODING))) {
            String f = IOUtils.toString(bomIn, FEED_ENCODING);
            Reader reader = new StringReader(f);
            System.out.println("description: "
                    + new SyndFeedInput().build(reader).getDescription());
        }
    }

    /**
     * Downloads the feed at {@link #url} and returns its content as a String,
     * including any leading byte order mark.
     *
     * <p>The connection's stream is closed before returning, even on failure.
     *
     * @return the feed body decoded as UTF-8
     * @throws IOException if the connection cannot be opened or read
     */
    private String fetchFeedAsString() throws IOException {
        try (InputStream in = new URL(url).openConnection().getInputStream()) {
            return IOUtils.toString(in, FEED_ENCODING);
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment