Created
January 30, 2013 12:38
-
-
Save butlermh/4673011 to your computer and use it in GitHub Desktop.
Using Apache Commons IO to fix BOM problems when using the Rome RSS parser library
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package rss; | |
import org.xml.sax.InputSource; | |
import java.io.*; | |
import java.net.*; | |
import com.sun.syndication.io.*; | |
import org.apache.commons.io.IOUtils; | |
import org.apache.commons.io.input.BOMInputStream; | |
import org.junit.Test; | |
public class RssEncodingTest { | |
String url = "http://www.moneydj.com/KMDJ/RssCenter.aspx?svc=NH&fno=1&arg=X0000000"; | |
// This works because we use InputSource direct from the UrlConnection's InputStream | |
@Test | |
public void test01() throws MalformedURLException, IOException, | |
IllegalArgumentException, FeedException { | |
try (InputStream is = new URL(url).openConnection().getInputStream()) { | |
InputSource source = new InputSource(is); | |
System.out.println("description: " | |
+ new SyndFeedInput().build(source).getDescription()); | |
} | |
} | |
// But a String input fails because the byte order mark problem | |
@Test | |
public void test02() throws MalformedURLException, IOException, | |
IllegalArgumentException, FeedException { | |
String html = IOUtils.toString(new URL(url).openConnection() | |
.getInputStream()); | |
Reader reader = new StringReader(html); | |
System.out.println("description: " | |
+ new SyndFeedInput().build(reader).getDescription()); | |
} | |
// We can use Apache Commons IO to fix the byte order mark | |
@Test | |
public void test03() throws MalformedURLException, IOException, | |
IllegalArgumentException, FeedException { | |
String html = IOUtils.toString(new URL(url).openConnection() | |
.getInputStream()); | |
try (BOMInputStream bomIn = new BOMInputStream( | |
IOUtils.toInputStream(html))) { | |
String f = IOUtils.toString(bomIn); | |
Reader reader = new StringReader(f); | |
System.out.println("description: " | |
+ new SyndFeedInput().build(reader).getDescription()); | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment