Created
March 16, 2015 17:43
-
-
Save glena/e622b30fc3af24ffb3ba to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package lib | |
import (
	"bufio"
	"bytes"
	"errors"
	"io"
	"strings"
	"syscall"
	"unicode/utf8"
)
// CharsetISO88591er converts an ISO-8859-1 (Latin-1) byte stream to UTF-8.
//
// Each input byte is treated as the Unicode code point with the same numeric
// value: bytes below utf8.RuneSelf are passed through unchanged, every other
// byte is re-encoded as a multi-byte UTF-8 sequence.
type CharsetISO88591er struct {
	r   io.ByteReader // source of raw ISO-8859-1 bytes; nil when constructed from a nil reader
	buf *bytes.Buffer // UTF-8 bytes of the last decoded non-ASCII byte that are still to be returned
}

// NewCharsetISO88591 returns a decoder that reads ISO-8859-1 bytes from r.
//
// If r already implements io.ByteReader it is used directly; any other reader
// is wrapped in a bufio.Reader so that byte-at-a-time access is available.
// A nil r yields a decoder whose read methods report syscall.EINVAL.
func NewCharsetISO88591(r io.Reader) *CharsetISO88591er {
	cs := &CharsetISO88591er{
		buf: bytes.NewBuffer(make([]byte, 0, utf8.UTFMax)),
	}
	switch src := r.(type) {
	case nil:
		// No source: leave cs.r nil so reads fail with EINVAL instead of panicking.
	case io.ByteReader:
		cs.r = src
	default:
		cs.r = bufio.NewReader(src)
	}
	return cs
}

// ReadByte returns the next byte of the UTF-8 encoding of the input.
//
// It returns syscall.EINVAL when the decoder has no source reader, and
// otherwise passes through any error reported by the source.
func (cs *CharsetISO88591er) ReadByte() (b byte, err error) {
	// http://unicode.org/Public/MAPPINGS/ISO8859/8859-1.TXT
	// Date: 1999 July 27; Last modified: 27-Feb-2001 05:08
	if cs == nil || cs.r == nil {
		return 0, syscall.EINVAL
	}
	// Finish handing out the bytes of a previously encoded rune first.
	if cs.buf.Len() > 0 {
		return cs.buf.ReadByte()
	}
	c, err := cs.r.ReadByte()
	if err != nil {
		return 0, err
	}
	if c < utf8.RuneSelf {
		// Same single-byte encoding in both charsets.
		return c, nil
	}
	// The byte value is the code point; encode it and return its first byte,
	// keeping the remainder in buf for the following calls.
	cs.buf.WriteRune(rune(c))
	return cs.buf.ReadByte()
}

// Read fills p with UTF-8 bytes produced by ReadByte and returns how many
// bytes were stored.
//
// It keeps reading until p is full or ReadByte fails; in the latter case the
// bytes decoded so far are returned together with that error. A zero-length p
// returns 0, nil. Without a source reader it returns syscall.EINVAL.
func (cs *CharsetISO88591er) Read(p []byte) (int, error) {
	if cs == nil || cs.r == nil {
		return 0, syscall.EINVAL
	}
	n := 0
	for n < len(p) {
		c, err := cs.ReadByte()
		if err != nil {
			return n, err
		}
		p[n] = c
		n++
	}
	return n, nil
}
// isCharset reports whether charset equals any entry of names, ignoring
// letter case. It returns false when names is empty.
func isCharset(charset string, names []string) bool {
	for _, name := range names {
		// EqualFold compares without building lower-cased copies of either string.
		if strings.EqualFold(charset, name) {
			return true
		}
	}
	return false
}
func IsCharsetISO88591(charset string) bool { | |
// http://www.iana.org/assignments/character-sets | |
// (last updated 2010-11-04) | |
names := []string{ | |
// Name | |
"ISO_8859-1:1987", | |
// Alias (preferred MIME name) | |
"ISO-8859-1", | |
// Aliases | |
"iso-ir-100", | |
"ISO_8859-1", | |
"latin1", | |
"l1", | |
"IBM819", | |
"CP819", | |
"csISOLatin1", | |
} | |
return isCharset(charset, names) | |
} | |
func IsCharsetUTF8(charset string) bool { | |
names := []string{ | |
"UTF-8", | |
// Default | |
"", | |
} | |
return isCharset(charset, names) | |
} | |
func CharsetReader(charset string, input io.Reader) (io.Reader, error) { | |
switch { | |
case IsCharsetUTF8(charset): | |
return input, nil | |
case IsCharsetISO88591(charset): | |
return NewCharsetISO88591(input), nil | |
} | |
return nil, errors.New("CharsetReader: unexpected charset: " + charset) | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"fmt" | |
rss "github.com/jteeuwen/go-pkg-rss" | |
"os" | |
"./lib" | |
) | |
/*
Feed listings:
	http://www.infobae.com/rss
	http://www.pagina12.com.ar/usuarios/rss.php
Unmarshalling ISO-8859-1 XML input in Go:
	http://stackoverflow.com/questions/6002619/unmarshal-an-iso-8859-1-xml-input-in-go
*/
var pendingItems chan *rss.Item | |
func main() { | |
fmt.Println("Start") | |
feeds := []string{ | |
//"http://cdn01.ib.infobae.com/adjuntos/162/rss/politica.xml", | |
"http://cdn01.ib.infobae.com/adjuntos/162/rss/Infobae.xml", | |
//"http://cdn01.ib.infobae.com/adjuntos/162/rss/economia.xml", | |
//"http://cdn01.ib.infobae.com/adjuntos/162/rss/sociedad.xml", | |
//"http://cdn01.ib.infobae.com/adjuntos/162/rss/finanzas.xml", | |
//"http://cdn01.ib.infobae.com/adjuntos/162/rss/policiales.xml", | |
"http://www.pagina12.com.ar/diario/rss/principal.xml", | |
//"http://www.pagina12.com.ar/diario/rss/ultimas_noticias.xml", | |
} | |
pendingItems = make(chan *rss.Item, 50) | |
PullFeeds(feeds) | |
for item := range pendingItems { | |
fmt.Println("\t",item.Title) | |
fmt.Println("\t","Links") | |
for j := range item.Links { | |
link := item.Links[j] | |
fmt.Println("\t\t", link.Href) | |
} | |
fmt.Println("\t","Categories") | |
for k := range item.Categories { | |
category := item.Categories[k] | |
fmt.Println("\t\t", category.Domain, category.Text) | |
} | |
} | |
} | |
func PullFeeds(feeds []string) { | |
feed := rss.New(5, true, chanHandler, itemHandler) | |
for f := range feeds { | |
uri := feeds[f] | |
go Pull(feed, uri) | |
} | |
} | |
func Pull(feed *rss.Feed, uri string) { | |
if err := feed.Fetch(uri, lib.CharsetReader); err != nil { | |
fmt.Fprintf(os.Stderr, "[e] %s: %s", uri, err) | |
fmt.Println(err) | |
return | |
} | |
} | |
func chanHandler(feed *rss.Feed, newchannels []*rss.Channel) { | |
fmt.Printf("%d new channel(s) in %s\n", len(newchannels), feed.Url) | |
} | |
func itemHandler(feed *rss.Feed, ch *rss.Channel, newitems []*rss.Item) { | |
fmt.Printf("%d new item(s) in %s\n", len(newitems), feed.Url) | |
for i := range newitems { | |
pendingItems <- newitems[i] | |
} | |
} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment