Created
January 17, 2019 20:10
-
-
Save adjam/f1e54514075788fc079adb44e8beceb1 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"fmt" | |
"io" | |
"bytes" | |
"os" | |
"log" | |
"encoding/xml" | |
) | |
// ParserState tracks how far the XML scan has progressed so that, when
// decoding fails, the record being processed can be identified and re-read
// from the underlying file.
//
// The field order is significant: main builds the value with an unkeyed
// composite literal.
type ParserState struct {
	// File is the input file; LastRecord seeks and reads on it directly.
	File *os.File

	// LastSeenId is the most recent character data captured while
	// InSubfieldA was set.
	LastSeenId string

	// In918 is set while the scan is inside a datafield whose tag is "918".
	In918 bool

	// InSubfieldA is set while the scan is inside a subfield with code "a"
	// of a 918 datafield.
	InSubfieldA bool

	// LastRecordStart is the byte offset in File at which the most recent
	// <record> element began.
	LastRecordStart int64

	// Error holds the I/O error, if any, encountered by LastRecord.
	Error error
}

// LastRecord re-reads from state.File the record that begins at
// state.LastRecordStart and returns it up to and including its closing
// </record> tag.
//
// It returns (true, record) on success. It returns (false, "<n/a>") when no
// closing tag is found before the end of the file (or before an internal size
// cap is reached), and (false, "encountered non-recoverable error") when the
// file cannot be positioned or read, or when there is no data at all at
// LastRecordStart; in that case the cause is stored in state.Error.
//
// On success state.LastRecordStart is advanced to the first byte after the
// returned record and the file is positioned there, so a further call yields
// the following record.
func (state *ParserState) LastRecord() (bool, string) {
	const (
		closeTag = "</record>"
		// chunkSize is how much is requested from the file per Read call.
		chunkSize = 20000
		// maxRecordBytes bounds memory use when the closing tag is missing
		// from a very large file.
		maxRecordBytes = 16 << 20
	)

	start, err := state.File.Seek(state.LastRecordStart, io.SeekStart)
	if err != nil {
		state.Error = err
		return false, "encountered non-recoverable error"
	}

	var record []byte
	chunk := make([]byte, chunkSize)
	for len(record) < maxRecordBytes {
		n, err := state.File.Read(chunk)

		// Resume the search a few bytes before the new data so that a closing
		// tag split across two reads is still found, without rescanning
		// everything read so far.
		from := len(record) - (len(closeTag) - 1)
		if from < 0 {
			from = 0
		}
		record = append(record, chunk[:n]...)

		if idx := bytes.Index(record[from:], []byte(closeTag)); idx != -1 {
			end := from + idx + len(closeTag)
			state.LastRecordStart = start + int64(end)
			// The record itself was retrieved, so a failure to reposition is
			// recorded rather than turned into a failed lookup.
			if _, err := state.File.Seek(state.LastRecordStart, io.SeekStart); err != nil {
				state.Error = err
			}
			return true, string(record[:end])
		}

		if err != nil {
			if err == io.EOF && len(record) > 0 {
				// Data was present but the record is never closed.
				break
			}
			state.Error = err
			return false, "encountered non-recoverable error"
		}
	}
	return false, "<n/a>"
}
func dumpState(l *log.Logger, state ParserState) int { | |
l.Printf("Last seen catkey was %s\n", state.LastSeenId) | |
l.Printf("Last record element seen at %d\n", state.LastRecordStart) | |
available, contents := state.LastRecord() | |
if available { | |
l.Println("Record we were processing at the time") | |
l.Println("-----------") | |
l.Println(contents) | |
l.Println("-----------") | |
return 0 | |
} else { | |
l.Println("Unable to retrieve record where error was encountered") | |
return 1 | |
} | |
} | |
func check(e error, state ParserState) { | |
if e != nil { | |
fmt.Printf("Last seen catkey was %s\n", state.LastSeenId) | |
panic(e) | |
} | |
} | |
func main() { | |
l := log.New(os.Stderr, "", 0) | |
file, err:= os.Open(os.Args[1]) | |
state := ParserState{file,"<not started>", false, false, 0, nil} | |
check(err , state) | |
defer file.Close() | |
decoder := xml.NewDecoder(file) | |
for { | |
currentPos := decoder.InputOffset() | |
t, err := decoder.Token() | |
if t == io.EOF { | |
break | |
} | |
if err != nil { | |
exit_status := dumpState(l, state) | |
os.Exit(exit_status) | |
} | |
switch v := t.(type) { | |
case xml.StartElement: | |
if v.Name.Local == "record" { | |
state.LastRecordStart = currentPos | |
} | |
if v.Name.Local == "datafield" { | |
for _, attr:= range v.Attr { | |
if attr.Name.Local == "tag" && attr.Value == "918" { | |
state.In918 = true | |
} | |
} | |
} | |
if v.Name.Local == "subfield" && state.In918 { | |
for _, attr := range v.Attr { | |
if attr.Name.Local == "code" && attr.Value == "a" { | |
state.InSubfieldA = true | |
} | |
} | |
} | |
case xml.CharData: | |
if state.InSubfieldA { | |
state.LastSeenId = string(v) | |
} | |
case xml.EndElement: | |
if state.In918 { | |
state.In918 = false | |
state.InSubfieldA = false | |
} | |
if state.InSubfieldA { | |
state.InSubfieldA = false | |
} | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment