Created
September 4, 2012 21:13
-
-
Save quii/3626583 to your computer and use it in GitHub Desktop.
Filtering an XML file for certain elements and then removing unwanted nodes
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import scala.xml._ | |
import scala.xml.transform._ | |
/**
 * Script entry point.
 *
 * Loads the metadata file, keeps only the `EntityDescriptor` elements
 * accepted by `Stuff.entitiesWeWant`, runs each through
 * `Stuff.removeExtensions`, and hands every resulting node to
 * `Stuff.printEntityToFile`.
 */
object Main extends App {
  import Stuff._

  // Parse the whole metadata document from the working directory.
  val sourceXml = XML.loadFile("metadata.switchaai.xml")

  // Every EntityDescriptor element, at any depth of the document.
  val allEntities = sourceXml \\ "EntityDescriptor"

  // Keep only the descriptors accepted by the entitiesWeWant predicate.
  val wantedEntities = allEntities.filter(entitiesWeWant)

  // Apply the Extensions-removing transformation to each kept descriptor.
  val strippedEntities = wantedEntities.flatMap(removeExtensions)

  // Write each stripped descriptor out via printEntityToFile.
  strippedEntities.foreach(printEntityToFile)
}
/**
 * Helpers for selecting entity descriptors by their `entityID` attribute,
 * removing `Extensions` elements from them, and writing each one to a file.
 */
object Stuff {

  /** Charset used when writing files; must agree with the XML declaration emitted below. */
  private val OutputEncoding = "UTF-8"

  /** The `entityID` values of the descriptors that should be kept. */
  private val wantedEntityNames = Seq("https://aai.unifr.ch/idp/shibboleth", "https://aai.unil.ch/idp/shibboleth")

  /** Text of the node's `entityID` attribute (empty string when the attribute is absent). */
  private def entityId(x: Node): String = (x \ "@entityID").text

  /**
   * Predicate selecting the descriptors to keep.
   *
   * @param x the node to test
   * @return `true` when the node's `entityID` attribute is one of the wanted names
   */
  def entitiesWeWant(x: Node): Boolean = wantedEntityNames.contains(entityId(x))

  /**
   * Returns `x` with `Extensions` elements removed, as produced by a
   * `RuleTransformer` built from the `removeIt` rule.
   *
   * NOTE(review): the result type is left inferred on purpose; the exact `Seq`
   * type returned by `RuleTransformer.transform` may depend on the scala-xml
   * version in use — confirm before annotating.
   *
   * @param x the node to transform
   */
  def removeExtensions(x: Node) = new RuleTransformer(removeIt).transform(x)

  /**
   * Derives the output file name from the node's `entityID`: the `https://`
   * and `/idp/shibboleth` substrings are removed and `.xml` is appended.
   */
  private def filenameFromEntity(x: Node): String =
    entityId(x).replace("https://", "").replace("/idp/shibboleth", "") + ".xml"

  /**
   * Writes the node, preceded by an XML declaration, to the file named by
   * `filenameFromEntity`.
   *
   * @param x the node to write
   */
  def printEntityToFile(x: Node): Unit =
    printToFile(new java.io.File(filenameFromEntity(x))) { p =>
      p.println("""<?xml version="1.0" encoding="UTF-8"?>""" + x)
    }

  /**
   * Opens a `PrintWriter` on `f`, passes it to `op`, and always closes it.
   *
   * The writer is opened with an explicit UTF-8 charset so that the bytes on
   * disk match the `encoding="UTF-8"` declaration written by
   * `printEntityToFile`, rather than depending on the platform default.
   *
   * @param f  the file to (over)write
   * @param op the action that writes to the opened writer
   */
  private def printToFile(f: java.io.File)(op: java.io.PrintWriter => Unit): Unit = {
    val p = new java.io.PrintWriter(f, OutputEncoding)
    try op(p) finally p.close()
  }

  /** Rewrite rule that replaces every element labelled `Extensions` with nothing. */
  private val removeIt = new RewriteRule {
    override def transform(n: Node): NodeSeq = n match {
      case e: Elem if e.label == "Extensions" => NodeSeq.Empty
      case other => other
    }
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Source XML file: http://metadata.aai.switch.ch/metadata.switchaai.xml
The aim of this was to retrieve only those `EntityDescriptor` elements in the XML whose `entityID` attribute has one of the wanted values, and then to remove the `Extensions` node from each of them.