Created
February 1, 2016 01:01
-
-
Save tunjid/80d54386a587cd1176ce to your computer and use it in GitHub Desktop.
Embedding GitHub Gist using TextAngular
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import com.google.gson.Gson;
import com.google.gson.GsonBuilder;

import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

import java.io.File;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Locale;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.Result;
import javax.xml.transform.Source;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpression;
import javax.xml.xpath.XPathFactory;
public class BlogXML { | |
static DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); | |
static DocumentBuilder documentBuilder; | |
static XPathFactory xPathFactory; | |
public static void main(String[] args) { | |
try { | |
documentBuilder = factory.newDocumentBuilder(); | |
xPathFactory = XPathFactory.newInstance(); | |
File file = new File("/Users/Shemanigans/Desktop/blog.xml"); | |
Document baseDocument = documentBuilder.parse(file); | |
removeWordPressNodes(baseDocument); | |
Document blogDocument = createBlogDocument(baseDocument); | |
cleanBlogDocument(blogDocument); | |
saveBlogDocument(blogDocument); | |
} | |
catch (Exception e) { | |
e.printStackTrace(); | |
} | |
} | |
static void removeWordPressNodes(Document baseDocument) { | |
try { | |
XPath wordPressFilterXpath = xPathFactory.newXPath(); | |
XPathExpression wordPressExpression = wordPressFilterXpath.compile("//*[starts-with(name(), 'wp')]"); | |
NodeList nodes = (NodeList) wordPressExpression.evaluate(baseDocument, XPathConstants.NODESET); | |
for (int i = 0; i < nodes.getLength(); i++) { | |
Node node = nodes.item(i); | |
node.getParentNode().removeChild(node); | |
} | |
} | |
catch (Exception e) { | |
e.printStackTrace(); | |
} | |
} | |
static Document createBlogDocument(Document baseDocument) { | |
Document blogDocument = documentBuilder.newDocument(); | |
NodeList allPosts = baseDocument.getElementsByTagName("item"); | |
Element blogPosts = blogDocument.createElement("blogPosts"); | |
blogPosts.setAttribute("xmlns:excerpt", "excerpt"); | |
blogPosts.setAttribute("xmlns:content", "http://example/namespace"); | |
blogPosts.setAttribute("xmlns:dc", "dc"); | |
blogPosts.setAttribute("xmlns:wp", "http://example/namespace"); | |
blogDocument.appendChild(blogPosts); | |
for (int i = 0; i < allPosts.getLength(); i++) { | |
Node node = allPosts.item(i); | |
Node copyNode = blogDocument.importNode(node, true); | |
copyNode.getNodeName(); | |
blogPosts.appendChild(copyNode); | |
} | |
return blogDocument; | |
} | |
static void cleanBlogDocument(Document blogDocument) { | |
try { | |
Element blogPosts = (Element) blogDocument.getElementsByTagName("blogPosts").item(0); | |
// To encode the body | |
XPath jsonEncoderXpath = xPathFactory.newXPath(); | |
XPathExpression encoderExpression = jsonEncoderXpath.compile("//*[starts-with(name(), 'content')]"); | |
NodeList unencodedNodes = (NodeList) encoderExpression.evaluate(blogPosts, XPathConstants.NODESET); | |
for (int i = 0; i < unencodedNodes.getLength(); i++) { | |
Node nodeToEncode = unencodedNodes.item(i); | |
encodeNode(nodeToEncode); | |
Element parentElement = (Element) nodeToEncode.getParentNode(); | |
Node dateNode = parentElement.getElementsByTagName("pubDate").item(0); | |
formatDateNode(dateNode); | |
removeUnneccessaryNodes(parentElement); | |
separatetagsAndCategories(parentElement); | |
} | |
} | |
catch (Exception e) { | |
e.printStackTrace(); | |
} | |
} | |
static void encodeNode(Node nodeToEncode) { | |
Gson gson = new GsonBuilder().create(); | |
String unencodedString = nodeToEncode.getTextContent(); | |
String encodedString = gson.toJson(unencodedString); | |
String removedQuotes = encodedString.length() > 2 | |
? encodedString.substring(1, (encodedString.length() - 1)) | |
: encodedString; | |
nodeToEncode.setTextContent(removedQuotes); | |
Document blogDocument = nodeToEncode.getOwnerDocument(); | |
blogDocument.renameNode(nodeToEncode, null, "body"); | |
} | |
static void formatDateNode(Node dateNode) { | |
try { | |
String pubDate = dateNode.getTextContent(); | |
DateFormat originalFormat = new SimpleDateFormat("EEE, d MMM yyyy HH:mm:ss Z", Locale.ENGLISH); | |
DateFormat newFormat = new SimpleDateFormat("MM-dd-yyyy hh:mm:ss"); | |
Date date = originalFormat.parse(pubDate); | |
dateNode.setTextContent(newFormat.format(date)); | |
Document blogDocument = dateNode.getOwnerDocument(); | |
blogDocument.renameNode(dateNode, null, "stringDate"); | |
} | |
catch (Exception e) { | |
e.printStackTrace(); | |
} | |
} | |
static void separatetagsAndCategories(Element parentElement) { | |
Document blogDocument = parentElement.getOwnerDocument(); | |
NodeList nodeList = parentElement.getElementsByTagName("category"); | |
for (int j = 0; j < nodeList.getLength(); j++) { | |
Element tagNode = (Element) nodeList.item(j); | |
boolean safe = tagNode.hasAttribute("domain"); | |
if (safe) { | |
boolean isTag = tagNode.getAttribute("domain").equals("post_tag"); | |
if (isTag) { | |
blogDocument.renameNode(tagNode, null, "tags"); | |
} | |
tagNode.removeAttribute("domain"); | |
tagNode.removeAttribute("nicename"); | |
} | |
} | |
} | |
static void removeUnneccessaryNodes(Element parentElement) { | |
Node guidNode = parentElement.getElementsByTagName("guid").item(0); | |
Node linkNode = parentElement.getElementsByTagName("link").item(0); | |
parentElement.removeChild(guidNode); | |
parentElement.removeChild(linkNode); | |
} | |
static void saveBlogDocument(Document blogDocument) { | |
try { | |
Transformer transformer = TransformerFactory.newInstance().newTransformer(); | |
Result output = new StreamResult(new File("/Users/Shemanigans/Desktop/edited.xml")); | |
Source input = new DOMSource(blogDocument); | |
transformer.transform(input, output); | |
} | |
catch (Exception e) { | |
e.printStackTrace(); | |
} | |
} | |
} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment