Skip to content

Instantly share code, notes, and snippets.

@tunjid
Created February 1, 2016 01:01
Show Gist options
  • Save tunjid/80d54386a587cd1176ce to your computer and use it in GitHub Desktop.
Save tunjid/80d54386a587cd1176ce to your computer and use it in GitHub Desktop.
Embedding GitHub Gist using TextAngular
import com.google.gson.Gson;
import com.google.gson.GsonBuilder;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import java.io.File;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Locale;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.Result;
import javax.xml.transform.Source;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpression;
import javax.xml.xpath.XPathFactory;
/**
 * Command-line utility that rewrites an XML export of blog posts into a
 * smaller document.
 *
 * <p>The pipeline run by {@link #main(String[])} is:
 * <ol>
 *   <li>parse the input file;</li>
 *   <li>remove every element whose name starts with {@code wp};</li>
 *   <li>copy every {@code item} element under a new {@code blogPosts} root;</li>
 *   <li>for each post: JSON-escape the content into a {@code body} element,
 *       reformat {@code pubDate} into {@code stringDate}, drop {@code guid} and
 *       {@code link}, and rename tag categories to {@code tags};</li>
 *   <li>write the result to the output file.</li>
 * </ol>
 *
 * <p>All state is static and no synchronization is performed.
 */
public class BlogXML {

    /** Input file used when no path is given on the command line. */
    private static final String DEFAULT_INPUT_PATH = "/Users/Shemanigans/Desktop/blog.xml";

    /** Output file used when no path is given on the command line. */
    private static final String DEFAULT_OUTPUT_PATH = "/Users/Shemanigans/Desktop/edited.xml";

    /** Shared JSON encoder; built once instead of once per encoded node. */
    private static final Gson GSON = new GsonBuilder().create();

    static DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
    static DocumentBuilder documentBuilder;
    static XPathFactory xPathFactory;

    /**
     * Runs the conversion.
     *
     * @param args optional; {@code args[0]} overrides the input path and
     *             {@code args[1]} overrides the output path. With no arguments
     *             the built-in default paths are used.
     */
    public static void main(String[] args) {
        String inputPath = (args != null && args.length > 0) ? args[0] : DEFAULT_INPUT_PATH;
        String outputPath = (args != null && args.length > 1) ? args[1] : DEFAULT_OUTPUT_PATH;
        try {
            ensureInitialized();
            Document baseDocument = documentBuilder.parse(new File(inputPath));
            removeWordPressNodes(baseDocument);
            Document blogDocument = createBlogDocument(baseDocument);
            cleanBlogDocument(blogDocument);
            saveBlogDocument(blogDocument, new File(outputPath));
        }
        catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Creates the shared {@link DocumentBuilder} and {@link XPathFactory} if
     * they have not been created yet, so the helper methods also work when
     * they are called without going through {@link #main(String[])}.
     *
     * @throws IllegalStateException if the parser cannot be configured
     */
    private static void ensureInitialized() {
        if (documentBuilder == null) {
            try {
                documentBuilder = factory.newDocumentBuilder();
            }
            catch (javax.xml.parsers.ParserConfigurationException e) {
                throw new IllegalStateException("Unable to create an XML DocumentBuilder", e);
            }
        }
        if (xPathFactory == null) {
            xPathFactory = XPathFactory.newInstance();
        }
    }

    /**
     * Removes every element whose qualified name starts with {@code wp} from
     * the given document. Failures are printed to standard error and not
     * rethrown.
     *
     * @param baseDocument document to modify in place
     */
    static void removeWordPressNodes(Document baseDocument) {
        try {
            ensureInitialized();
            XPath wordPressFilterXpath = xPathFactory.newXPath();
            XPathExpression wordPressExpression =
                    wordPressFilterXpath.compile("//*[starts-with(name(), 'wp')]");
            NodeList nodes = (NodeList) wordPressExpression.evaluate(baseDocument, XPathConstants.NODESET);
            for (int i = 0; i < nodes.getLength(); i++) {
                detach(nodes.item(i));
            }
        }
        catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Builds a new document whose root is {@code blogPosts} and whose children
     * are deep copies of every {@code item} element found in the source.
     *
     * @param baseDocument document to copy the {@code item} elements from
     * @return the newly created document
     */
    static Document createBlogDocument(Document baseDocument) {
        ensureInitialized();
        Document blogDocument = documentBuilder.newDocument();
        NodeList allPosts = baseDocument.getElementsByTagName("item");

        Element blogPosts = blogDocument.createElement("blogPosts");
        // Declare the prefixes used by the copied elements so the serialized
        // output keeps them bound.
        blogPosts.setAttribute("xmlns:excerpt", "excerpt");
        blogPosts.setAttribute("xmlns:content", "http://example/namespace");
        blogPosts.setAttribute("xmlns:dc", "dc");
        blogPosts.setAttribute("xmlns:wp", "http://example/namespace");
        blogDocument.appendChild(blogPosts);

        for (int i = 0; i < allPosts.getLength(); i++) {
            Node copyNode = blogDocument.importNode(allPosts.item(i), true);
            blogPosts.appendChild(copyNode);
        }
        return blogDocument;
    }

    /**
     * Post-processes every post in the blog document: the element whose name
     * starts with {@code content} is JSON-escaped and renamed, and its parent
     * element has its date reformatted, its {@code guid}/{@code link} removed
     * and its tag categories renamed. Failures are printed to standard error
     * and not rethrown.
     *
     * @param blogDocument document produced by {@link #createBlogDocument(Document)}
     */
    static void cleanBlogDocument(Document blogDocument) {
        try {
            ensureInitialized();
            Element blogPosts = (Element) blogDocument.getElementsByTagName("blogPosts").item(0);
            // To encode the body
            XPath jsonEncoderXpath = xPathFactory.newXPath();
            XPathExpression encoderExpression =
                    jsonEncoderXpath.compile("//*[starts-with(name(), 'content')]");
            NodeList unencodedNodes = (NodeList) encoderExpression.evaluate(blogPosts, XPathConstants.NODESET);
            for (int i = 0; i < unencodedNodes.getLength(); i++) {
                Node nodeToEncode = unencodedNodes.item(i);
                // Capture the parent first: renameNode is allowed to replace
                // the node, which would leave the old reference detached.
                Node parent = nodeToEncode.getParentNode();
                encodeNode(nodeToEncode);
                if (!(parent instanceof Element)) {
                    continue;
                }
                Element parentElement = (Element) parent;
                Node dateNode = parentElement.getElementsByTagName("pubDate").item(0);
                if (dateNode != null) {
                    formatDateNode(dateNode);
                }
                removeUnneccessaryNodes(parentElement);
                separatetagsAndCategories(parentElement);
            }
        }
        catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Replaces the node's text with its JSON string encoding (without the
     * surrounding quotes) and renames the node to {@code body}.
     *
     * @param nodeToEncode node whose text content is encoded in place
     */
    static void encodeNode(Node nodeToEncode) {
        String unencodedString = nodeToEncode.getTextContent();
        String encodedString = GSON.toJson(unencodedString);
        nodeToEncode.setTextContent(stripSurroundingQuotes(encodedString));
        Document blogDocument = nodeToEncode.getOwnerDocument();
        blogDocument.renameNode(nodeToEncode, null, "body");
    }

    /**
     * Removes one leading and one trailing double quote when both are present.
     * An encoded empty string ({@code ""}) therefore becomes the empty string
     * rather than being left as two quote characters.
     */
    private static String stripSurroundingQuotes(String encoded) {
        int length = encoded.length();
        boolean quoted = length >= 2
                && encoded.charAt(0) == '"'
                && encoded.charAt(length - 1) == '"';
        return quoted ? encoded.substring(1, length - 1) : encoded;
    }

    /**
     * Rewrites a date such as {@code Mon, 1 Feb 2016 01:01:00 +0000} as
     * {@code MM-dd-yyyy HH:mm:ss} and renames the node to {@code stringDate}.
     * The output is rendered in the JVM's default time zone. If the text cannot
     * be parsed the node is left unchanged and the failure is printed to
     * standard error.
     *
     * @param dateNode node holding the date text; {@code null} is ignored
     */
    static void formatDateNode(Node dateNode) {
        if (dateNode == null) {
            return;
        }
        try {
            String pubDate = dateNode.getTextContent().trim();
            DateFormat originalFormat = new SimpleDateFormat("EEE, d MMM yyyy HH:mm:ss Z", Locale.ENGLISH);
            // HH (0-23) rather than hh (1-12): the pattern has no AM/PM marker,
            // so a 12-hour clock would make morning and afternoon times identical.
            DateFormat newFormat = new SimpleDateFormat("MM-dd-yyyy HH:mm:ss", Locale.ENGLISH);
            Date date = originalFormat.parse(pubDate);
            dateNode.setTextContent(newFormat.format(date));
            Document blogDocument = dateNode.getOwnerDocument();
            blogDocument.renameNode(dateNode, null, "stringDate");
        }
        catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * For every {@code category} descendant that has a {@code domain}
     * attribute: renames it to {@code tags} when the domain is
     * {@code post_tag}, then removes its {@code domain} and {@code nicename}
     * attributes. Categories without a {@code domain} attribute are untouched.
     *
     * @param parentElement element whose {@code category} descendants are processed
     */
    static void separatetagsAndCategories(Element parentElement) {
        Document blogDocument = parentElement.getOwnerDocument();
        NodeList liveCategories = parentElement.getElementsByTagName("category");

        // getElementsByTagName returns a live list; renaming an element while
        // indexing into it can shift later entries and skip them. Snapshot first.
        Element[] categories = new Element[liveCategories.getLength()];
        for (int j = 0; j < categories.length; j++) {
            categories[j] = (Element) liveCategories.item(j);
        }

        for (Element category : categories) {
            if (!category.hasAttribute("domain")) {
                continue;
            }
            Element target = category;
            if ("post_tag".equals(category.getAttribute("domain"))) {
                // renameNode may return a replacement node; keep working on
                // whichever node is actually in the tree.
                target = (Element) blogDocument.renameNode(category, null, "tags");
            }
            target.removeAttribute("domain");
            target.removeAttribute("nicename");
        }
    }

    /**
     * Removes the first {@code guid} and the first {@code link} descendant of
     * the given element. A missing element is skipped.
     *
     * @param parentElement element to remove the nodes from
     */
    static void removeUnneccessaryNodes(Element parentElement) {
        Node guidNode = parentElement.getElementsByTagName("guid").item(0);
        Node linkNode = parentElement.getElementsByTagName("link").item(0);
        detach(guidNode);
        detach(linkNode);
    }

    /** Removes the node from its current parent; does nothing for null or parentless nodes. */
    private static void detach(Node node) {
        if (node == null) {
            return;
        }
        Node parent = node.getParentNode();
        if (parent != null) {
            parent.removeChild(node);
        }
    }

    /**
     * Serializes the document to the default output path. Failures are printed
     * to standard error and not rethrown.
     *
     * @param blogDocument document to write
     */
    static void saveBlogDocument(Document blogDocument) {
        saveBlogDocument(blogDocument, new File(DEFAULT_OUTPUT_PATH));
    }

    /** Serializes the document to the given file, printing any failure to standard error. */
    private static void saveBlogDocument(Document blogDocument, File outputFile) {
        try {
            Transformer transformer = TransformerFactory.newInstance().newTransformer();
            Result output = new StreamResult(outputFile);
            Source input = new DOMSource(blogDocument);
            transformer.transform(input, output);
        }
        catch (Exception e) {
            e.printStackTrace();
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment