Last active
December 19, 2015 07:49
-
-
Save jaffes2/5921095 to your computer and use it in GitHub Desktop.
Converts old HTML Files into ones with new formatting based on a template file by selecting the elements that are different in each file (title and links) and placing them into the tag placeholders hard-coded in the template file.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* Sarabeth Jaffe
 * Converts old HTML files into ones with new formatting based on a template file by selecting the
 * elements that are different in each file (title and links) and placing them into the
 * tag placeholders hard-coded in the template file.
 *
 * References: Stack Overflow
 */
import java.io.*;
import java.nio.charset.StandardCharsets;
import org.apache.commons.io.FileUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
public class HTMLFormatConverter { | |
public static void main(String[] args) { | |
Document doc; | |
try { | |
doc = Jsoup.connect("http://www.google.com").get(); | |
String title = doc.title(); | |
System.out.println("Title: " + title); | |
String links_html = ""; | |
// get all links | |
Elements links = doc.select("a[href]"); | |
for (Element link : links) { | |
System.out.println("\n link : " + link.absUrl("href"));//gets the absolute link address aka including http://www... | |
System.out.println("text : " + link.text());//link label text | |
//disincluding some links that i don't want in the new html formatted pages | |
if (!link.text().equals("Return to Manuals") && !link.text().equals("Training Site") && !link.text().equals("Intranet")) { | |
//create a string of html commands that i can just insert into the file | |
links_html += "<li> <a href = \" " + link.absUrl("href") + " \" " + ">" + link.text() + "</a> </li>"; | |
} | |
} | |
make_into_html(title, links_html); | |
} catch (IOException e) { | |
System.out.println("IOException thrown.") | |
} | |
} | |
public static void make_into_html(String title, String links_html) { | |
try { | |
//dealing with coldfusion files | |
File htmlTemplateFile = new File("template.cfm");//can change the file extension to anything you want | |
String htmlString = FileUtils.readFileToString(htmlTemplateFile); | |
String body = links_html; | |
htmlString = htmlString.replace("$title", title); | |
htmlString = htmlString.replace("$body", body);//replace sections of html using tags | |
File newHtmlFile = new File(title + ".cfm");//create new html file with newly replace sections | |
FileUtils.writeStringToFile(newHtmlFile, htmlString);//write new string of html commands to newHtmlFile | |
} catch (Exception e) { | |
System.out.println("Exception thrown."); | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment