Last active
August 29, 2015 14:13
-
-
Save caarmen/9ab3461d747600249ab7 to your computer and use it in GitHub Desktop.
Backing up a small program here for converting a proprietary Excel file to HTML for one specific website. Very useful for everybody...
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package ca.rmen.poemsfah; | |
import java.io.File; | |
import java.io.FileInputStream; | |
import java.io.FileWriter; | |
import java.io.InputStream; | |
import java.text.Normalizer; | |
import java.util.ArrayList; | |
import java.util.Collections; | |
import java.util.HashMap; | |
import java.util.List; | |
import java.util.Map; | |
import java.util.SortedSet; | |
import java.util.TreeSet; | |
import jxl.Cell; | |
import jxl.Sheet; | |
import jxl.Workbook; | |
import jxl.WorkbookSettings; | |
import freemarker.template.Configuration; | |
import freemarker.template.DefaultObjectWrapperBuilder; | |
import freemarker.template.Template; | |
import freemarker.template.TemplateExceptionHandler; | |
public class XLSToHTML { | |
public static class Document { | |
private int pageNumber; | |
private String date; | |
private String paintingCaption; | |
private String songTitle; | |
private String songLink; | |
private final List<Poem> breverias = new ArrayList<Poem>(); | |
private final List<Poem> sonnets = new ArrayList<Poem>(); | |
private final List<Poem> others = new ArrayList<Poem>(); | |
public Document(int pageNumber) { | |
this.pageNumber = pageNumber; | |
} | |
public int getPageNumber() { | |
return pageNumber; | |
} | |
public String getDate() { | |
return date; | |
} | |
public void setDate(String date) { | |
this.date = date; | |
} | |
public String getPaintingCaption() { | |
return paintingCaption; | |
} | |
public void setPaintingCaption(String paintingCaption) { | |
this.paintingCaption = paintingCaption; | |
} | |
public String getSongTitle() { | |
return songTitle; | |
} | |
public void setSongTitle(String songTitle) { | |
this.songTitle = songTitle; | |
} | |
public String getSongLink() { | |
return songLink; | |
} | |
public void setSongLink(String songLink) { | |
this.songLink = songLink; | |
} | |
public void addBreveria(Poem poem) { | |
breverias.add(poem); | |
} | |
public void addSonnet(Poem poem) { | |
sonnets.add(poem); | |
} | |
public void addOtherPoem(Poem poem) { | |
others.add(poem); | |
} | |
public List<Poem> getBreverias() { | |
return Collections.unmodifiableList(breverias); | |
} | |
public List<Poem> getSonnets() { | |
return sonnets; | |
} | |
public List<Poem> getOthers() { | |
return others; | |
} | |
@Override | |
public String toString() { | |
return breverias.size() + " breverias, " + sonnets.size() + " sonnets, " + others.size() + " others"; | |
} | |
} | |
public static class Poem { | |
private final String title; | |
private final String poemType; | |
private final String poemNumber; | |
private final String preContent; | |
private final String content; | |
private final String locationDate; | |
public Poem(String title, String poemType, String poemNumber, String preContent, String content, String locationDate) { | |
super(); | |
this.title = title; | |
this.poemType = poemType; | |
this.poemNumber = poemNumber; | |
this.preContent = preContent; | |
this.content = content; | |
this.locationDate = locationDate; | |
} | |
public String getTitle() { | |
return title; | |
} | |
public String getId() { | |
StringBuilder buffer = new StringBuilder(); | |
if (poemType != null) | |
buffer.append(poemType); | |
if (poemNumber != null) | |
buffer.append(poemNumber); | |
if (title != null) | |
buffer.append(title); | |
String result = Normalizer.normalize(buffer.toString(), Normalizer.Form.NFD); | |
result = result.replaceAll("[^0-9a-zA-Z]", ""); | |
result = result.replaceAll("\\p{M}", ""); | |
result = result.toLowerCase(); | |
return result; | |
} | |
public String getPoemType() { | |
return poemType; | |
} | |
public String getPoemNumber() { | |
return poemNumber; | |
} | |
public String getPreContent() { | |
return preContent; | |
} | |
public String getContent() { | |
return content; | |
} | |
public String getLocationDate() { | |
return locationDate; | |
} | |
} | |
private final static String TITLE = "title"; | |
private final static String POEM_TYPE_ID = "poem_type_id"; | |
private final static String POEM_NUMBER = "poem_number"; | |
private final static String PRE_CONTENT = "pre_content"; | |
private final static String CONTENT = "content"; | |
private final static String LOCATION = "location"; | |
private final static String YEAR = "year"; | |
private final static String MONTH = "month"; | |
private final static String DAY = "day"; | |
private final static String PAGE_NUMBER = "page_number"; | |
private final static String DATE = "date"; | |
private final static String PAINTING_CAPTION = "painting_caption"; | |
private final static String SONG_TITLE = "song_title"; | |
private final static String SONG_LINK = "song_link"; | |
private final static int TYPE_BREVERIA = 1; | |
private final static int TYPE_SONNET = 2; | |
private final static int TYPE_OTHER = 3; | |
private final static String[] MONTHS = new String[] { "enero", "febrero", "marzo", "abril", "mayo", "junio", "julio", "agosto", "septiembre", "octubre", | |
"noviembre", "diciembre" }; | |
public static void main(String[] args) throws Throwable { | |
int i = 0; | |
String excelPath = args[i++]; | |
String templatePath = args[i++]; | |
List<Document> documents = readFile(excelPath); | |
for (Document document : documents) { | |
String htmlPath = templatePath.replaceAll(".ftl$", document.getPageNumber() + ".html"); | |
writeDocument(document, templatePath, htmlPath); | |
} | |
} | |
public XLSToHTML() { | |
} | |
private static List<Document> readFile(String filePath) { | |
try { | |
InputStream is = new FileInputStream(filePath); | |
WorkbookSettings wbSettings = new WorkbookSettings(); | |
wbSettings.setEncoding("iso-8859-1"); | |
Workbook wb = Workbook.getWorkbook(is, wbSettings); | |
return readBook(wb); | |
} catch (Exception e) { | |
e.printStackTrace(); | |
return null; | |
} finally { | |
} | |
} | |
private static List<String> readColumnNames(Sheet sheet) { | |
Cell[] headerRow = sheet.getRow(0); | |
List<String> columnNames = new ArrayList<String>(); | |
for (Cell headerCell : headerRow) | |
columnNames.add(headerCell.getContents()); | |
return columnNames; | |
} | |
private static Map<String, String> readRow(Sheet sheet, int rowId, List<String> columnNames) { | |
Map<String, String> values = new HashMap<String, String>(); | |
int columnCount = sheet.getColumns(); | |
Cell[] row = sheet.getRow(rowId); | |
for (int c = 0; c < columnCount; c++) { | |
String columnName = columnNames.get(c); | |
if (columnName.startsWith("#")) | |
continue; | |
String cellData = row[c].getContents(); | |
cellData = clean(cellData); | |
values.put(columnName, cellData); | |
} | |
return values; | |
} | |
private static Map<Integer, Document> readPageMeta(Workbook wb) { | |
Map<Integer, Document> result = new HashMap<Integer, Document>(); | |
Sheet pageSheet = wb.getSheet("page"); | |
List<String> columnNames = readColumnNames(pageSheet); | |
int rowCount = pageSheet.getRows(); | |
for (int r = 1; r < rowCount; r++) { | |
Map<String, String> values = readRow(pageSheet, r, columnNames); | |
int pageNumber = Integer.parseInt(values.get(PAGE_NUMBER)); | |
Document document = new Document(pageNumber); | |
String date = values.get(DATE); | |
String paintingCaption = values.get(PAINTING_CAPTION); | |
String songTitle = values.get(SONG_TITLE); | |
String songLink = values.get(SONG_LINK); | |
document.setDate(date); | |
document.setPaintingCaption(paintingCaption); | |
document.setSongTitle(songTitle); | |
document.setSongLink(songLink); | |
result.put(pageNumber, document); | |
} | |
return result; | |
} | |
private static List<Document> readBook(Workbook wb) { | |
Map<Integer, Document> documents = readPageMeta(wb); | |
Sheet poemSheet = wb.getSheet("poem"); | |
List<String> columnNames = readColumnNames(poemSheet); | |
int columnCount = poemSheet.getColumns(); | |
int rowCount = poemSheet.getRows(); | |
Map<String, String> values = new HashMap<String, String>(); | |
for (int r = 1; r < rowCount; r++) { | |
Cell[] row = poemSheet.getRow(r); | |
for (int c = 0; c < columnCount; c++) { | |
String columnName = columnNames.get(c); | |
if (columnName.startsWith("#")) | |
continue; | |
String cellData = row[c].getContents(); | |
cellData = clean(cellData); | |
values.put(columnName, cellData); | |
} | |
String title = values.get(TITLE); | |
System.out.println(title); | |
int poemTypeId = Integer.parseInt(values.get(POEM_TYPE_ID)); | |
int pageNumber = Integer.parseInt(values.get(PAGE_NUMBER)); | |
String poemNumber = values.get(POEM_NUMBER); | |
String preContent = values.get(PRE_CONTENT); | |
String content = values.get(CONTENT); | |
String year = values.get(YEAR); | |
int month = Integer.valueOf(values.get(MONTH)); | |
int day = Integer.valueOf(values.get(DAY)); | |
String location = values.get(LOCATION); | |
String locationDate = day >= 1 ? location + ", " + day + " de " + MONTHS[month - 1] + " de " + year : location + ", " + MONTHS[month - 1] + " de " | |
+ year; | |
Document document = documents.get(pageNumber); | |
if(document == null) | |
continue; | |
switch (poemTypeId) { | |
case TYPE_BREVERIA: | |
document.addBreveria(new Poem(title, "Brevería", poemNumber, preContent, content, locationDate)); | |
break; | |
case TYPE_SONNET: | |
document.addSonnet(new Poem(title, "Soneto", poemNumber, preContent, content, locationDate)); | |
break; | |
default: | |
document.addOtherPoem(new Poem(title, null, poemNumber, preContent, content, locationDate)); | |
break; | |
} | |
} | |
final List<Document> result = new ArrayList<Document>(); | |
SortedSet<Integer> pageNumbers = new TreeSet<Integer>(); | |
pageNumbers.addAll(documents.keySet()); | |
for (Integer pageNumber : pageNumbers) | |
result.add(documents.get(pageNumber)); | |
return result; | |
} | |
private static void writeDocument(Document document, String inputTemplatePath, String outputHTMLPath) throws Throwable { | |
Map<String, Object> root = new HashMap<String, Object>(); | |
root.put("document", document); | |
Configuration cfg = new Configuration(Configuration.VERSION_2_3_21); | |
cfg.setTemplateExceptionHandler(TemplateExceptionHandler.HTML_DEBUG_HANDLER); | |
cfg.setObjectWrapper(new DefaultObjectWrapperBuilder(cfg.getIncompatibleImprovements()).build()); | |
cfg.setDirectoryForTemplateLoading(new File(".")); | |
Template template = cfg.getTemplate(inputTemplatePath); | |
FileWriter writer = new FileWriter(outputHTMLPath); | |
template.process(root, writer); | |
writer.flush(); | |
writer.close(); | |
} | |
private static String clean(String data) { | |
if (data == null) | |
return data; | |
data = data.trim(); | |
data = data.replaceAll("\\\\n", "\n"); | |
if (data.isEmpty()) | |
data = null; | |
return data; | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment