Skip to content

Instantly share code, notes, and snippets.

@caarmen
Last active August 29, 2015 14:13
Show Gist options
  • Save caarmen/9ab3461d747600249ab7 to your computer and use it in GitHub Desktop.
Save caarmen/9ab3461d747600249ab7 to your computer and use it in GitHub Desktop.
Backing up a small program here for converting a proprietary Excel file to HTML for one specific website. Very useful for everybody...
package ca.rmen.poemsfah;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileWriter;
import java.io.InputStream;
import java.text.Normalizer;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.SortedSet;
import java.util.TreeSet;
import jxl.Cell;
import jxl.Sheet;
import jxl.Workbook;
import jxl.WorkbookSettings;
import freemarker.template.Configuration;
import freemarker.template.DefaultObjectWrapperBuilder;
import freemarker.template.Template;
import freemarker.template.TemplateExceptionHandler;
public class XLSToHTML {
public static class Document {
private int pageNumber;
private String date;
private String paintingCaption;
private String songTitle;
private String songLink;
private final List<Poem> breverias = new ArrayList<Poem>();
private final List<Poem> sonnets = new ArrayList<Poem>();
private final List<Poem> others = new ArrayList<Poem>();
public Document(int pageNumber) {
this.pageNumber = pageNumber;
}
public int getPageNumber() {
return pageNumber;
}
public String getDate() {
return date;
}
public void setDate(String date) {
this.date = date;
}
public String getPaintingCaption() {
return paintingCaption;
}
public void setPaintingCaption(String paintingCaption) {
this.paintingCaption = paintingCaption;
}
public String getSongTitle() {
return songTitle;
}
public void setSongTitle(String songTitle) {
this.songTitle = songTitle;
}
public String getSongLink() {
return songLink;
}
public void setSongLink(String songLink) {
this.songLink = songLink;
}
public void addBreveria(Poem poem) {
breverias.add(poem);
}
public void addSonnet(Poem poem) {
sonnets.add(poem);
}
public void addOtherPoem(Poem poem) {
others.add(poem);
}
public List<Poem> getBreverias() {
return Collections.unmodifiableList(breverias);
}
public List<Poem> getSonnets() {
return sonnets;
}
public List<Poem> getOthers() {
return others;
}
@Override
public String toString() {
return breverias.size() + " breverias, " + sonnets.size() + " sonnets, " + others.size() + " others";
}
}
public static class Poem {
private final String title;
private final String poemType;
private final String poemNumber;
private final String preContent;
private final String content;
private final String locationDate;
public Poem(String title, String poemType, String poemNumber, String preContent, String content, String locationDate) {
super();
this.title = title;
this.poemType = poemType;
this.poemNumber = poemNumber;
this.preContent = preContent;
this.content = content;
this.locationDate = locationDate;
}
public String getTitle() {
return title;
}
public String getId() {
StringBuilder buffer = new StringBuilder();
if (poemType != null)
buffer.append(poemType);
if (poemNumber != null)
buffer.append(poemNumber);
if (title != null)
buffer.append(title);
String result = Normalizer.normalize(buffer.toString(), Normalizer.Form.NFD);
result = result.replaceAll("[^0-9a-zA-Z]", "");
result = result.replaceAll("\\p{M}", "");
result = result.toLowerCase();
return result;
}
public String getPoemType() {
return poemType;
}
public String getPoemNumber() {
return poemNumber;
}
public String getPreContent() {
return preContent;
}
public String getContent() {
return content;
}
public String getLocationDate() {
return locationDate;
}
}
private final static String TITLE = "title";
private final static String POEM_TYPE_ID = "poem_type_id";
private final static String POEM_NUMBER = "poem_number";
private final static String PRE_CONTENT = "pre_content";
private final static String CONTENT = "content";
private final static String LOCATION = "location";
private final static String YEAR = "year";
private final static String MONTH = "month";
private final static String DAY = "day";
private final static String PAGE_NUMBER = "page_number";
private final static String DATE = "date";
private final static String PAINTING_CAPTION = "painting_caption";
private final static String SONG_TITLE = "song_title";
private final static String SONG_LINK = "song_link";
private final static int TYPE_BREVERIA = 1;
private final static int TYPE_SONNET = 2;
private final static int TYPE_OTHER = 3;
private final static String[] MONTHS = new String[] { "enero", "febrero", "marzo", "abril", "mayo", "junio", "julio", "agosto", "septiembre", "octubre",
"noviembre", "diciembre" };
public static void main(String[] args) throws Throwable {
int i = 0;
String excelPath = args[i++];
String templatePath = args[i++];
List<Document> documents = readFile(excelPath);
for (Document document : documents) {
String htmlPath = templatePath.replaceAll(".ftl$", document.getPageNumber() + ".html");
writeDocument(document, templatePath, htmlPath);
}
}
public XLSToHTML() {
}
private static List<Document> readFile(String filePath) {
try {
InputStream is = new FileInputStream(filePath);
WorkbookSettings wbSettings = new WorkbookSettings();
wbSettings.setEncoding("iso-8859-1");
Workbook wb = Workbook.getWorkbook(is, wbSettings);
return readBook(wb);
} catch (Exception e) {
e.printStackTrace();
return null;
} finally {
}
}
private static List<String> readColumnNames(Sheet sheet) {
Cell[] headerRow = sheet.getRow(0);
List<String> columnNames = new ArrayList<String>();
for (Cell headerCell : headerRow)
columnNames.add(headerCell.getContents());
return columnNames;
}
private static Map<String, String> readRow(Sheet sheet, int rowId, List<String> columnNames) {
Map<String, String> values = new HashMap<String, String>();
int columnCount = sheet.getColumns();
Cell[] row = sheet.getRow(rowId);
for (int c = 0; c < columnCount; c++) {
String columnName = columnNames.get(c);
if (columnName.startsWith("#"))
continue;
String cellData = row[c].getContents();
cellData = clean(cellData);
values.put(columnName, cellData);
}
return values;
}
private static Map<Integer, Document> readPageMeta(Workbook wb) {
Map<Integer, Document> result = new HashMap<Integer, Document>();
Sheet pageSheet = wb.getSheet("page");
List<String> columnNames = readColumnNames(pageSheet);
int rowCount = pageSheet.getRows();
for (int r = 1; r < rowCount; r++) {
Map<String, String> values = readRow(pageSheet, r, columnNames);
int pageNumber = Integer.parseInt(values.get(PAGE_NUMBER));
Document document = new Document(pageNumber);
String date = values.get(DATE);
String paintingCaption = values.get(PAINTING_CAPTION);
String songTitle = values.get(SONG_TITLE);
String songLink = values.get(SONG_LINK);
document.setDate(date);
document.setPaintingCaption(paintingCaption);
document.setSongTitle(songTitle);
document.setSongLink(songLink);
result.put(pageNumber, document);
}
return result;
}
private static List<Document> readBook(Workbook wb) {
Map<Integer, Document> documents = readPageMeta(wb);
Sheet poemSheet = wb.getSheet("poem");
List<String> columnNames = readColumnNames(poemSheet);
int columnCount = poemSheet.getColumns();
int rowCount = poemSheet.getRows();
Map<String, String> values = new HashMap<String, String>();
for (int r = 1; r < rowCount; r++) {
Cell[] row = poemSheet.getRow(r);
for (int c = 0; c < columnCount; c++) {
String columnName = columnNames.get(c);
if (columnName.startsWith("#"))
continue;
String cellData = row[c].getContents();
cellData = clean(cellData);
values.put(columnName, cellData);
}
String title = values.get(TITLE);
System.out.println(title);
int poemTypeId = Integer.parseInt(values.get(POEM_TYPE_ID));
int pageNumber = Integer.parseInt(values.get(PAGE_NUMBER));
String poemNumber = values.get(POEM_NUMBER);
String preContent = values.get(PRE_CONTENT);
String content = values.get(CONTENT);
String year = values.get(YEAR);
int month = Integer.valueOf(values.get(MONTH));
int day = Integer.valueOf(values.get(DAY));
String location = values.get(LOCATION);
String locationDate = day >= 1 ? location + ", " + day + " de " + MONTHS[month - 1] + " de " + year : location + ", " + MONTHS[month - 1] + " de "
+ year;
Document document = documents.get(pageNumber);
if(document == null)
continue;
switch (poemTypeId) {
case TYPE_BREVERIA:
document.addBreveria(new Poem(title, "Brevería", poemNumber, preContent, content, locationDate));
break;
case TYPE_SONNET:
document.addSonnet(new Poem(title, "Soneto", poemNumber, preContent, content, locationDate));
break;
default:
document.addOtherPoem(new Poem(title, null, poemNumber, preContent, content, locationDate));
break;
}
}
final List<Document> result = new ArrayList<Document>();
SortedSet<Integer> pageNumbers = new TreeSet<Integer>();
pageNumbers.addAll(documents.keySet());
for (Integer pageNumber : pageNumbers)
result.add(documents.get(pageNumber));
return result;
}
private static void writeDocument(Document document, String inputTemplatePath, String outputHTMLPath) throws Throwable {
Map<String, Object> root = new HashMap<String, Object>();
root.put("document", document);
Configuration cfg = new Configuration(Configuration.VERSION_2_3_21);
cfg.setTemplateExceptionHandler(TemplateExceptionHandler.HTML_DEBUG_HANDLER);
cfg.setObjectWrapper(new DefaultObjectWrapperBuilder(cfg.getIncompatibleImprovements()).build());
cfg.setDirectoryForTemplateLoading(new File("."));
Template template = cfg.getTemplate(inputTemplatePath);
FileWriter writer = new FileWriter(outputHTMLPath);
template.process(root, writer);
writer.flush();
writer.close();
}
private static String clean(String data) {
if (data == null)
return data;
data = data.trim();
data = data.replaceAll("\\\\n", "\n");
if (data.isEmpty())
data = null;
return data;
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment