Skip to content

Instantly share code, notes, and snippets.

@alexandervantrijffel
Last active August 29, 2015 14:19
Show Gist options
  • Save alexandervantrijffel/22becfb924c4aa7159a3 to your computer and use it in GitHub Desktop.
Save alexandervantrijffel/22becfb924c4aa7159a3 to your computer and use it in GitHub Desktop.
HtmlToWordPdfConverter
/// <summary>
/// End-to-end check: passes the full text of <c>mydoc.html</c> to the generator as a
/// single document block and expects a .docx and a .pdf named after the output title
/// to exist afterwards.
/// </summary>
[Fact]
public void GenerateWordDocument_should_generate()
{
    // Arrange: the whole source file becomes the one and only document block.
    const string sourceHtmlFile = "mydoc.html";
    var blocks = new List<string> { File.ReadAllText(sourceHtmlFile) };

    // Act
    new Generator().Generate(sourceHtmlFile, "OUT-PUT", blocks);

    // Assert: both output formats were produced.
    File.Exists("OUT-PUT.docx").Should().BeTrue();
    File.Exists("OUT-PUT.pdf").Should().BeTrue();
}
/// <summary>
/// Drives the conversion pipeline: builds an intermediate HTML file from the supplied
/// document blocks, converts it with <see cref="HtmlToWordConverter"/>, and then removes
/// the intermediate HTML and image files.
/// </summary>
public class Generator
{
    /// <summary>
    /// Generates the output documents for the given blocks of HTML.
    /// </summary>
    /// <param name="fileName">Name of the source document; forwarded to <see cref="HtmlGenerator"/>.</param>
    /// <param name="outputFileTitle">
    /// File title (without extension) used for the intermediate HTML file and for the
    /// converted output files.
    /// </param>
    /// <param name="documentBlockTexts">HTML fragments to merge into a single document.</param>
    public void Generate(string fileName, string outputFileTitle, IEnumerable<string> documentBlockTexts)
    {
        // Use the caller-supplied title for both steps so they agree on the file name
        // (previously "OUT-PUT" was hard-coded and the parameter was ignored).
        var htmlGenerator = new HtmlGenerator(fileName, outputFileTitle, documentBlockTexts);
        try
        {
            htmlGenerator.Generate();
            new HtmlToWordConverter(outputFileTitle).Generate();
        }
        finally
        {
            // Remove the intermediate files even when generation or conversion failed,
            // so a failed run does not leave stray HTML/image files behind.
            if (!string.IsNullOrEmpty(htmlGenerator.GeneratedFilePath))
            {
                File.Delete(htmlGenerator.GeneratedFilePath);
            }

            foreach (var path in htmlGenerator.GeneratedImages)
            {
                File.Delete(path);
            }
        }
    }
}
/// <summary>
/// Opens "&lt;title&gt;.html" (located next to the executing assembly) in a Word automation
/// instance and writes "&lt;title&gt;.docx" and "&lt;title&gt;.pdf" into the same directory.
/// </summary>
public class HtmlToWordConverter
{
    private readonly string _outputFileTitle;

    /// <param name="outputFileTitle">File title (without extension) shared by the input HTML and both outputs.</param>
    public HtmlToWordConverter(string outputFileTitle)
    {
        _outputFileTitle = outputFileTitle;
    }

    /// <summary>
    /// Converts the HTML file to .docx and .pdf. Linked inline pictures are flagged to be
    /// stored inside the document before saving.
    /// </summary>
    public void Generate()
    {
        // Resolve every path up front so a path problem surfaces before Word is started.
        var htmlFile = GetFilePathRelativeToAssembly(string.Format("{0}.html", _outputFileTitle));
        var outputDocxFile = GetFilePathRelativeToAssembly(string.Format("{0}.docx", _outputFileTitle));
        var outputPDFFile = GetFilePathRelativeToAssembly(string.Format("{0}.pdf", _outputFileTitle));

        var app = new Application();
        try
        {
            var document = app.Documents.Open(htmlFile, ReadOnly: false);

            foreach (var shape in document.InlineShapes)
            {
                var inlineShape = shape as InlineShape;
                if (inlineShape == null || inlineShape.LinkFormat == null)
                {
                    // Not a linked picture: nothing to embed.
                    continue;
                }

                inlineShape.LinkFormat.SavePictureWithDocument = true;
            }

            document.SaveAs2(outputDocxFile, WdSaveFormat.wdFormatXMLDocument);
            document.ExportAsFixedFormat(outputPDFFile, WdExportFormat.wdExportFormatPDF);
        }
        finally
        {
            // Everything worth keeping has been saved explicitly above. Quit without saving
            // so that a failure which left the document modified cannot trigger a
            // "save changes?" prompt on the automation instance.
            app.Quit(SaveChanges: WdSaveOptions.wdDoNotSaveChanges);
        }
    }

    /// <summary>
    /// Combines <paramref name="relativePath"/> with the directory of the executing assembly.
    /// </summary>
    private static string GetFilePathRelativeToAssembly(string relativePath)
    {
        // LocalPath yields an unescaped, OS-native path and keeps the host part of UNC
        // locations, which AbsolutePath + manual unescaping does not.
        var codeBaseUrl = new Uri(Assembly.GetExecutingAssembly().CodeBase);
        var dirPath = Path.GetDirectoryName(codeBaseUrl.LocalPath);
        return Path.Combine(dirPath, relativePath);
    }
}
/// <summary>
/// Merges a set of HTML fragments into one HTML file named "&lt;outputFileTitle&gt;.html".
/// Images embedded as base64 data URIs are written to separate files and the
/// <c>src</c> attributes are rewritten to point at those files.
/// </summary>
public class HtmlGenerator
{
    private const string DataUriPrefix = "data:";
    private const string Base64Marker = ";base64,";

    // Kept for the constructor contract; not otherwise used by this class.
    private readonly string _fileName;
    private readonly string _outputFileTitle;
    private readonly IEnumerable<string> documentBlockTexts;

    // Shared across all document blocks so image file names never collide.
    private int _imageCounter;

    /// <summary>Paths of the image files written while generating the document.</summary>
    public IList<string> GeneratedImages { get; set; }

    /// <summary>Path of the generated HTML file; set by <see cref="Generate"/>.</summary>
    public string GeneratedFilePath { get; set; }

    /// <param name="fileName">Name of the source document.</param>
    /// <param name="outputFileTitle">File title (without extension) for the HTML file and the prefix for image files.</param>
    /// <param name="documentBlockTexts">HTML fragments to append, in order, to the document body.</param>
    public HtmlGenerator(string fileName, string outputFileTitle, IEnumerable<string> documentBlockTexts)
    {
        _fileName = fileName;
        _outputFileTitle = outputFileTitle;
        this.documentBlockTexts = documentBlockTexts;
        GeneratedImages = new List<string>();
    }

    /// <summary>
    /// Builds the html/head/body skeleton, appends every document block to the body and
    /// saves the result as "&lt;outputFileTitle&gt;.html".
    /// </summary>
    /// <returns>This instance, so the generated paths can be read from it.</returns>
    /// <exception cref="FormatException">A block could not be parsed or contains a non-base64 data URI image.</exception>
    public HtmlGenerator Generate()
    {
        var newDoc = new HtmlDocument();
        var html = newDoc.CreateElement("html");
        html.AppendChild(newDoc.CreateElement("head"));
        var body = newDoc.CreateElement("body");
        html.AppendChild(body);
        newDoc.DocumentNode.AppendChild(html);

        foreach (var text in documentBlockTexts)
        {
            body.AppendChildren(GetDocumentAsNodeCollection(text));
        }

        var outputFileName = string.Format("{0}.html", _outputFileTitle);
        newDoc.Save(outputFileName);
        GeneratedFilePath = outputFileName;
        return this;
    }

    /// <summary>
    /// Parses one HTML fragment, extracts its embedded images to files and returns the
    /// fragment's top-level nodes.
    /// </summary>
    private HtmlNodeCollection GetDocumentAsNodeCollection(string html)
    {
        var doc = new HtmlDocument { OptionFixNestedTags = true };
        doc.LoadHtml(html);
        if (doc.ParseErrors.Any())
        {
            // Report where and why parsing failed instead of the error objects' type names.
            var details = doc.ParseErrors
                .Select(p => string.Format("line {0}: {1}", p.Line, p.Reason))
                .ToArray();
            throw new FormatException(string.Join("; ", details));
        }

        // SelectNodes returns null (not an empty collection) when nothing matches.
        var imageNodes = doc.DocumentNode.SelectNodes("//img[@src]");
        if (imageNodes != null)
        {
            foreach (var imageNode in imageNodes)
            {
                ExtractEmbeddedImage(imageNode);
            }
        }

        return doc.DocumentNode.ChildNodes;
    }

    /// <summary>
    /// Writes a base64 data URI image to its own file and points the node's <c>src</c> at it.
    /// Images whose <c>src</c> is not a data URI are left untouched.
    /// </summary>
    private void ExtractEmbeddedImage(HtmlNode imageNode)
    {
        var src = imageNode.Attributes["src"].Value;
        if (!src.StartsWith(DataUriPrefix, StringComparison.OrdinalIgnoreCase))
        {
            // Regular URL or file reference: nothing is embedded, so nothing to extract.
            return;
        }

        var markerIndex = src.IndexOf(Base64Marker, StringComparison.OrdinalIgnoreCase);
        if (markerIndex < 0)
        {
            // Show the start of the offending value; the payload itself may be huge.
            throw new FormatException(
                "Expected base64, as image data, not: " + src.Substring(0, Math.Min(src.Length, 40)));
        }

        // The header is "<media type>[;param=value...]"; only the media type selects the extension.
        var header = src.Substring(DataUriPrefix.Length, markerIndex - DataUriPrefix.Length);
        var mediaType = header.Split(new[] { ';' })[0];
        var extension = MimeTypeMap.MimeTypeMap.GetExtension(mediaType);

        var bytes = Convert.FromBase64String(src.Substring(markerIndex + Base64Marker.Length));
        var outputFile = string.Format("{0}{1}{2}", _outputFileTitle, ++_imageCounter, extension);
        File.WriteAllBytes(outputFile, bytes);

        GeneratedImages.Add(outputFile);
        imageNode.SetAttributeValue("src", outputFile);
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment