Skip to content

Instantly share code, notes, and snippets.

@Upabjojr
Created December 12, 2024 20:05
Show Gist options
  • Save Upabjojr/97d0debbd67e9e3c81e57a4cea0d51a4 to your computer and use it in GitHub Desktop.
Save Upabjojr/97d0debbd67e9e3c81e57a4cea0d51a4 to your computer and use it in GitHub Desktop.
<!DOCTYPE html>
<html>
<head>
<title>Docling JSON Viewer</title>
<style>
/* One rendered page. The script adds this class to each per-page wrapper and sets
   its width/height; position: relative makes the wrapper the containing block for
   the absolutely positioned items placed inside it when bbox layout is enabled. */
.page-container {
position: relative;
display: inline-block;
font-size: xx-small;
}
/* Visible cell borders for tables built from a table item's grid. */
table, th, td {
border: 1px solid;
}
table {
border-collapse: collapse;
}
/* Applied to group containers so their content is offset from the left edge. */
.indented {
margin-left: 10px;
}
/* The small grey "label - self_ref" caption placed above each rendered item. */
.category_p {
font-size: xx-small;
font-style: italic;
color: gray;
margin-bottom: 0px;
margin-top: 0px;
}
/* Keeps the captions out of text selections (vendor-prefixed variants included). */
.non-selectable {
user-select: none;
-moz-user-select: none;
-webkit-user-select: none;
-ms-user-select: none;
}
/* Drop the default top margin of paragraphs so item text sits directly under its caption. */
p {
margin-top: 0px;
}
</style>
<script>
/**
 * Change handler of the file input: reads the selected JSON file and renders it.
 *
 * `#rootDiv` is emptied before the file is read, so output of a previously loaded
 * document never mixes with the new one. Every failure (no file, unsupported type,
 * read error, malformed JSON) is reported through `console.error` and nothing is
 * rendered.
 *
 * @param {HTMLInputElement} input_elem - The `<input type="file">` whose selection changed.
 * @returns {void}
 */
function uploadDocument(input_elem) {
  // The file list can be empty (e.g. the picker was dismissed); indexing it then
  // yields undefined, which must not be dereferenced.
  const file = input_elem.files && input_elem.files[0];
  if (!file) {
    console.error('No file selected.');
    return;
  }

  // `type` is an empty string when the browser cannot determine a MIME type for
  // the file; in that case fall back to the file name's extension.
  const looksLikeJson =
    file.type === 'application/json' ||
    (file.type === '' && /\.json$/i.test(String(file.name)));
  if (!looksLikeJson) {
    console.error('Only JSON files are supported.');
    return;
  }

  const rootDiv = document.querySelector("#rootDiv");
  rootDiv.innerHTML = "";
  console.log(file);

  const fileReader = new FileReader();
  fileReader.addEventListener('load', (event) => {
    let jsonData;
    try {
      jsonData = JSON.parse(event.target.result);
    } catch (err) {
      // A syntactically invalid file is a user error, not a crash.
      console.error('Error parsing JSON file:', err);
      return;
    }
    console.log('Parsed JSON data:', jsonData);
    renderDocument(jsonData);
  });
  fileReader.addEventListener('error', (event) => {
    console.error('Error reading file:', event.target.error);
  });
  fileReader.readAsText(file);
}
/**
 * Builds a renderer for the parsed document and runs it immediately.
 *
 * The second constructor argument is the current state of the "Use bbox"
 * checkbox. `use_bbox` is not declared in this script; it is presumably resolved
 * through the browser's global lookup of the element with id="use_bbox".
 *
 * @param {object} jsonData - Parsed contents of the uploaded JSON file.
 * @returns {void}
 */
function renderDocument(jsonData) {
  new RenderDocument(jsonData, use_bbox.checked).render();
}
/**
 * Renders a parsed Docling JSON document into the element matching `#rootDiv`.
 *
 * Every item referenced from `body.children` (and, recursively, from each item's
 * own `children`) becomes a DOM node. Nodes are grouped into one `.page-container`
 * div per page number, taken from the first entry of the item's `prov` array.
 * Items without provenance (for example groups) are attached to the page of
 * their first located child, or to the most recently used page.
 */
class RenderDocument {
  /**
   * @param {object} jsonData - Parsed JSON: either an object carrying the document
   *   under a `document` property, or the document object itself.
   * @param {boolean} use_bbox - When true, items are absolutely positioned inside
   *   their page using the bounding box of their first provenance entry.
   * @throws {Error} If no `body` can be found in the input.
   */
  constructor(jsonData, use_bbox) {
    this.use_bbox = use_bbox;
    // Fall back to the root object so files saved without a wrapper still load.
    this.doc = (jsonData && jsonData.document) || jsonData;
    if (!this.doc || !this.doc.body) {
      throw new Error("JSON does not look like a Docling document: no `body` found.");
    }
    this.body = this.doc.body;
    this.texts = this.doc.texts || [];

    // page number -> page record (the record's `size` is used for layout)
    this.page_no_map = {};
    for (const page of Object.values(this.doc.pages || {})) {
      this.page_no_map[page.page_no] = page;
    }

    // self_ref -> item, one lookup table per item collection; absent collections
    // are treated as empty instead of aborting the whole render.
    this.map_text_refs = RenderDocument.indexBySelfRef(this.texts);
    this.map_group_refs = RenderDocument.indexBySelfRef(this.doc.groups);
    this.map_picture_refs = RenderDocument.indexBySelfRef(this.doc.pictures);
    this.map_table_refs = RenderDocument.indexBySelfRef(this.doc.tables);
  }

  /**
   * Builds a `self_ref -> item` lookup table.
   * @param {Array<object>|undefined} items - Item collection; may be missing.
   * @returns {Object<string, object>}
   */
  static indexBySelfRef(items) {
    const index = {};
    for (const item of items || []) {
      index[item.self_ref] = item;
    }
    return index;
  }

  /**
   * Extracts the JSON pointer from a reference object.
   * @param {object} ref - Reference such as `{cref: "#/texts/0"}`.
   * @returns {string|undefined}
   */
  static refOf(ref) {
    if (!ref) {
      return undefined;
    }
    // `cref` is the key this viewer has always read. `$ref` is accepted as well:
    // NOTE(review): Docling's aliased JSON export is understood to use `$ref`
    // for the same field; confirm against the export path you use.
    return ref.cref !== undefined ? ref.cref : ref["$ref"];
  }

  /**
   * Creates the page containers, fills them and appends them (each followed by an
   * `<hr>`) to `#rootDiv` in ascending page-number order.
   * @returns {void}
   * @throws {Error} If the page has no `#rootDiv` element.
   */
  render() {
    const root = document.querySelector("#rootDiv");
    if (!root) {
      throw new Error('Cannot render: no element matches "#rootDiv".');
    }

    // Page used for items that carry no page number of their own: the page of the
    // previous item, initially the lowest known page (1 if the document lists none).
    const knownPages = Object.keys(this.page_no_map).map(Number).filter(Number.isFinite);
    let fallbackPage = knownPages.length > 0 ? Math.min(...knownPages) : 1;

    // Sparse array indexed by page number, so iteration order is page order.
    this.page_elems = [];
    for (const child of this.body.children || []) {
      const elem = this.createElementNode(RenderDocument.refOf(child));
      let page_no = Number.parseInt(elem.dataset.page_no, 10);
      if (Number.isNaN(page_no)) {
        // Without this, the element would be stored under the key "NaN", which
        // the loop below never visits, and silently vanish from the output.
        page_no = fallbackPage;
      } else {
        fallbackPage = page_no;
      }
      if (this.page_elems[page_no] === undefined) {
        const container = document.createElement("div");
        container.classList.add("page-container");
        container.dataset.page_no = `${page_no}`;
        this.page_elems[page_no] = container;
      }
      this.page_elems[page_no].appendChild(elem);
    }

    for (const page_elem of this.page_elems) {
      if (page_elem === undefined) {
        continue; // hole in the sparse array: no content for this index
      }
      const page = this.page_no_map[Number.parseInt(page_elem.dataset.page_no, 10)];
      if (page && page.size) {
        page_elem.style.width = `${page.size.width}px`;
        page_elem.style.height = `${page.size.height}px`;
      }
      root.appendChild(page_elem);
      root.appendChild(document.createElement("hr"));
    }
  }

  /**
   * Builds the DOM subtree for the item a reference points to.
   *
   * @param {string} cref - JSON pointer such as `#/texts/3`.
   * @returns {HTMLElement} The item's element with its children appended. An empty
   *   `<div>` is returned for unsupported or unresolvable references.
   */
  createElementNode(cref) {
    const ref = typeof cref === "string" ? cref : "";
    let obj;
    let build;
    if (ref.startsWith("#/texts/")) {
      obj = this.map_text_refs[ref];
      build = this.createHtmlText;
    } else if (ref.startsWith("#/groups/")) {
      obj = this.map_group_refs[ref];
      build = this.createHtmlGroup;
    } else if (ref.startsWith("#/pictures/")) {
      obj = this.map_picture_refs[ref];
      build = this.createHtmlPicture;
    } else if (ref.startsWith("#/tables/")) {
      obj = this.map_table_refs[ref];
      build = this.createHtmlTable;
    }
    if (obj === undefined) {
      // Unknown collection or dangling pointer: keep rendering the rest.
      return document.createElement("div");
    }
    const elem = build.call(this, obj);

    // Page of the first child that has one; used when the item itself has none.
    let inheritedPage;
    for (const child of obj.children || []) {
      const childElem = this.createElementNode(RenderDocument.refOf(child));
      elem.appendChild(childElem);
      if (inheritedPage === undefined) {
        inheritedPage = childElem.dataset.page_no;
      }
    }

    // `prov` may be absent or an empty array; only its first entry is used.
    const prov = Array.isArray(obj.prov) && obj.prov.length > 0 ? obj.prov[0] : undefined;
    if (prov !== undefined) {
      const page = this.page_no_map[prov.page_no];
      const bbox = prov.bbox;
      if (this.use_bbox && bbox && page && page.size) {
        elem.style.position = "absolute";
        elem.style.width = `${Math.abs(bbox.r - bbox.l)}px`;
        elem.style.height = `${Math.abs(bbox.t - bbox.b)}px`;
        // CSS `top` is measured from the top edge. By default `t` is treated as a
        // distance from the bottom of the page and flipped; a box explicitly
        // marked with a top-left origin is used as is.
        const top = bbox.coord_origin === "TOPLEFT" ? bbox.t : page.size.height - bbox.t;
        elem.style.top = `${top}px`;
        elem.style.left = `${bbox.l}px`;
      }
      elem.dataset.page_no = `${prov.page_no}`;
    } else if (inheritedPage !== undefined) {
      elem.dataset.page_no = inheritedPage;
    }
    return elem;
  }

  /**
   * Creates the small caption shown above an item.
   * @param {object} obj - Item providing `label` and `self_ref`.
   * @returns {HTMLParagraphElement} Paragraph reading "label - self_ref".
   */
  createHtmlForCategory(obj) {
    const p = document.createElement("p");
    p.innerText = `${obj.label} - ${obj.self_ref}`;
    p.classList.add("category_p");
    p.classList.add("non-selectable");
    return p;
  }

  /**
   * Creates the element for a text item: caption plus the item's text. The full
   * item JSON is attached to the text paragraph as its tooltip.
   * @param {object} obj - Text item.
   * @returns {HTMLDivElement}
   */
  createHtmlText(obj) {
    const div = document.createElement("div");
    const p = document.createElement("p");
    p.innerText = obj.text;
    p.setAttribute("title", JSON.stringify(obj, null, 2));
    div.appendChild(this.createHtmlForCategory(obj));
    div.appendChild(p);
    return div;
  }

  /**
   * Creates the indented container for a group; its members are appended by
   * `createElementNode`.
   * @param {object} obj - Group item.
   * @returns {HTMLDivElement}
   */
  createHtmlGroup(obj) {
    // A <div>, not a <p>: the container receives block-level children, which a
    // paragraph may not contain.
    const div = document.createElement("div");
    const caption = this.createHtmlForCategory(obj);
    caption.setAttribute("title", JSON.stringify(obj, null, 2));
    div.classList.add("indented");
    div.appendChild(caption);
    return div;
  }

  /**
   * Creates the placeholder element for a picture: caption plus an empty
   * paragraph, with the item JSON as tooltip. No image data is drawn.
   * @param {object} obj - Picture item.
   * @returns {HTMLDivElement}
   */
  createHtmlPicture(obj) {
    const div = document.createElement("div");
    div.appendChild(this.createHtmlForCategory(obj));
    div.appendChild(document.createElement("p"));
    div.setAttribute("title", JSON.stringify(obj, null, 2));
    return div;
  }

  /**
   * Creates the element for a table item: caption, an empty paragraph and an HTML
   * table with one row per row of `data.grid`. Cells flagged `column_header`
   * become `<th>`, all others `<td>`.
   * @param {object} obj - Table item.
   * @returns {HTMLDivElement}
   */
  createHtmlTable(obj) {
    const div = document.createElement("div");
    div.appendChild(this.createHtmlForCategory(obj));
    div.appendChild(document.createElement("p"));
    div.setAttribute("title", JSON.stringify(obj, null, 2));

    const grid = (obj.data && obj.data.grid) || [];
    const table = document.createElement("table");
    for (const row of grid) {
      const tr = document.createElement("tr");
      for (const cell of row) {
        const td = document.createElement(cell.column_header ? "th" : "td");
        td.innerText = cell.text;
        tr.appendChild(td);
      }
      table.appendChild(tr);
    }
    div.appendChild(table);
    return div;
  }
}
</script>
</head>
<body>
<!-- Controls: file picker and rendering options. -->
<div>
<!-- Selecting a file calls uploadDocument(), which reads it and renders into #rootDiv. -->
<input type="file" id="file-input" accept=".json" onchange="uploadDocument(this);" />
<label for="use_bbox">Use bbox</label>
<!-- Read by renderDocument() as use_bbox.checked: enables bounding-box positioning. -->
<input type="checkbox" id="use_bbox" checked="true" />
<label for="debug_data">Show debug info</label>
<!-- NOTE(review): no script in this file reads #debug_data; toggling it has no visible effect here. -->
<input type="checkbox" id="debug_data" checked="true" />
</div>
<!-- Render target: emptied by uploadDocument() and filled with one page container per page. -->
<div id="rootDiv"></div>
</body>
</html>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment