Created
December 12, 2024 20:05
-
-
Save Upabjojr/97d0debbd67e9e3c81e57a4cea0d51a4 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<!DOCTYPE html> | |
<html> | |
<head> | |
<title>Docling JSON Viewer</title> | |
<style> | |
.page-container { | |
position: relative; | |
display: inline-block; | |
font-size: xx-small; | |
} | |
table, th, td { | |
border: 1px solid; | |
} | |
table { | |
border-collapse: collapse; | |
} | |
.indented { | |
margin-left: 10px; | |
} | |
.category_p { | |
font-size: xx-small; | |
font-style: italic; | |
color: gray; | |
margin-bottom: 0px; | |
margin-top: 0px; | |
} | |
.non-selectable { | |
user-select: none; | |
-moz-user-select: none; | |
-webkit-user-select: none; | |
-ms-user-select: none; | |
} | |
p { | |
margin-top: 0px; | |
} | |
</style> | |
<script> | |
/**
 * Handles the file input's change event: reads the selected JSON file,
 * clears the previous rendering and hands the parsed data to
 * `renderDocument`.
 *
 * Nothing is rendered (and nothing is thrown) when no file is selected,
 * when the file is not JSON, or when its contents cannot be parsed.
 *
 * @param {HTMLInputElement} input_elem - The `<input type="file">` element.
 */
function uploadDocument(input_elem) {
  // `files` is empty when the user cancels the file dialog.
  const file = input_elem.files && input_elem.files[0];
  if (!file) {
    return;
  }

  // Some platforms report an empty MIME type for .json files; fall back to
  // the file extension in that case only.
  const hasJsonExtension = typeof file.name === 'string' && /\.json$/i.test(file.name);
  const isJson = file.type === 'application/json' || (file.type === '' && hasJsonExtension);
  if (!isJson) {
    console.error('Only JSON files are supported.');
    return;
  }

  const rootDiv = document.querySelector("#rootDiv");
  rootDiv.innerHTML = "";
  console.log(file);

  const fileReader = new FileReader();
  fileReader.addEventListener('load', (event) => {
    let jsonData;
    try {
      jsonData = JSON.parse(event.target.result);
    } catch (err) {
      // A malformed file must not surface as an uncaught exception in the
      // event handler; report it and leave the page empty.
      console.error('Error parsing JSON:', err);
      return;
    }
    console.log('Parsed JSON data:', jsonData);
    renderDocument(jsonData);
  });
  fileReader.addEventListener('error', (event) => {
    console.error('Error reading file:', event.target.error);
  });
  fileReader.readAsText(file);
}
/**
 * Renders parsed Docling JSON into the page using the current state of the
 * "Use bbox" checkbox.
 *
 * @param {object} jsonData - Parsed contents of the uploaded JSON file.
 */
function renderDocument(jsonData) {
  // Look the checkbox up explicitly instead of relying on the implicit
  // `window.use_bbox` global that browsers create for elements with an id.
  const useBboxInput = document.getElementById("use_bbox");
  const render_document = new RenderDocument(jsonData, useBboxInput.checked);
  render_document.render();
}
/**
 * Renders a parsed Docling JSON document as HTML, one container per page.
 *
 * The document body lists references to text, group, picture and table
 * items; each item becomes an element holding a small category caption and
 * its content, with its children nested inside. When `use_bbox` is set,
 * items that carry provenance are absolutely positioned from their first
 * bounding box.
 *
 * NOTE(review): a positioned item's children are positioned relative to
 * that item, not the page, so nested items with their own bbox may end up
 * offset — confirm whether that is intended.
 */
class RenderDocument {
  /**
   * @param {object} jsonData - Parsed JSON. The document is read from
   *   `jsonData.document`; when that key is absent `jsonData` itself is
   *   treated as the document.
   * @param {boolean} use_bbox - Position items from their bounding boxes.
   */
  constructor(jsonData, use_bbox) {
    this.use_bbox = use_bbox;
    this.doc = jsonData.document != null ? jsonData.document : jsonData;
    this.body = this.doc.body;
    this.texts = this.doc.texts || [];

    // Pages are keyed by their own `page_no`, not by their key in `pages`.
    this.page_no_map = {};
    for (const page of Object.values(this.doc.pages || {})) {
      this.page_no_map[page.page_no] = page;
    }

    // A missing collection is treated as empty rather than crashing.
    this.map_text_refs = RenderDocument.indexBySelfRef(this.texts);
    this.map_group_refs = RenderDocument.indexBySelfRef(this.doc.groups);
    this.map_picture_refs = RenderDocument.indexBySelfRef(this.doc.pictures);
    this.map_table_refs = RenderDocument.indexBySelfRef(this.doc.tables);
  }

  /** Builds a `self_ref -> item` lookup from a possibly missing item list. */
  static indexBySelfRef(items) {
    const index = {};
    for (const item of items || []) {
      index[item.self_ref] = item;
    }
    return index;
  }

  /**
   * Returns the reference string of a child entry. Docling's own JSON export
   * writes references under `$ref`; `cref` is what this viewer originally read.
   */
  static refOf(child) {
    if (child == null) {
      return undefined;
    }
    return child.cref !== undefined ? child.cref : child["$ref"];
  }

  /** Page used for leading items that carry no page information at all. */
  fallbackPageNo() {
    const known = Object.keys(this.page_no_map).map(Number).filter(Number.isInteger);
    return known.length > 0 ? Math.min(...known) : 1;
  }

  /**
   * Builds all page containers and appends them, each followed by an `<hr>`,
   * to the `#rootDiv` element in ascending page order.
   */
  render() {
    const root = document.getElementById("rootDiv");
    this.page_elems = [];
    let currentPageNo = null;

    const bodyChildren = (this.body && this.body.children) || [];
    for (const child of bodyChildren) {
      const elem = this.createElementNode(RenderDocument.refOf(child));
      let page_no = Number.parseInt(elem.dataset.page_no, 10);
      if (Number.isNaN(page_no)) {
        // Items without any page information used to be keyed under NaN and
        // silently dropped; keep them on the page currently being filled.
        page_no = currentPageNo !== null ? currentPageNo : this.fallbackPageNo();
      }
      currentPageNo = page_no;

      if (this.page_elems[page_no] == null) {
        const container = document.createElement("div");
        container.classList.add("page-container");
        container.dataset.page_no = `${page_no}`;
        this.page_elems[page_no] = container;
      }
      this.page_elems[page_no].appendChild(elem);
    }

    // `page_elems` is sparse (indexed by page number); skip the holes.
    for (const page_elem of this.page_elems) {
      if (page_elem == null) {
        continue;
      }
      const page = this.page_no_map[Number.parseInt(page_elem.dataset.page_no, 10)];
      if (page != null && page.size != null) {
        page_elem.style.width = `${page.size.width}px`;
        page_elem.style.height = `${page.size.height}px`;
      }
      root.appendChild(page_elem);
      root.appendChild(document.createElement("hr"));
    }
  }

  /**
   * Resolves a reference to its item and the method that renders it.
   * @returns {{obj: object, build: function}|null} null when the reference
   *   is not a string, has an unknown prefix, or points at no known item.
   */
  lookup(cref) {
    if (typeof cref !== "string") {
      return null;
    }
    const kinds = [
      ["#/texts/", this.map_text_refs, (o) => this.createHtmlText(o)],
      ["#/groups/", this.map_group_refs, (o) => this.createHtmlGroup(o)],
      ["#/pictures/", this.map_picture_refs, (o) => this.createHtmlPicture(o)],
      ["#/tables/", this.map_table_refs, (o) => this.createHtmlTable(o)],
    ];
    for (const [prefix, index, build] of kinds) {
      if (cref.startsWith(prefix)) {
        const obj = index[cref];
        return obj == null ? null : { obj, build };
      }
    }
    return null;
  }

  /**
   * Creates the element for the referenced item, including its children.
   *
   * The element's `dataset.page_no` is taken from the item's first
   * provenance entry, or inherited from the first child that has one.
   *
   * @param {string} cref - Item reference such as `#/texts/3`.
   * @returns {HTMLElement} The item element, or an empty `<div>` when the
   *   reference cannot be resolved.
   */
  createElementNode(cref) {
    const found = this.lookup(cref);
    if (found === null) {
      return document.createElement("div");
    }
    const obj = found.obj;
    const elem = found.build(obj);

    let inheritedPageNo;
    for (const child of obj.children || []) {
      const childElem = this.createElementNode(RenderDocument.refOf(child));
      elem.appendChild(childElem);
      if (inheritedPageNo === undefined && childElem.dataset.page_no !== undefined) {
        inheritedPageNo = childElem.dataset.page_no;
      }
    }

    // `prov` may be absent or an empty list; only its first entry is used.
    const prov = Array.isArray(obj.prov) ? obj.prov[0] : undefined;
    if (prov != null) {
      elem.dataset.page_no = `${prov.page_no}`;
      if (this.use_bbox && prov.bbox != null) {
        this.applyBBox(elem, prov.bbox, this.page_no_map[prov.page_no]);
      }
    } else if (inheritedPageNo !== undefined) {
      elem.dataset.page_no = inheritedPageNo;
    }
    return elem;
  }

  /**
   * Absolutely positions `elem` from a bounding box.
   *
   * A box whose `coord_origin` is "TOPLEFT" already measures `t` from the
   * top of the page; any other box is treated as bottom-left based and
   * flipped with the page height. Nothing is applied when the flip is
   * needed but the page size is unknown.
   */
  applyBBox(elem, bbox, page) {
    let top;
    if (bbox.coord_origin === "TOPLEFT") {
      top = bbox.t;
    } else if (page != null && page.size != null) {
      top = page.size.height - bbox.t;
    } else {
      return;
    }
    elem.style.position = "absolute";
    elem.style.width = `${Math.abs(bbox.r - bbox.l)}px`;
    elem.style.height = `${Math.abs(bbox.t - bbox.b)}px`;
    elem.style.top = `${top}px`;
    elem.style.left = `${bbox.l}px`;
  }

  /** Creates the small, non-selectable "label - self_ref" caption. */
  createHtmlForCategory(obj) {
    const caption = document.createElement("p");
    caption.innerText = `${obj.label} - ${obj.self_ref}`;
    caption.classList.add("category_p");
    caption.classList.add("non-selectable");
    return caption;
  }

  /** Creates a `<div>` with the caption and the item's text; the raw item JSON is the text's tooltip. */
  createHtmlText(obj) {
    const wrapper = document.createElement("div");
    const content = document.createElement("p");
    content.innerText = obj.text;
    content.setAttribute("title", JSON.stringify(obj, null, 2));
    wrapper.appendChild(this.createHtmlForCategory(obj));
    wrapper.appendChild(content);
    return wrapper;
  }

  /**
   * Creates the indented container for a group; the raw item JSON is the
   * caption's tooltip.
   *
   * NOTE(review): the container is a `<p>` even though block children are
   * appended to it later; kept as-is to preserve the existing spacing.
   */
  createHtmlGroup(obj) {
    const container = document.createElement("p");
    const caption = this.createHtmlForCategory(obj);
    caption.setAttribute("title", JSON.stringify(obj, null, 2));
    container.classList.add("indented");
    container.appendChild(caption);
    return container;
  }

  /** Creates a `<div>` with the caption and an empty paragraph; the raw item JSON is the tooltip. */
  createHtmlPicture(obj) {
    const wrapper = document.createElement("div");
    wrapper.appendChild(this.createHtmlForCategory(obj));
    wrapper.appendChild(document.createElement("p"));
    wrapper.setAttribute("title", JSON.stringify(obj, null, 2));
    return wrapper;
  }

  /**
   * Creates a `<div>` with the caption, an empty paragraph and an HTML table
   * built from `obj.data.grid`; cells flagged `column_header` become `<th>`.
   * A missing grid yields an empty table.
   */
  createHtmlTable(obj) {
    const wrapper = document.createElement("div");
    wrapper.appendChild(this.createHtmlForCategory(obj));
    wrapper.appendChild(document.createElement("p"));
    wrapper.setAttribute("title", JSON.stringify(obj, null, 2));

    const grid = (obj.data && obj.data.grid) || [];
    const table = document.createElement("table");
    for (const row of grid) {
      const tr = document.createElement("tr");
      for (const cell of row) {
        const cellElem = document.createElement(cell.column_header ? "th" : "td");
        cellElem.innerText = cell.text;
        tr.appendChild(cellElem);
      }
      table.appendChild(tr);
    }
    wrapper.appendChild(table);
    return wrapper;
  }
}
</script>
</head> | |
<body> | |
<div> | |
<input type="file" id="file-input" accept=".json" onchange="uploadDocument(this);" /> | |
<label for="use_bbox">Use bbox</label> | |
<input type="checkbox" id="use_bbox" checked="true" /> | |
<label for="debug_data">Show debug info</label> | |
<input type="checkbox" id="debug_data" checked="true" /> | |
</div> | |
<div id="rootDiv"></div> | |
</body> | |
</html> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment