Skip to content

Instantly share code, notes, and snippets.

@cyrillsemenov
Last active March 16, 2023 17:08
Show Gist options
  • Save cyrillsemenov/d65626c0a2392c72f479e16a6707d970 to your computer and use it in GitHub Desktop.
Save cyrillsemenov/d65626c0a2392c72f479e16a6707d970 to your computer and use it in GitHub Desktop.
Get all comments from GDoc in right order
// Step 1: Export the Google Doc as HTML.
// Step 2: Open the exported HTML file in a browser.
// Step 3: Run this script in the browser's developer console.
// Step 4: Copy the `res` object, then convert it to CSV or use it as is.
// This could (and should) be easily converted to a Python script.
const REGEX = {
"links": /https?:\/\/(?:www\.)?(?:[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,6}\b)*(?:\/[\/\d\w\.-]*)*(?:[\?])*(?:[-\w.\/:}{=?!@#$%^&*()_+<>]+)/gmi,
"timecodes": /(?:\d?\d[:\-–—]\d\d) ?[\-–—]? ?(?:\d?\d[:\-–—]\d\d)?(?: ?[+:\-–—] ?(?:\d?\d[:\-–—]\d\d) ?[\-–—]? ?(?:\d?\d[:\-–—]\d\d)?)?/gi,
};
let links = [...document.querySelectorAll(".c1")].map(e => {
e.querySelector("a")?.remove();
return e.textContent
}),
context = [...document.querySelectorAll("sup")].map(e => {
while (e.previousSibling && e.previousSibling.tagName == "SUP") {
e = e.previousSibling;
};
return e.previousSibling.textContent
});
let result = "I,LINK,NAME,FULL\n";
let res = [];
links.forEach((k, i) => {
let name = context[i].substring(0,20).trim().replace(/^[.,!?]+|[.,]+$/gm,'').replace("\n"," ");
let links = k.match(REGEX.links),
timecodes = k.match(REGEX.timecodes);
// scetchy csv, bad idea to use it
result += `${i},${links},${i} ${timecodes?timecodes+" ":""}${name},${context[i]}\n`;
res.push({
"i": i,
"links": links,
"timecodes": timecodes,
"name": `${i} ${timecodes?timecodes+" ":""}${name}`,
"full_context": context[i].replace("\n"," "),
"full_comment": k.replace("\n"," ")
})
});
res
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment