Last active
February 10, 2022 08:47
-
-
Save ngekoding/b198d144982abc1d58ac5512843079e9 to your computer and use it in GitHub Desktop.
Crawling google form response summary (analytics)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const puppeteer = require('puppeteer-extra');
const pluginStealth = require('puppeteer-extra-plugin-stealth');
const fs = require('fs');
const dayjs = require('dayjs');

// Register the stealth plugin before any browser is launched.
puppeteer.use(pluginStealth());

// Change the default dayjs locale to Indonesian.
require('dayjs/locale/id');
dayjs.locale('id');

// Saving to PDF only works in headless mode.
const headless = true;

// Google account credentials.
// Prefer the GFORM_EMAIL / GFORM_PASSWORD environment variables so that real
// credentials never have to be committed; the literals below are placeholders
// used only when the variables are not set.
const email = process.env.GFORM_EMAIL || '[email protected]';
const password = process.env.GFORM_PASSWORD || 'your-email-password';

// Base path where the PDF results are saved.
const resultBasePath = './';

// Data to be crawled. Keeping it in a separate file makes the crawler dynamic:
// forms can be added or removed without touching this script.
const survey = JSON.parse(fs.readFileSync('survey.json', 'utf8'));

// File that stores the last update time of every generated PDF.
const timestampsFile = 'timestamps.json';
/**
 * Read the stored last-update timestamps from `timestampsFile`.
 *
 * A missing or empty file is treated as "no timestamps yet", so the very
 * first run (before the file has ever been written) does not crash.
 *
 * @returns {Object} The parsed JSON content, or an empty object when the
 *   file does not exist or contains only whitespace.
 * @throws {SyntaxError} When the file exists but does not contain valid JSON.
 * @throws {Error} Any file-system error other than ENOENT.
 */
function getTimestamps() {
  let raw;
  try {
    raw = fs.readFileSync(timestampsFile, 'utf8');
  } catch (err) {
    // Only "file not found" is expected; anything else is a real problem.
    if (err.code === 'ENOENT') {
      return {};
    }
    throw err;
  }
  return raw.trim() === '' ? {} : JSON.parse(raw);
}
/**
 * Persist the timestamps object to `timestampsFile` as JSON.
 *
 * This is best-effort: any failure while serializing or writing is logged
 * to the console and not rethrown.
 *
 * @param {Object} data - The timestamps to store.
 */
function setTimestamps(data) {
  try {
    const serialized = JSON.stringify(data);
    fs.writeFileSync(timestampsFile, serialized);
  } catch (writeError) {
    console.log('Failed to write timestamps', writeError);
  }
}
/**
 * Record the last update time for a single entry.
 *
 * Loads the current timestamps with getTimestamps(), sets `key` to `value`
 * and hands the result to setTimestamps().
 *
 * @param {string} key - Entry identifier (the caller passes the PDF filename).
 * @param {string} value - The last-update text to store for that entry.
 */
function updateSurveyTimestamp(key, value) {
  const stored = getTimestamps();
  setTimestamps(Object.assign(stored, { [key]: value }));
}
/**
 * Sign in to the Google account using the configured `email` and `password`.
 *
 * Resolves once an element with role="banner" is present after submitting
 * the password.
 *
 * @param {Object} page - Puppeteer page used for the whole session.
 * @returns {Promise<void>}
 */
async function signIn(page) {
  await page.goto('https://accounts.google.com/signin/v2/identifier', { waitUntil: 'networkidle2' });

  // Wait for the email input.
  await page.waitForSelector('#identifierId');
  await page.type('#identifierId', email);
  await page.waitForTimeout(1000);
  await page.keyboard.press('Enter');
  await page.waitForTimeout(1000);

  // The password step is detected through its checkbox. The checkbox is
  // clicked before typing because the password is typed into
  // input[type="text"] afterwards.
  // NOTE(review): presumably this is the "show password" toggle, which turns
  // the password field into a text input - confirm against the live page.
  await page.waitForSelector('input[type="checkbox"]');
  await page.evaluate(() => {
    document.querySelector('input[type="checkbox"]').parentElement.click();
  });
  await page.waitForTimeout(1000);
  await page.type('input[type="text"]', password);
  await page.waitForTimeout(1000);
  await page.keyboard.press('Enter');

  await page.waitForSelector('[role="banner"]');
}

/**
 * Open one response summary, strip the unwanted elements, save it as PDF
 * (headless mode only) and record its last update time.
 *
 * @param {Object} page - Signed-in Puppeteer page.
 * @param {Object} item - One entry of `survey.summaries`; `title`, `url`,
 *   `filename` and the optional `ignores` list are read from it.
 * @returns {Promise<void>}
 */
async function exportSummary(page, item) {
  const now = dayjs();
  const lastUpdate = `${now.format('dddd, D MMMM YYYY')} pukul ${now.format('HH.mm')}`;

  // Print the title.
  console.log(`Getting for: ${item.title}`);

  await page.goto(item.url);

  // Wait until the loading indicator is gone.
  await page.waitForSelector('.freebirdCommonViewLoadingindicatorLoadingIndicatorContainer', {
    hidden: true,
  });

  // Give the "file unavailable" dialog up to 5 seconds to show up so that it
  // can be removed below.
  try {
    await page.waitForSelector('.quantumWizDialogBackground.isOpen', {
      timeout: 5000,
    });
  } catch (err) {
    // Deliberately ignored: the dialog not appearing is the normal case.
  }

  // A summary without an "ignores" list simply keeps every question.
  const summariesTitleToRemove = item.ignores || [];

  // Remove unused elements.
  await page.evaluate((titlesToRemove, lastUpdateText) => {
    // Missing elements are skipped instead of aborting the whole export.
    const removeIfPresent = (selector) => {
      const element = document.querySelector(selector);
      if (element) element.remove();
    };

    // Remove the "file unavailable" dialog if it exists.
    removeIfPresent('.quantumWizDialogBackground.isOpen');

    // Change the body background.
    document.body.style.backgroundColor = '#ffffff';

    // Remove the publish button.
    removeIfPresent('a.exportButtonNestedLink');

    // Remove the FAB.
    removeIfPresent('.freebirdFormviewerViewNavigationHeaderButtonContainer');

    // Replace the footer disclaimer with the last update time.
    // Alternatively, the footer can be dropped entirely by removing
    // '.freebirdFormviewerViewFooterDisclaimer' and
    // '.freebirdFormviewerViewFooterImageContainer'.
    const footer = document.querySelector('.freebirdFormviewerViewFooterDisclaimer');
    if (footer) footer.textContent = 'Diperbarui pada: ' + lastUpdateText;

    // Remove every summary item whose question title or section header is listed.
    for (const summaryItem of document.querySelectorAll('.freebirdAnalyticsViewAnalyticsItem')) {
      const title = summaryItem.querySelector('.freebirdAnalyticsViewQuestionTitle');
      const headerTitle = summaryItem.querySelector('.freebirdAnalyticsViewSectionHeader');
      if (
        (title && titlesToRemove.includes(title.innerText)) ||
        (headerTitle && titlesToRemove.includes(headerTitle.innerText))
      ) {
        summaryItem.remove();
      }
    }
  }, summariesTitleToRemove, lastUpdate);

  // Save the content to a file.
  console.log(`Saving result: ${item.filename}`);

  // PDF only works when headless is active.
  if (headless) {
    await page.pdf({
      path: resultBasePath + item.filename,
      printBackground: true,
      format: 'A4',
      margin: {
        top: '2cm',
        bottom: '2cm',
        left: '1cm',
        right: '1cm',
      },
    });
  }

  // Save the last update time.
  updateSurveyTimestamp(item.filename, lastUpdate);
}

// Entry point: launch the browser, sign in, then export every configured summary.
(async () => {
  const browser = await puppeteer.launch({
    headless,
    args: [
      '--no-sandbox',
      '--disable-setuid-sandbox',
      '--disable-dev-shm-usage',
      '--disable-accelerated-2d-canvas',
      '--no-first-run',
      '--no-zygote',
      '--disable-gpu',
      // '--single-process', // <- this one doesn't work on Windows
    ],
  });

  try {
    console.log('App running...');

    const page = await browser.newPage();
    const pages = await browser.pages();

    // Close the tab that Chromium always opens first.
    const initialTab = pages[0];
    if (initialTab && initialTab !== page) {
      await initialTab.close();
    }

    await page.setBypassCSP(true);
    await signIn(page);

    // Summaries are processed one after another because they share one page.
    for (const item of survey.summaries) {
      await exportSummary(page, item);
    }

    console.log('✅ Operation completed, have a good day!');
  } finally {
    // Always shut the browser down, even when a step above failed, so that
    // no Chromium process is left behind.
    await browser.close();
  }
})().catch((err) => {
  console.error('Crawling failed:', err);
  process.exitCode = 1;
});
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{
  "name": "g-form-automator",
  "version": "1.0.0",
  "description": "",
  "main": "index.js",
  "scripts": {
    "test": "echo \"Error: no test specified\" && exit 1"
  },
  "keywords": [
    "google form automator",
    "google form crawler",
    "nodejs crawler",
    "puppeteer"
  ],
  "author": "Nur Muhammad",
  "license": "MIT",
  "dependencies": {
    "dayjs": "^1.10.7",
    "puppeteer": "^10.1.0",
    "puppeteer-extra": "^3.1.18",
    "puppeteer-extra-plugin-stealth": "^2.7.8"
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{
  "summaries": [
    {
      "title": "Survei Layanan Persuratan",
      "url": "https://docs.google.com/forms/d/.../viewanalytics",
      "filename": "survei-layanan-persuratan.pdf",
      "ignores": [
        "Alamat Email",
        "Nomor HP (Whatsapp)"
      ]
    },
    {
      "title": "Survei Layanan Urusan Internasional",
      "url": "https://docs.google.com/forms/d/.../viewanalytics",
      "filename": "survei-layanan-urusan-internasional.pdf",
      "ignores": [
        "Email address:",
        "Mobile Number (Whatsapp)",
        "EVALUATION AND IMPROVEMENT"
      ]
    }
  ]
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Notes:
survey.json
- url: get this URL from the Google Form responses page -> Print
- ignores: list of summary titles to remove before generating the PDF (sensitive data)

How to run?
- npm install: to install the libraries
- node g-form-automator.js: to start the app

Feel free to make changes to meet your needs!