SrJSDev · March 5, 2024 07:14 · SrJSDev · Mar 5, 2024
diff --git a/openai-scrape.js b/openai-scrape.js
 import puppeteer from 'puppeteer-extra';
 import StealthPlugin from 'puppeteer-extra-plugin-stealth';
 import OpenAI from 'openai';
 import readline from 'readline';
 import fs from 'fs';

 // Configure Puppeteer with StealthPlugin
 puppeteer.use(StealthPlugin());

 // Initialize the OpenAI client (constructed with no explicit options).
 const openai = new OpenAI();
 // Upper bound in milliseconds used when racing sleep() against the page 'load' wait.
 const timeout = 8000;

 // Start the main function. The returned promise is handled explicitly so a
 // failure is reported here instead of surfacing as an unhandled rejection.
 main().catch((error) => {
     console.error("Fatal error:", error);
     process.exit(1);
 });

 /**
  * Run the interactive browsing session.
  *
  * Launches a headless browser and then loops forever: each iteration sends
  * either the user's typed prompt or the latest screenshot to the chat model,
  * prints the reply, and lets handleAssistantResponseSS act on any click/url
  * instruction contained in it. When that call returns a falsy value, the
  * next iteration asks the user for a new text prompt.
  *
  * @returns {Promise<void>} Does not resolve in normal operation (the loop has
  *   no exit); rejects if any step throws, after the browser has been closed.
  */
 async function main() {
     console.log("###########################################");
     console.log("# GPT4V-Browsing by Unconventional Coding #");
     console.log("###########################################\n");
     const browser = await puppeteer.launch({ headless: "new" });
     try {
         const page = await browser.newPage();
         await page.setViewport({ width: 1200, height: 1200, deviceScaleFactor: 1.75 });

         const messages = [{ "role": "system", "content": systemMessage }];
         console.log("GPT: How can I assist you today?");
         let userPrompt = "";
         while (true) {
             // Decide which user prompt to provide, text or screenshot
             if (!userPrompt) {
                 userPrompt = await input("You: ");
                 console.log();
                 messages.push({ "role": "user", "content": userPrompt });
             } else {
                 const base64Image = await imageToBase64("screenshot.jpg");
                 messages.push({
                     "role": "user",
                     "content": [
                         { "type": "image_url", "image_url": base64Image },
                         { "type": "text", "text": `Here's the screenshot of the website you are on right now.
                         You can click on links with {"click": "Link text"}.
                         Or you can crawl to another URL if this one is incorrect with {"url": "url goes here"}.
                         If you find the answer to the user's question, you can respond normally.`
                         }
                     ]
                 });
             }

             const response = await openai.chat.completions.create({
                 model: "gpt-4-vision-preview",
                 max_tokens: 1024,
                 messages: messages,
             });

             const responseText = response.choices[0].message.content;
             messages.push({ "role": "assistant", "content": responseText });
             console.log("GPT: " + responseText);

             // Must be declared: assigning to an undeclared name throws in an ES module.
             const screenShotOf = await handleAssistantResponseSS(page, messages, responseText);
             if (!screenShotOf) {
                 // No screenshot was produced: the model answered in plain text
                 // (logged above) or the requested action failed.
                 // To start a clean conversation at this point, truncate the
                 // history back to the system message with `messages.length = 1`.
                 // An empty prompt makes the next iteration ask the user for text.
                 userPrompt = "";
             }
         }
     } finally {
         // Release the browser process if the loop is left by an exception.
         await browser.close();
     }
 }

 /**
  * Read a file and return its contents as a JPEG data URL.
  *
  * The `data:image/jpeg` prefix is applied unconditionally; the file's actual
  * format is not inspected.
  *
  * @param {string} imageFile - Path passed to fs.promises.readFile.
  * @returns {Promise<string>} `data:image/jpeg;base64,` followed by the
  *   base64-encoded file contents.
  * @throws Re-throws the read error after logging it with console.error.
  */
 async function imageToBase64(imageFile) {
     let encoded;
     try {
         const buffer = await fs.promises.readFile(imageFile);
         encoded = buffer.toString('base64');
     } catch (error) {
         console.error('Error reading the file:', error);
         throw error;
     }
     return 'data:image/jpeg;base64,' + encoded;
 }

 /**
  * Ask a question on the terminal and resolve with the line the user enters.
  *
  * A readline interface over process.stdin/process.stdout is created for each
  * call and closed as soon as the answer arrives.
  *
  * @param {string} text - Prompt text shown to the user.
  * @returns {Promise<string>} The answer passed to the readline callback.
  */
 async function input(text) {
     const terminal = readline.createInterface({
         input: process.stdin,
         output: process.stdout
     });
     const answer = new Promise((resolve) => {
         const onAnswer = (line) => {
             terminal.close();
             resolve(line);
         };
         terminal.question(text, onAnswer);
     });
     return answer;
 }

 // Return a promise that resolves (with no value) once the given number of
 // milliseconds has elapsed.
 const sleep = (milliseconds) => new Promise((wake) => {
     setTimeout(wake, milliseconds);
 });

 // Strip the named attribute from an element by delegating to the element's
 // own removeAttribute method. Returns nothing.
 const removeAttribute = function (element, attributeName) {
     element.removeAttribute(attributeName);
 };

 /**
  * Decide whether a DOM element should be treated as visible.
  *
  * Reads the `window` and `document` globals, so it only works where those
  * exist. An element is visible when neither it nor any ancestor has a hiding
  * computed style and its bounding box lies entirely inside the viewport.
  *
  * @param {Element} el - Element to test; a falsy value yields false.
  * @returns {boolean} True when the element passes every check.
  */
 const isElementVisible = (el) => {
     if (!el) return false;

     // True when the computed style marks the node as hidden.
     // NOTE(review): width/height are compared to the literal '0'; computed
     // lengths are normally reported with a unit (e.g. '0px'), so these two
     // comparisons may rarely match — confirm the intent.
     const hasHiddenStyle = (node) => {
         const computed = window.getComputedStyle(node);
         return computed.width === '0' ||
                computed.height === '0' ||
                computed.opacity === '0' ||
                computed.display === 'none' ||
                computed.visibility === 'hidden';
     };

     // Walk from the element itself up through every ancestor.
     for (let node = el; node; node = node.parentElement) {
         if (hasHiddenStyle(node)) return false;
     }

     // Finally require the bounding box to sit fully inside the viewport.
     const box = el.getBoundingClientRect();
     return box.top >= 0 &&
            box.left >= 0 &&
            box.bottom <= (window.innerHeight || document.documentElement.clientHeight) &&
            box.right <= (window.innerWidth || document.documentElement.clientWidth);
 };

 /**
  * Outline one element in red and, when it is large enough and visible, tag
  * it with a `gpt-link-text` attribute holding its sanitised text.
  *
  * The red border is applied unconditionally; only the attribute depends on
  * the size and visibility checks.
  *
  * @param {object} page - Puppeteer page whose evaluate() runs the callback.
  * @param {object} element - Handle to the element, forwarded to evaluate().
  * @returns {Promise<void>}
  */
 async function highlightElement (page, element) {
     await page.evaluate((target) => {
         // This callback executes inside the browser page, where functions
         // from the Node module scope are not reachable, so the visibility
         // check (same rules as the module-level isElementVisible) is
         // defined locally.
         const isVisible = (el) => {
             for (let node = el; node; node = node.parentElement) {
                 const style = window.getComputedStyle(node);
                 if (style.width === '0' ||
                     style.height === '0' ||
                     style.opacity === '0' ||
                     style.display === 'none' ||
                     style.visibility === 'hidden') {
                     return false;
                 }
             }
             const rect = el.getBoundingClientRect();
             return rect.top >= 0 &&
                    rect.left >= 0 &&
                    rect.bottom <= (window.innerHeight || document.documentElement.clientHeight) &&
                    rect.right <= (window.innerWidth || document.documentElement.clientWidth);
         };

         target.style.border = "1px solid red";
         const position = target.getBoundingClientRect();
         if (position.width > 5 && position.height > 5 && isVisible(target)) {
             // Keep only letters, digits and spaces so the text can be matched later.
             const linkText = target.textContent.replace(/[^a-zA-Z0-9 ]/g, '');
             target.setAttribute("gpt-link-text", linkText);
         }
     }, element);
 }

 /**
  * Click the tagged element whose `gpt-link-text` attribute matches linkText.
  *
  * An exact match is clicked as soon as it is found. Otherwise the last
  * element whose attribute merely contains linkText is clicked.
  *
  * @param {object} page - Puppeteer page queried with `$$('[gpt-link-text]')`.
  * @param {string} linkText - Text to look for in the attribute values.
  * @returns {Promise<void>}
  * @throws {Error} "Can't find link" when no element matches at all.
  */
 async function clickElement (page, linkText) {
     const elements = await page.$$('[gpt-link-text]');
     let partial;
     for (const element of elements) {
         // Element handles expose no getAttribute(); the attribute has to be
         // read inside the page through evaluate().
         const attributeValue = await element.evaluate((node) => node.getAttribute('gpt-link-text'));
         if (attributeValue === linkText) {
             await element.click();
             return;
         }
         if (attributeValue.includes(linkText)) {
             partial = element;
         }
     }
     if (!partial) {
         throw new Error("Can't find link");
     }
     await partial.click();
 }

 /**
  * Perform the click requested in an assistant message and capture a
  * screenshot of the resulting page.
  *
  * The link text is taken from the first `{"click": "..."}` fragment of
  * messageText and reduced to letters, digits and spaces before matching.
  *
  * @param {object} page - Puppeteer page to act on.
  * @param {Array<object>} messages - Chat history; receives an error message
  *   addressed to the model when the click sequence fails.
  * @param {string} messageText - Assistant reply containing the click marker.
  * @returns {Promise<string|false>} The sanitised link text on success, or
  *   false when any step inside the try block throws.
  */
 async function handleLinkClickSS(page, messages, messageText) {
     const linkText = messageText.split('{"click": "')[1].split('"}')[0].replace(/[^a-zA-Z0-9 ]/g, '');
     console.log("Clicking on " + linkText);
     try {
         await clickElement(page, linkText);
         // Wait for the page 'load' signal, but never longer than `timeout` ms.
         // NOTE(review): waitForEvent and highlightLinks are not defined in the
         // visible part of this file — confirm they exist elsewhere.
         await Promise.race([waitForEvent(page, 'load'), sleep(timeout)]);
         await highlightLinks(page);
         await page.screenshot({ path: "screenshot.jpg", quality: 100 });
         return linkText;
     } catch (error) {
         console.log("ERROR: Clicking failed");
         // Surface the underlying cause; without it every failure looks alike.
         console.error(error);
         messages.push({ "role": "user", "content": "ERROR: I was unable to click that element" });
         return false;
     }
 }

 /**
  * Open a URL in the page, tag its links and capture a screenshot.
  *
  * Links are highlighted once after DOMContentLoaded and again after the
  * 'load' wait (bounded by `timeout` ms) — presumably to pick up links that
  * appear late; confirm. A failure is reported to the model through
  * `messages` instead of being thrown, mirroring handleLinkClickSS.
  *
  * @param {object} page - Puppeteer page to navigate.
  * @param {Array<object>} messages - Chat history; receives an error message
  *   addressed to the model when navigation fails.
  * @param {string} url - Address passed to page.goto.
  * @returns {Promise<string|false>} The url on success, or false when any
  *   step throws.
  */
 async function navigateToUrl (page, messages, url) {
     console.log("Crawling " + url);
     try {
         await page.goto(url, { waitUntil: "domcontentloaded" });
         // NOTE(review): highlightLinks and waitForEvent are not defined in the
         // visible part of this file — confirm they exist elsewhere.
         await highlightLinks(page);
         await Promise.race([waitForEvent(page, 'load'), sleep(timeout)]);
         await highlightLinks(page);
         await page.screenshot({ path: "screenshot.jpg", quality: 100 });
         return url;
     } catch (error) {
         console.log("ERROR: Navigation failed");
         console.error(error);
         messages.push({ "role": "user", "content": "ERROR: I was unable to open that URL" });
         return false;
     }
 }

 /**
  * Extract the address from the first `{"url": "..."}` fragment of an
  * assistant message and hand it to navigateToUrl.
  *
  * @param {object} page - Forwarded to navigateToUrl.
  * @param {Array<object>} messages - Forwarded to navigateToUrl.
  * @param {string} messageText - Assistant reply containing the url marker.
  * @returns {Promise<*>} Whatever navigateToUrl resolves with.
  * @throws {TypeError} When messageText does not contain the url marker.
  */
 async function handleUrlNavigationSS(page, messages, messageText) {
     const [, afterMarker] = messageText.split('{"url": "');
     const [url] = afterMarker.split('"}');
     const outcome = await navigateToUrl(page, messages, url);
     return outcome;
 }

 /**
  * Inspect an assistant reply and carry out the browsing action it requests.
  *
  * A `{"click": "` marker takes precedence over a `{"url": "` marker when both
  * are present.
  *
  * @param {object} page - Puppeteer page passed on to the action handler.
  * @param {Array<object>} messages - Chat history passed on to the handler.
  * @param {string} responseText - Text of the assistant reply.
  * @returns {Promise<*>} The handler's result, or false when the reply
  *   contains neither marker.
  */
 async function handleAssistantResponseSS (page, messages, responseText) {
     if (responseText.includes('{"click": "')) {
         // Dispatch to the handlers this file actually defines (the *SS names).
         return await handleLinkClickSS(page, messages, responseText);
     }
     if (responseText.includes('{"url": "')) {
         return await handleUrlNavigationSS(page, messages, responseText);
     }
     return false;
 }

 // System prompt used as the first chat message in main(). It defines the
 // {"url": ...} and {"click": ...} reply formats that
 // handleAssistantResponseSS looks for in the model's answers.
 const systemMessage = `
 You are a website crawler. You will be given instructions on what to do by browsing.
 You are connected to a web browser and you will be given the screenshot of the website you are on.
 The links on the website will be highlighted in red in the screenshot. Always read exactly what is in the screenshot.
 Don't guess link names.
 You can go to a specific URL by answering with the following JSON format:
 {"url": "url goes here"}
 You can click links on the website by referencing the text inside of the link/button, by answering in the following JSON format:
 {"click": "Text in link"}
 Once you are on a URL and you have found the answer to the user's question, you can answer with a regular message.
 In the beginning, go directly to URL that you think might contain the answer to the user's question.
 Prefer to go directly to sub-urls like 'https://google.com/search?q=search' if possible.
 Prefer to use Google for simple queries.
 If the user message provides a direct URL, always answer by going to that one instead.`
	import puppeteer from 'puppeteer-extra';
	import StealthPlugin from 'puppeteer-extra-plugin-stealth';
	import OpenAI from 'openai';
	import readline from 'readline';
	import fs from 'fs';

	// Configure Puppeteer with StealthPlugin
	puppeteer.use(StealthPlugin());

	// Initialize the OpenAI client (constructed with no explicit options).
	const openai = new OpenAI();
	// Upper bound in milliseconds used when racing sleep() against the page 'load' wait.
	const timeout = 8000;

	// Start the main function. The returned promise is handled explicitly so a
	// failure is reported here instead of surfacing as an unhandled rejection.
	main().catch((error) => {
	    console.error("Fatal error:", error);
	    process.exit(1);
	});

	/**
	 * Run the interactive browsing session.
	 *
	 * Launches a headless browser and then loops forever: each iteration sends
	 * either the user's typed prompt or the latest screenshot to the chat model,
	 * prints the reply, and lets handleAssistantResponseSS act on any click/url
	 * instruction contained in it. When that call returns a falsy value, the
	 * next iteration asks the user for a new text prompt.
	 *
	 * @returns {Promise<void>} Does not resolve in normal operation (the loop has
	 *   no exit); rejects if any step throws, after the browser has been closed.
	 */
	async function main() {
	    console.log("###########################################");
	    console.log("# GPT4V-Browsing by Unconventional Coding #");
	    console.log("###########################################\n");
	    const browser = await puppeteer.launch({ headless: "new" });
	    try {
	        const page = await browser.newPage();
	        await page.setViewport({ width: 1200, height: 1200, deviceScaleFactor: 1.75 });

	        const messages = [{ "role": "system", "content": systemMessage }];
	        console.log("GPT: How can I assist you today?");
	        let userPrompt = "";
	        while (true) {
	            // Decide which user prompt to provide, text or screenshot
	            if (!userPrompt) {
	                userPrompt = await input("You: ");
	                console.log();
	                messages.push({ "role": "user", "content": userPrompt });
	            } else {
	                const base64Image = await imageToBase64("screenshot.jpg");
	                messages.push({
	                    "role": "user",
	                    "content": [
	                        { "type": "image_url", "image_url": base64Image },
	                        { "type": "text", "text": `Here's the screenshot of the website you are on right now.
	You can click on links with {"click": "Link text"}.
	Or you can crawl to another URL if this one is incorrect with {"url": "url goes here"}.
	If you find the answer to the user's question, you can respond normally.`
	                        }
	                    ]
	                });
	            }

	            const response = await openai.chat.completions.create({
	                model: "gpt-4-vision-preview",
	                max_tokens: 1024,
	                messages: messages,
	            });

	            const responseText = response.choices[0].message.content;
	            messages.push({ "role": "assistant", "content": responseText });
	            console.log("GPT: " + responseText);

	            // Must be declared: assigning to an undeclared name throws in an ES module.
	            const screenShotOf = await handleAssistantResponseSS(page, messages, responseText);
	            if (!screenShotOf) {
	                // No screenshot was produced: the model answered in plain text
	                // (logged above) or the requested action failed.
	                // To start a clean conversation at this point, truncate the
	                // history back to the system message with `messages.length = 1`.
	                // An empty prompt makes the next iteration ask the user for text.
	                userPrompt = "";
	            }
	        }
	    } finally {
	        // Release the browser process if the loop is left by an exception.
	        await browser.close();
	    }
	}

	/**
	 * Read a file and return its contents as a JPEG data URL.
	 *
	 * The `data:image/jpeg` prefix is applied unconditionally; the file's actual
	 * format is not inspected.
	 *
	 * @param {string} imageFile - Path passed to fs.promises.readFile.
	 * @returns {Promise<string>} `data:image/jpeg;base64,` followed by the
	 *   base64-encoded file contents.
	 * @throws Re-throws the read error after logging it with console.error.
	 */
	async function imageToBase64(imageFile) {
	    let encoded;
	    try {
	        const buffer = await fs.promises.readFile(imageFile);
	        encoded = buffer.toString('base64');
	    } catch (error) {
	        console.error('Error reading the file:', error);
	        throw error;
	    }
	    return 'data:image/jpeg;base64,' + encoded;
	}

	/**
	 * Ask a question on the terminal and resolve with the line the user enters.
	 *
	 * A readline interface over process.stdin/process.stdout is created for each
	 * call and closed as soon as the answer arrives.
	 *
	 * @param {string} text - Prompt text shown to the user.
	 * @returns {Promise<string>} The answer passed to the readline callback.
	 */
	async function input(text) {
	    const terminal = readline.createInterface({
	        input: process.stdin,
	        output: process.stdout
	    });
	    const answer = new Promise((resolve) => {
	        const onAnswer = (line) => {
	            terminal.close();
	            resolve(line);
	        };
	        terminal.question(text, onAnswer);
	    });
	    return answer;
	}

	// Return a promise that resolves (with no value) once the given number of
	// milliseconds has elapsed.
	const sleep = (milliseconds) => new Promise((wake) => {
	    setTimeout(wake, milliseconds);
	});

	// Strip the named attribute from an element by delegating to the element's
	// own removeAttribute method. Returns nothing.
	const removeAttribute = function (element, attributeName) {
	    element.removeAttribute(attributeName);
	};

	/**
	 * Decide whether a DOM element should be treated as visible.
	 *
	 * Reads the `window` and `document` globals, so it only works where those
	 * exist. An element is visible when neither it nor any ancestor has a hiding
	 * computed style and its bounding box lies entirely inside the viewport.
	 *
	 * @param {Element} el - Element to test; a falsy value yields false.
	 * @returns {boolean} True when the element passes every check.
	 */
	const isElementVisible = (el) => {
	    if (!el) return false;

	    // True when the computed style marks the node as hidden.
	    // NOTE(review): width/height are compared to the literal '0'; computed
	    // lengths are normally reported with a unit (e.g. '0px'), so these two
	    // comparisons may rarely match — confirm the intent.
	    const hasHiddenStyle = (node) => {
	        const computed = window.getComputedStyle(node);
	        return computed.width === '0' ||
	               computed.height === '0' ||
	               computed.opacity === '0' ||
	               computed.display === 'none' ||
	               computed.visibility === 'hidden';
	    };

	    // Walk from the element itself up through every ancestor.
	    for (let node = el; node; node = node.parentElement) {
	        if (hasHiddenStyle(node)) return false;
	    }

	    // Finally require the bounding box to sit fully inside the viewport;
	    // the documentElement client size is the fallback when the window's
	    // inner size is falsy.
	    const box = el.getBoundingClientRect();
	    return box.top >= 0 &&
	           box.left >= 0 &&
	           box.bottom <= (window.innerHeight || document.documentElement.clientHeight) &&
	           box.right <= (window.innerWidth || document.documentElement.clientWidth);
	};

	/**
	 * Outline one element in red and, when it is large enough and visible, tag
	 * it with a `gpt-link-text` attribute holding its sanitised text.
	 *
	 * The red border is applied unconditionally; only the attribute depends on
	 * the size and visibility checks.
	 *
	 * @param {object} page - Puppeteer page whose evaluate() runs the callback.
	 * @param {object} element - Handle to the element, forwarded to evaluate().
	 * @returns {Promise<void>}
	 */
	async function highlightElement (page, element) {
	    await page.evaluate((target) => {
	        // This callback executes inside the browser page, where functions
	        // from the Node module scope are not reachable, so the visibility
	        // check (same rules as the module-level isElementVisible) is
	        // defined locally.
	        const isVisible = (el) => {
	            for (let node = el; node; node = node.parentElement) {
	                const style = window.getComputedStyle(node);
	                if (style.width === '0' ||
	                    style.height === '0' ||
	                    style.opacity === '0' ||
	                    style.display === 'none' ||
	                    style.visibility === 'hidden') {
	                    return false;
	                }
	            }
	            const rect = el.getBoundingClientRect();
	            return rect.top >= 0 &&
	                   rect.left >= 0 &&
	                   rect.bottom <= (window.innerHeight || document.documentElement.clientHeight) &&
	                   rect.right <= (window.innerWidth || document.documentElement.clientWidth);
	        };

	        target.style.border = "1px solid red";
	        const position = target.getBoundingClientRect();
	        if (position.width > 5 && position.height > 5 && isVisible(target)) {
	            // Keep only letters, digits and spaces so the text can be matched later.
	            const linkText = target.textContent.replace(/[^a-zA-Z0-9 ]/g, '');
	            target.setAttribute("gpt-link-text", linkText);
	        }
	    }, element);
	}

	/**
	 * Click the tagged element whose `gpt-link-text` attribute matches linkText.
	 *
	 * An exact match is clicked as soon as it is found. Otherwise the last
	 * element whose attribute merely contains linkText is clicked.
	 *
	 * @param {object} page - Puppeteer page queried with `$$('[gpt-link-text]')`.
	 * @param {string} linkText - Text to look for in the attribute values.
	 * @returns {Promise<void>}
	 * @throws {Error} "Can't find link" when no element matches at all.
	 */
	async function clickElement (page, linkText) {
	    const elements = await page.$$('[gpt-link-text]');
	    let partial;
	    for (const element of elements) {
	        // Element handles expose no getAttribute(); the attribute has to be
	        // read inside the page through evaluate().
	        const attributeValue = await element.evaluate((node) => node.getAttribute('gpt-link-text'));
	        if (attributeValue === linkText) {
	            await element.click();
	            return;
	        }
	        if (attributeValue.includes(linkText)) {
	            partial = element;
	        }
	    }
	    if (!partial) {
	        throw new Error("Can't find link");
	    }
	    await partial.click();
	}

	/**
	 * Perform the click requested in an assistant message and capture a
	 * screenshot of the resulting page.
	 *
	 * The link text is taken from the first `{"click": "..."}` fragment of
	 * messageText and reduced to letters, digits and spaces before matching.
	 *
	 * @param {object} page - Puppeteer page to act on.
	 * @param {Array<object>} messages - Chat history; receives an error message
	 *   addressed to the model when the click sequence fails.
	 * @param {string} messageText - Assistant reply containing the click marker.
	 * @returns {Promise<string|false>} The sanitised link text on success, or
	 *   false when any step inside the try block throws.
	 */
	async function handleLinkClickSS(page, messages, messageText) {
	    const linkText = messageText.split('{"click": "')[1].split('"}')[0].replace(/[^a-zA-Z0-9 ]/g, '');
	    console.log("Clicking on " + linkText);
	    try {
	        await clickElement(page, linkText);
	        // Wait for the page 'load' signal, but never longer than `timeout` ms.
	        // NOTE(review): waitForEvent and highlightLinks are not defined in the
	        // visible part of this file — confirm they exist elsewhere.
	        await Promise.race([waitForEvent(page, 'load'), sleep(timeout)]);
	        await highlightLinks(page);
	        await page.screenshot({ path: "screenshot.jpg", quality: 100 });
	        return linkText;
	    } catch (error) {
	        console.log("ERROR: Clicking failed");
	        // Surface the underlying cause; without it every failure looks alike.
	        console.error(error);
	        messages.push({ "role": "user", "content": "ERROR: I was unable to click that element" });
	        return false;
	    }
	}

	/**
	 * Open a URL in the page, tag its links and capture a screenshot.
	 *
	 * Links are highlighted once after DOMContentLoaded and again after the
	 * 'load' wait (bounded by `timeout` ms) — presumably to pick up links that
	 * appear late; confirm. A failure is reported to the model through
	 * `messages` instead of being thrown, mirroring handleLinkClickSS.
	 *
	 * @param {object} page - Puppeteer page to navigate.
	 * @param {Array<object>} messages - Chat history; receives an error message
	 *   addressed to the model when navigation fails.
	 * @param {string} url - Address passed to page.goto.
	 * @returns {Promise<string|false>} The url on success, or false when any
	 *   step throws.
	 */
	async function navigateToUrl (page, messages, url) {
	    console.log("Crawling " + url);
	    try {
	        await page.goto(url, { waitUntil: "domcontentloaded" });
	        // NOTE(review): highlightLinks and waitForEvent are not defined in the
	        // visible part of this file — confirm they exist elsewhere.
	        await highlightLinks(page);
	        await Promise.race([waitForEvent(page, 'load'), sleep(timeout)]);
	        await highlightLinks(page);
	        await page.screenshot({ path: "screenshot.jpg", quality: 100 });
	        return url;
	    } catch (error) {
	        console.log("ERROR: Navigation failed");
	        console.error(error);
	        messages.push({ "role": "user", "content": "ERROR: I was unable to open that URL" });
	        return false;
	    }
	}

	/**
	 * Extract the address from the first `{"url": "..."}` fragment of an
	 * assistant message and hand it to navigateToUrl.
	 *
	 * @param {object} page - Forwarded to navigateToUrl.
	 * @param {Array<object>} messages - Forwarded to navigateToUrl.
	 * @param {string} messageText - Assistant reply containing the url marker.
	 * @returns {Promise<*>} Whatever navigateToUrl resolves with.
	 * @throws {TypeError} When messageText does not contain the url marker.
	 */
	async function handleUrlNavigationSS(page, messages, messageText) {
	    const [, afterMarker] = messageText.split('{"url": "');
	    const [url] = afterMarker.split('"}');
	    const outcome = await navigateToUrl(page, messages, url);
	    return outcome;
	}

	/**
	 * Inspect an assistant reply and carry out the browsing action it requests.
	 *
	 * A `{"click": "` marker takes precedence over a `{"url": "` marker when both
	 * are present.
	 *
	 * @param {object} page - Puppeteer page passed on to the action handler.
	 * @param {Array<object>} messages - Chat history passed on to the handler.
	 * @param {string} responseText - Text of the assistant reply.
	 * @returns {Promise<*>} The handler's result, or false when the reply
	 *   contains neither marker.
	 */
	async function handleAssistantResponseSS (page, messages, responseText) {
	    if (responseText.includes('{"click": "')) {
	        // Dispatch to the handlers this file actually defines (the *SS names).
	        return await handleLinkClickSS(page, messages, responseText);
	    }
	    if (responseText.includes('{"url": "')) {
	        return await handleUrlNavigationSS(page, messages, responseText);
	    }
	    return false;
	}

	// System prompt used as the first chat message in main(). It defines the
	// {"url": ...} and {"click": ...} reply formats that
	// handleAssistantResponseSS looks for in the model's answers.
	const systemMessage = `
	You are a website crawler. You will be given instructions on what to do by browsing.
	You are connected to a web browser and you will be given the screenshot of the website you are on.
	The links on the website will be highlighted in red in the screenshot. Always read exactly what is in the screenshot.
	Don't guess link names.
	You can go to a specific URL by answering with the following JSON format:
	{"url": "url goes here"}
	You can click links on the website by referencing the text inside of the link/button, by answering in the following JSON format:
	{"click": "Text in link"}
	Once you are on a URL and you have found the answer to the user's question, you can answer with a regular message.
	In the beginning, go directly to URL that you think might contain the answer to the user's question.
	Prefer to go directly to sub-urls like 'https://google.com/search?q=search' if possible.
	Prefer to use Google for simple queries.
	If the user message provides a direct URL, always answer by going to that one instead.`