/**
 * Scrapes web content using specified selectors, handling multiple results.
 *
 * Any failure (validation, HTTP, malformed response) is reported as a string
 * of the form "Error: <message>" instead of being thrown, so the calling cell
 * shows a readable message.
 *
 * @param {string} url The URL to scrape
 * @param {string} selectors CSS selectors (comma separated); a range of cells containing selectors is also accepted
 * @param {string} apiUrl API endpoint URL incl. /scrape and ?token= if set
 * @param {number} timeout (Optional) Timeout in milliseconds
 * @param {boolean} noHeaders (Optional) If true, only returns values without selectors
 * @return {Array} Scraped content
 * @customfunction
 */
function WEBSCRAPE(url, selectors, apiUrl, timeout, noHeaders = false) {
  const DEFAULT_API_URL = "YOUR_DEFAULT_API_URL";
  const DEFAULT_TIMEOUT = 30000;

  try {
    const finalApiUrl = apiUrl || DEFAULT_API_URL;

    // Explicit radix; NaN, zero and negative values all fall back to the default.
    const parsedTimeout = Number.parseInt(timeout, 10);
    const finalTimeout = parsedTimeout > 0 ? parsedTimeout : DEFAULT_TIMEOUT;

    const results = wScrape(url, finalApiUrl, selectors, finalTimeout);

    // One [selector, text] row per matched element.
    const flattenedResults = results.flatMap((item) => {
      if (!item || !Array.isArray(item.results)) {
        throw new Error('Invalid API response structure');
      }
      return item.results.map((result) => [item.selector, result.text]);
    });

    if (noHeaders) {
      return flattenedResults.map((row) => row[1]);
    }
    return flattenedResults;
  } catch (error) {
    return `Error: ${error.message}`;
  }
}

/**
 * Posts a scrape request to the API and returns the `data` array of its JSON response.
 *
 * @param {string} url The URL to scrape
 * @param {string} apiURL API endpoint the request is posted to
 * @param {string|Array} selectors Comma-separated CSS selectors, or a (possibly nested) array of such strings
 * @param {number} timeout Navigation timeout in milliseconds (30000 when falsy)
 * @return {Array} The `data` array of the API response
 * @throws {Error} If url/selectors are missing, no valid selector remains,
 *     the API answers with a non-200 status, or the response body is not
 *     JSON containing a `data` array
 */
function wScrape(url, apiURL, selectors, timeout) {
  if (!url) throw new Error("URL is required");
  if (!selectors) throw new Error("Selectors are required");

  // A cell range arrives as a 2D array, so flatten before splitting on commas.
  const selectorArray = [selectors]
    .flat(Infinity)
    .filter((entry) => entry !== null && entry !== undefined)
    .flatMap((entry) => String(entry).split(','))
    .map((selector) => selector.trim())
    .filter((selector) => selector)
    .map((selector) => ({ "selector": selector }));

  if (selectorArray.length === 0) {
    throw new Error("At least one valid selector is required");
  }

  const payload = {
    "url": url,
    "elements": selectorArray,
    "gotoOptions": {
      "timeout": timeout || 30000
    }
  };

  const options = {
    method: "post",
    headers: {
      "Content-Type": "application/json"
    },
    payload: JSON.stringify(payload),
    // Non-2xx responses are returned rather than thrown so the status code can be reported.
    muteHttpExceptions: true
  };

  const response = UrlFetchApp.fetch(apiURL, options);
  const responseCode = response.getResponseCode();

  if (responseCode !== 200) {
    throw new Error(`API returned status code ${responseCode}`);
  }

  let resp;
  try {
    resp = JSON.parse(response.getContentText());
  } catch (parseError) {
    // A non-JSON body is reported the same way as any other malformed response.
    throw new Error('Invalid API response structure', { cause: parseError });
  }

  if (!resp || !Array.isArray(resp.data)) {
    throw new Error('Invalid API response structure');
  }

  return resp.data;
}