Skip to content

Instantly share code, notes, and snippets.

@TheMapSmith
Created October 1, 2025 21:22
Show Gist options
  • Save TheMapSmith/f2bdf598f5c0966da9ab1fb42ffff172 to your computer and use it in GitHub Desktop.
Save TheMapSmith/f2bdf598f5c0966da9ab1fb42ffff172 to your computer and use it in GitHub Desktop.
Instacart Scraper

To install, load this folder as an unpacked extension in Chrome (chrome://extensions → enable Developer mode → "Load unpacked").

/**
 * Mutable, in-memory state of the URL queue. The whole object is returned
 * verbatim in reply to the 'getStatus' message.
 */
let state = {
  // One of 'idle', 'loading' or 'waiting'.
  status: 'idle',
  // Entries of the form { url, completed }, filled in by startQueue().
  queue: [],
  // Index into `queue` of the page currently being processed.
  currentIndex: 0,
  // Number of capture files handed off for download so far.
  downloadCount: 0,
  // Seconds left before the next page is loaded (only meaningful while 'waiting').
  countdown: 0,
  // Date.now() timestamp taken when the current page load was started.
  loadStartTime: null,
  // Id of the tab the queue is driving, or null when there is none.
  processingTabId: null,
  // Substring a request URL must contain to be captured ('' = capture everything).
  xhrFilter: '',
  // Base download folder and the per-run sub-folder derived from it.
  downloadFolder: '',
  sessionFolder: '',
};

// Handle of the running between-pages countdown timer, or null when none is active.
let countdownInterval = null;
/**
 * Message router for the background script.
 *
 * Handles queue control messages from the popup ('startQueue', 'stopQueue',
 * 'getStatus', 'clearAll') and page events from the content script
 * ('xhrCaptured', 'pageLoaded', 'initiateDownload').
 *
 * Returns true ONLY for 'initiateDownload', the one branch that answers
 * asynchronously. All other branches answer synchronously (or not at all for
 * unknown actions) and return false so the message channel is closed right
 * away instead of being kept open for a reply that never comes.
 */
chrome.runtime.onMessage.addListener((request, sender, sendResponse) => {
  console.log('Background received message:', request.action);

  switch (request.action) {
    case 'startQueue':
      console.log('Starting queue with URLs:', request.urls);
      console.log('XHR Filter:', request.xhrFilter);
      console.log('Download Folder:', request.downloadFolder);
      startQueue(request.urls, request.xhrFilter, request.downloadFolder);
      sendResponse({ success: true });
      return false;

    case 'stopQueue':
      stopQueue();
      sendResponse({ success: true });
      return false;

    case 'getStatus':
      sendResponse(state);
      return false;

    case 'clearAll':
      clearAll();
      sendResponse({ success: true });
      return false;

    case 'xhrCaptured':
      console.log('XHR captured from tab:', sender.tab?.id, 'Data:', request.data);
      // sender.tab is only set for messages that originate from a tab.
      if (sender.tab) {
        handleXhrCapture(request.data, sender.tab);
      }
      sendResponse({ success: true });
      return false;

    case 'pageLoaded':
      console.log('Page loaded in tab:', sender.tab?.id);
      if (sender.tab) {
        handlePageLoaded(sender.tab);
      }
      sendResponse({ success: true });
      return false;

    case 'initiateDownload':
      console.log('Initiating download from content script');
      chrome.downloads.download({
        url: request.url,
        filename: request.filename,
        saveAs: false
      }).then((downloadId) => {
        console.log('Download started with ID:', downloadId);
        sendResponse({ success: true, downloadId: downloadId });
      }).catch((err) => {
        console.error('Download failed:', err);
        sendResponse({ success: false, error: err.message });
      });
      return true; // Keep channel open for async response

    default:
      // Not a message for this listener: do not hold the channel open.
      return false;
  }
});
/**
 * Reset the queue state for a new run and kick off the first page load.
 *
 * @param {string[]} urls - Page URLs to visit, in order.
 * @param {string} [xhrFilter=''] - Substring a request URL must contain to be captured.
 * @param {string} [downloadFolder=''] - Base download folder; falls back to 'xhr_captures'.
 */
function startQueue(urls, xhrFilter = '', downloadFolder = '') {
  console.log('startQueue called with', urls.length, 'URLs');

  const baseFolder = downloadFolder || 'xhr_captures';
  // ISO timestamp with ':' and '.' replaced by '-', minus the trailing "-mmmZ" part.
  const stamp = new Date().toISOString().replace(/[:.]/g, '-').slice(0, -5);

  Object.assign(state, {
    queue: urls.map((url) => ({ url, completed: false })),
    currentIndex: 0,
    status: 'loading',
    downloadCount: 0,
    xhrFilter,
    downloadFolder: baseFolder,
    sessionFolder: `${baseFolder}/${stamp}`,
  });
  console.log('Session folder:', state.sessionFolder);
  console.log('Queue initialized, starting processNext()');

  // If a processing tab is already known, tell its content script about the filter.
  const existingTabId = state.processingTabId;
  if (existingTabId) {
    chrome.tabs
      .sendMessage(existingTabId, { action: 'setFilter', filter: xhrFilter })
      .catch((err) => console.log('Could not send filter to existing tab:', err));
  }

  processNext();
}
/**
 * Halt queue processing: mark the state idle, cancel any running countdown
 * and close the processing tab if one is open.
 */
function stopQueue() {
  state.status = 'idle';

  if (countdownInterval) {
    clearInterval(countdownInterval);
    countdownInterval = null;
  }

  const tabId = state.processingTabId;
  if (!tabId) {
    return;
  }
  // Best effort: the tab may already have been closed.
  chrome.tabs.remove(tabId).catch(() => {});
  state.processingTabId = null;
}
/**
 * Stop processing and wipe the queue together with its progress counters.
 */
function clearAll() {
  stopQueue();
  Object.assign(state, {
    queue: [],
    currentIndex: 0,
    downloadCount: 0,
    countdown: 0,
  });
}
/**
 * Load the queue entry at state.currentIndex into the processing tab.
 *
 * Reuses the existing processing tab when possible. If that tab can no longer
 * be updated (for example because the user closed it), the stale id is
 * dropped and a fresh tab is created, so one closed tab no longer causes every
 * remaining URL to be skipped. After navigation the current XHR filter is sent
 * to the tab's content script with a short delay.
 *
 * If the page cannot be loaded at all, the entry is marked complete and the
 * next one is tried. When the index is past the end of the queue the state is
 * set back to idle.
 *
 * @returns {Promise<void>}
 */
async function processNext() {
  console.log('processNext called, currentIndex:', state.currentIndex, 'queue length:', state.queue.length);
  if (state.currentIndex >= state.queue.length) {
    console.log('Queue complete!');
    state.status = 'idle';
    state.processingTabId = null;
    return;
  }

  const currentUrl = state.queue[state.currentIndex].url;
  console.log('Loading URL:', currentUrl);
  state.status = 'loading';
  state.loadStartTime = Date.now();

  try {
    let filterDelayMs = 500;
    let reusedTab = false;

    if (state.processingTabId) {
      const existingTabId = state.processingTabId;
      console.log('Updating existing tab:', existingTabId);
      try {
        await chrome.tabs.update(existingTabId, { url: currentUrl });
        reusedTab = true;
      } catch (updateError) {
        console.log('Could not reuse tab, opening a new one:', updateError);
        // Forget the stale id; close the tab in case it still exists.
        state.processingTabId = null;
        chrome.tabs.remove(existingTabId).catch(() => {});
      }
    }

    if (!reusedTab) {
      console.log('Creating new tab');
      const tab = await chrome.tabs.create({ url: currentUrl, active: false });
      if (state.status !== 'loading') {
        // The queue was stopped while the tab was being created: don't leave it behind.
        chrome.tabs.remove(tab.id).catch(() => {});
        return;
      }
      state.processingTabId = tab.id;
      console.log('Tab created with ID:', tab.id);
      // A brand-new tab gets a little longer before the filter is sent.
      filterDelayMs = 1000;
    }

    // Send filter after navigation
    setTimeout(() => {
      // The queue may have been stopped in the meantime (tab id cleared).
      if (!state.processingTabId) return;
      chrome.tabs.sendMessage(state.processingTabId, {
        action: 'setFilter',
        filter: state.xhrFilter
      }).catch(err => console.log('Could not send filter:', err));
    }, filterDelayMs);
  } catch (error) {
    console.error('Error loading page:', error);
    markCurrentComplete();
    state.currentIndex++;
    processNext();
  }
}
/**
 * Derive a file-name stem for a captured request.
 *
 * Looks for a `variables` query parameter holding JSON and returns its
 * `orderId` field when present. Both absolute and relative request URLs
 * (e.g. "/graphql?variables=...") are accepted; relative ones are resolved
 * against a throw-away base purely so they can be parsed.
 *
 * @param {string} xhrUrl - URL of the captured request.
 * @returns {*} The `orderId` value as found in the JSON, or the string
 *   `capture_<epoch ms>` when no order id can be extracted.
 */
function extractOrderId(xhrUrl) {
  try {
    // The base is ignored for absolute URLs and only makes relative ones parseable.
    const urlObj = new URL(xhrUrl, 'https://relative.invalid');
    const variables = urlObj.searchParams.get('variables');
    if (variables) {
      const parsed = JSON.parse(variables);
      if (parsed?.orderId) {
        return parsed.orderId;
      }
    }
  } catch (e) {
    console.log('Could not extract orderId from URL:', e);
  }
  // Fallback to timestamp if no orderId found
  return `capture_${Date.now()}`;
}
/**
 * Handle a captured XHR/fetch response reported by the processing tab.
 *
 * Only the FIRST matching capture per page is used: the page is claimed
 * synchronously (status switches from 'loading' to 'waiting'), so further
 * captures arriving while the download message is in flight or during the
 * countdown are ignored instead of advancing the queue several times.
 *
 * The capture is wrapped in a JSON document and sent back to the tab's
 * content script as a 'downloadFile' request. Whether or not that hand-off
 * succeeds, the queue then moves on: either a 2 second countdown to the next
 * page, or back to idle when this was the last entry. If the queue was
 * stopped, cleared or restarted while the message was in flight, nothing
 * further happens.
 *
 * @param {{url: string, method: string, status: number, response: *}} data - Capture payload.
 * @param {{id: number}} tab - Tab the capture came from.
 */
function handleXhrCapture(data, tab) {
  if (!tab || tab.id !== state.processingTabId) return;
  if (typeof data?.url !== 'string') return;
  // Check if this XHR matches the filter
  if (state.xhrFilter && !data.url.includes(state.xhrFilter)) {
    console.log('XHR does not match filter, ignoring:', data.url);
    return;
  }
  // Anything other than 'loading' means this page has already been handled.
  if (state.status !== 'loading') {
    console.log('Page already handled, ignoring extra XHR:', data.url);
    return;
  }

  const pageEntry = state.queue[state.currentIndex];
  const currentUrl = pageEntry?.url;
  console.log('Capturing XHR for:', currentUrl);
  // Extract order ID from the XHR URL
  const orderId = extractOrderId(data.url);
  console.log('Extracted order ID:', orderId);
  // Create JSON data
  const captureData = {
    pageUrl: currentUrl,
    xhrUrl: data.url,
    timestamp: new Date().toISOString(),
    orderId: orderId,
    method: data.method,
    status: data.status,
    response: data.response
  };
  const jsonStr = JSON.stringify(captureData, null, 2);

  // Claim the page now so a second capture cannot start a second advance.
  state.status = 'waiting';
  state.countdown = 0;

  const advance = () => {
    // Stopped, cleared or restarted while the message was in flight: leave the queue alone.
    if (state.status !== 'waiting' || state.queue[state.currentIndex] !== pageEntry) return;

    console.log('First capture complete, moving to next URL');
    markCurrentComplete();
    state.currentIndex++;
    if (state.currentIndex < state.queue.length) {
      // Small delay before loading next page
      state.countdown = 2;
      if (countdownInterval) {
        clearInterval(countdownInterval);
      }
      countdownInterval = setInterval(() => {
        state.countdown--;
        if (state.countdown <= 0) {
          clearInterval(countdownInterval);
          countdownInterval = null;
          processNext();
        }
      }, 1000);
    } else {
      state.status = 'idle';
      state.processingTabId = null;
      console.log('Queue complete!');
    }
  };

  // Send to content script to handle download (content scripts can create blobs)
  chrome.tabs.sendMessage(tab.id, {
    action: 'downloadFile',
    filename: `${state.sessionFolder}/${orderId}.json`,
    content: jsonStr
  }).then((response) => {
    if (response?.success === false) {
      console.error('Download failed for:', orderId, response.error);
      return;
    }
    console.log('Download initiated for:', orderId);
    state.downloadCount++;
  }).catch((err) => {
    console.error('Failed to send download message:', err);
  }).then(advance);
}
/**
 * Handle the 'pageLoaded' notification from the processing tab.
 *
 * Arms a 15 second fallback: if the page that was loading when the
 * notification arrived is STILL the current, still-loading queue entry when
 * the timer fires (i.e. no capture moved the queue on), the entry is marked
 * complete and the queue advances — via a 2 second countdown, or to idle
 * when it was the last entry.
 *
 * The timer remembers the queue entry it was armed for, so a timer left over
 * from an earlier page (or an earlier run) can never skip a later page.
 *
 * @param {{id: number}} tab - Tab that reported the load.
 */
function handlePageLoaded(tab) {
  if (!tab || tab.id !== state.processingTabId) return;
  if (state.status !== 'loading') return;

  const pageEntry = state.queue[state.currentIndex];
  if (!pageEntry) return;

  console.log('Page loaded, but waiting for XHR capture...');
  // Set a timeout in case no XHR is captured
  // If no capture after 15 seconds, move to next URL
  setTimeout(() => {
    // Check if we've already moved on (captured, stopped, cleared or restarted)
    if (state.status !== 'loading' || state.queue[state.currentIndex] !== pageEntry) {
      return;
    }

    console.log('No XHR captured after 15 seconds, moving to next URL');
    markCurrentComplete();
    state.currentIndex++;
    if (state.currentIndex < state.queue.length) {
      state.status = 'waiting';
      state.countdown = 2;
      if (countdownInterval) {
        clearInterval(countdownInterval);
      }
      countdownInterval = setInterval(() => {
        state.countdown--;
        if (state.countdown <= 0) {
          clearInterval(countdownInterval);
          countdownInterval = null;
          processNext();
        }
      }, 1000);
    } else {
      state.status = 'idle';
      state.processingTabId = null;
    }
  }, 15000);
}
/**
 * Flag the queue entry at state.currentIndex as completed.
 * Does nothing when the index is not below the queue length.
 */
function markCurrentComplete() {
  const { queue, currentIndex } = state;
  if (!(currentIndex < queue.length)) {
    return;
  }
  queue[currentIndex].completed = true;
}
// Content script: relays messages between the injected page script and the background script.
console.log('[Content] Content script loaded');

// Last filter received from the background script.
let xhrFilter = '';

// Load injected.js into the page itself so it can patch the page's own XHR/fetch.
const script = document.createElement('script');
script.src = chrome.runtime.getURL('injected.js');
script.onload = () => {
  console.log('[Content] Injected script loaded');
  // The tag is only needed to get the code running; drop it afterwards.
  script.remove();
};
// Fall back to the root element when <head> is not available.
const injectionParent = document.head || document.documentElement;
injectionParent.appendChild(script);
// Listen for messages from the injected script and forward captures to the
// background script. Any script on the page can post arbitrary window
// messages (including null or primitives), so the payload is checked before
// its fields are read.
window.addEventListener('message', (event) => {
  if (event.source !== window) return;
  if (event.data?.type !== 'XHR_CAPTURED') return;

  console.log('[Content] Received XHR capture from injected script');
  // Forward to background script
  chrome.runtime.sendMessage({
    action: 'xhrCaptured',
    data: event.data.data
  }).then(() => {
    console.log('[Content] Forwarded to background');
  }).catch(err => {
    console.error('[Content] Failed to forward to background:', err);
  });
});
// Listen for filter updates and download requests from background.
//
// 'setFilter'    - remembers the filter and passes it on to the injected script.
// 'downloadFile' - wraps the content in a Blob, asks the background script to
//                  download its object URL, and reports the real outcome back.
//
// Returns true only for 'downloadFile' (answered asynchronously); everything
// else is answered synchronously or not at all, so the channel is not held open.
chrome.runtime.onMessage.addListener((request, sender, sendResponse) => {
  if (request.action === 'setFilter') {
    xhrFilter = request.filter;
    console.log('[Content] Filter updated to:', xhrFilter || '(none)');
    // Send filter to injected script
    window.postMessage({
      type: 'XHR_FILTER_UPDATE',
      filter: xhrFilter
    }, '*');
    sendResponse({ success: true });
    return false;
  }

  if (request.action === 'downloadFile') {
    console.log('[Content] Handling download:', request.filename);
    // Create blob and download in content script context
    const blob = new Blob([request.content], { type: 'application/json' });
    const url = URL.createObjectURL(blob);
    chrome.runtime.sendMessage({
      action: 'initiateDownload',
      url: url,
      filename: request.filename
    }).then((response) => {
      // The background script answers { success: false, error } when the download could not start.
      if (response?.success === false) {
        console.error('[Content] Download failed:', response.error);
        sendResponse({ success: false, error: response.error });
        return;
      }
      sendResponse({ success: true });
    }).catch(err => {
      console.error('[Content] Download failed:', err);
      sendResponse({ success: false, error: err.message });
    }).finally(() => {
      // Clean up after a delay, on success and on failure alike
      setTimeout(() => URL.revokeObjectURL(url), 1000);
    });
    return true; // Keep channel open for async response
  }

  return false;
});
// Tell the background script once the page has finished loading.
window.addEventListener('load', function onPageLoad() {
  console.log('[Content] Page load event fired');

  const notifyBackground = () => {
    console.log('[Content] Sending pageLoaded message to background');
    chrome.runtime
      .sendMessage({ action: 'pageLoaded' })
      .catch((err) => {
        console.error('[Content] Failed to send pageLoaded:', err);
      });
  };

  // Hold the notification back for 2 seconds to give XHR requests time to complete.
  setTimeout(notifyBackground, 2000);
});
console.log('[Content] Content script initialization complete');
// Evaluates to the absolute href of every link whose href attribute starts
// with "/store/orders/". The value is not stored anywhere — presumably this is
// meant to be run by hand in the DevTools console to build the URL list.
Array.from(
  document.querySelectorAll('a[href^="/store/orders/"]'),
  (anchor) => anchor.href
);
// This script runs in the page's context to intercept XHR/fetch BEFORE they happen.
//
// It wraps XMLHttpRequest.prototype.open/send and window.fetch. For every
// completed request whose URL passes the current filter and whose response is
// declared as application/json, it posts an 'XHR_CAPTURED' window message
// carrying { method, url, status, response }.
//
// The wrappers are written so that a problem while capturing can never break
// or delay the page's own request: URLs are normalised to strings up front,
// all capture work is wrapped in try/catch, and fetch bodies are read from a
// clone without holding back the response.
(function() {
  console.log('[Injected] XHR interceptor loaded in page context');
  let xhrFilter = '';

  // Listen for filter updates from content script
  window.addEventListener('message', (event) => {
    if (event.source !== window) return;
    if (event.data?.type === 'XHR_FILTER_UPDATE') {
      xhrFilter = typeof event.data.filter === 'string' ? event.data.filter : '';
      console.log('[Injected] Filter updated to:', xhrFilter || '(none)');
    }
  });

  // Normalise a request target (string, URL object or Request) to a URL string.
  // Returns undefined when no string form can be obtained.
  function toUrlString(target) {
    try {
      if (typeof target === 'string') return target;
      if (target && typeof target.url === 'string') return target.url;
      return String(target);
    } catch (error) {
      return undefined;
    }
  }

  // Decide whether a request URL passes the current filter.
  function shouldCapture(url) {
    if (typeof url !== 'string') {
      console.log('[Injected] Unknown request URL, not capturing');
      return false;
    }
    if (!xhrFilter) {
      console.log('[Injected] No filter, capturing:', url);
      return true;
    }
    const matches = url.includes(xhrFilter);
    console.log('[Injected] Filter check:', matches, 'for', url);
    return matches;
  }

  // Hand a capture to the content script; postMessage throws for payloads that cannot be cloned.
  function postCapture(capture) {
    try {
      window.postMessage({ type: 'XHR_CAPTURED', data: capture }, '*');
    } catch (error) {
      console.error('[Injected] Could not post capture:', error);
    }
  }

  // Intercept XMLHttpRequest
  const originalOpen = XMLHttpRequest.prototype.open;
  const originalSend = XMLHttpRequest.prototype.send;

  XMLHttpRequest.prototype.open = function(method, url, ...args) {
    this._method = method;
    // open() also accepts URL objects; keep a plain string for filtering and messaging.
    this._url = toUrlString(url);
    console.log('[Injected] XHR open:', method, this._url);
    return originalOpen.apply(this, [method, url, ...args]);
  };

  XMLHttpRequest.prototype.send = function(...args) {
    const xhr = this;
    this.addEventListener('load', function() {
      try {
        console.log('[Injected] XHR completed:', xhr._method, xhr._url, 'status:', this.status);
        if (!shouldCapture(xhr._url)) {
          console.log('[Injected] Skipping - does not match filter');
          return;
        }
        const contentType = this.getResponseHeader('content-type');
        console.log('[Injected] Content-Type:', contentType);
        if (!(contentType && contentType.includes('application/json'))) {
          console.log('[Injected] Not JSON, skipping');
          return;
        }

        let responseData;
        if (this.responseType === '' || this.responseType === 'text') {
          try {
            responseData = JSON.parse(this.responseText);
            console.log('[Injected] Parsed JSON response');
          } catch (e) {
            responseData = this.responseText;
            console.log('[Injected] Could not parse as JSON, using raw text');
          }
        } else if (this.responseType === 'json') {
          // responseText is not readable for this responseType; the parsed value is in `response`.
          responseData = this.response;
          console.log('[Injected] Using pre-parsed JSON response');
        } else {
          console.log('[Injected] Unsupported responseType, skipping:', this.responseType);
          return;
        }

        console.log('[Injected] Posting capture to content script');
        postCapture({
          method: xhr._method,
          url: xhr._url,
          status: this.status,
          response: responseData
        });
      } catch (error) {
        console.error('[Injected] Error capturing XHR:', error);
      }
    });
    return originalSend.apply(this, args);
  };

  // Intercept fetch
  const originalFetch = window.fetch;
  window.fetch = async function(...args) {
    const url = toUrlString(args[0]);
    console.log('[Injected] Fetch initiated:', url);
    const response = await originalFetch.apply(this, args);

    try {
      if (!shouldCapture(url)) {
        console.log('[Injected] Skipping fetch - does not match filter');
        return response;
      }
      const contentType = response.headers.get('content-type');
      console.log('[Injected] Fetch Content-Type:', contentType);
      if (!(contentType && contentType.includes('application/json'))) {
        console.log('[Injected] Not JSON, skipping fetch');
        return response;
      }

      // Explicit init wins; otherwise use the method of a Request object, else GET.
      const method = args[1]?.method || args[0]?.method || 'GET';
      const status = response.status;
      // Clone right away (before the page reads the body) and parse in the
      // background so the page gets its response without waiting for us.
      response.clone().json().then((data) => {
        console.log('[Injected] Fetch JSON parsed');
        postCapture({
          method: method,
          url: url,
          status: status,
          response: data
        });
      }).catch((error) => {
        console.error('[Injected] Error capturing fetch:', error);
      });
    } catch (error) {
      console.error('[Injected] Error capturing fetch:', error);
    }
    return response;
  };

  console.log('[Injected] XHR/Fetch interception active');
})();
{
"manifest_version": 3,
"name": "XHR Queue Browser",
"version": "1.0",
"description": "Automatically browse URLs and capture XHR responses",
"permissions": [
"activeTab",
"tabs",
"storage",
"downloads",
"debugger"
],
"host_permissions": [
"<all_urls>"
],
"background": {
"service_worker": "background.js"
},
"action": {
"default_popup": "popup.html"
},
"content_scripts": [
{
"matches": ["<all_urls>"],
"js": ["content.js"],
"run_at": "document_start"
}
],
"web_accessible_resources": [
{
"resources": ["injected.js"],
"matches": ["<all_urls>"]
}
],
"commands": {
"_execute_action": {
"suggested_key": {
"default": "Ctrl+Shift+Y"
}
}
}
}
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<!-- Shown as the tab title when the popup is opened as a full page. -->
<title>XHR Queue Browser</title>
<style>
  /* Base layout: sized for the toolbar popup. */
  body {
    width: 450px;
    padding: 15px;
    font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Arial, sans-serif;
    font-size: 13px;
  }
  /* Applied by popup.js when the page is opened with ?fullpage=true. */
  body.fullpage {
    width: 800px;
    max-width: 90%;
    margin: 20px auto;
  }
  h2 {
    margin: 0 0 15px 0;
    font-size: 16px;
  }

  /* "Open in full tab" button. */
  .open-tab-btn {
    background: #9C27B0;
    color: white;
    padding: 10px 20px;
    border: none;
    border-radius: 4px;
    cursor: pointer;
    font-size: 14px;
    margin-bottom: 15px;
    display: block;
    width: 100%;
  }
  .open-tab-btn:hover {
    background: #7B1FA2;
  }

  /* URL input area. */
  textarea {
    width: 100%;
    height: 120px;
    margin-bottom: 10px;
    padding: 8px;
    font-family: monospace;
    font-size: 12px;
    border: 1px solid #ccc;
    border-radius: 4px;
    box-sizing: border-box;
  }

  /* Buttons: default (green) plus colour variants. */
  button {
    padding: 8px 16px;
    margin-right: 8px;
    background: #4CAF50;
    color: white;
    border: none;
    border-radius: 4px;
    cursor: pointer;
    font-size: 13px;
  }
  button:hover {
    background: #45a049;
  }
  button:disabled {
    background: #ccc;
    cursor: not-allowed;
  }
  .stop-btn {
    background: #f44336;
  }
  .stop-btn:hover {
    background: #da190b;
  }
  .export-btn {
    background: #2196F3;
  }
  .export-btn:hover {
    background: #0b7dda;
  }
  .clear-btn {
    background: #ff9800;
  }
  .clear-btn:hover {
    background: #e68900;
  }

  /* Status panel. */
  .status {
    margin: 15px 0;
    padding: 10px;
    background: #f5f5f5;
    border-radius: 4px;
  }
  .countdown {
    font-size: 24px;
    font-weight: bold;
    color: #4CAF50;
    text-align: center;
    margin: 10px 0;
  }
  .progress {
    margin: 10px 0;
  }

  /* Queue list; item classes are set by popup.js. */
  .url-list {
    max-height: 200px;
    overflow-y: auto;
    margin: 10px 0;
    border: 1px solid #ddd;
    border-radius: 4px;
    background: white;
  }
  .url-item {
    padding: 6px 10px;
    border-bottom: 1px solid #eee;
    font-size: 11px;
  }
  .url-item:last-child {
    border-bottom: none;
  }
  .url-item.completed {
    background: #e8f5e9;
    color: #2e7d32;
  }
  .url-item.pending {
    color: #666;
  }
  .url-item.active {
    background: #fff3cd;
    font-weight: bold;
  }
</style>
</head>
<body>
  <h2>XHR Queue Browser</h2>
  <button class="open-tab-btn" id="openTabBtn">📌 Open in Full Tab (Stays Open)</button>

  <!-- Each label is tied to its field via "for" so clicking it focuses the field and assistive technology announces it. -->
  <div>
    <label for="downloadFolder"><strong>Download Folder Name (optional):</strong></label>
    <input type="text" id="downloadFolder" placeholder="e.g., instacart_orders (leave blank for default)" style="width: 100%; padding: 8px; margin-bottom: 10px; border: 1px solid #ccc; border-radius: 4px; box-sizing: border-box; font-size: 12px;">
    <small style="color: #666;">Files will be saved to Downloads/[folder]/[timestamp]/orderId.json</small>
  </div>

  <div style="margin-top: 15px;">
    <label for="xhrFilter"><strong>XHR URL Filter (optional):</strong></label>
    <input type="text" id="xhrFilter" placeholder="e.g., graphql=PostCheckoutOrderDelivery" style="width: 100%; padding: 8px; margin-bottom: 10px; border: 1px solid #ccc; border-radius: 4px; box-sizing: border-box; font-size: 12px;">
    <small style="color: #666;">Only capture XHR requests whose URL contains this text. Leave blank to capture all.</small>
  </div>

  <div style="margin-top: 15px;">
    <label for="urlInput"><strong>URLs to visit (one per line):</strong></label>
    <textarea id="urlInput" placeholder="https://example.com/page1&#10;https://example.com/page2&#10;https://example.com/page3"></textarea>
  </div>

  <div>
    <button id="startBtn">Start Queue</button>
    <button id="stopBtn" class="stop-btn" disabled>Stop</button>
    <button id="clearBtn" class="clear-btn">Clear Queue</button>
  </div>

  <!-- Filled in by popup.js from the background script's status. -->
  <div class="status">
    <div><strong>Status:</strong> <span id="status">Idle</span></div>
    <div class="progress">
      <strong>Progress:</strong> <span id="progress">0/0</span>
    </div>
    <div><strong>Downloaded:</strong> <span id="captured">0 files</span></div>
    <div class="countdown" id="countdown"></div>
  </div>

  <div>
    <strong>Queue:</strong>
    <div class="url-list" id="urlList"></div>
  </div>

  <script src="popup.js"></script>
</body>
</html>
// True when this page was opened as a standalone tab (query string contains fullpage=true).
const isFullPage = window.location.search.includes('fullpage=true');
const openTabButton = document.getElementById('openTabBtn');

if (isFullPage) {
  // Already in a full tab: use the wide layout and hide the "open in tab" button.
  document.body.classList.add('fullpage');
  openTabButton.style.display = 'none';
}

// Clicking the button opens this same page in a regular tab.
openTabButton.addEventListener('click', () => {
  chrome.tabs.create({ url: chrome.runtime.getURL('popup.html?fullpage=true') });
});
let countdownInterval = null;

// "Start Queue": read the form, send the queue to the background script, refresh the view.
document.getElementById('startBtn').addEventListener('click', async () => {
  console.log('Start button clicked');

  const fieldValue = (id) => document.getElementById(id).value.trim();

  const urlText = fieldValue('urlInput');
  console.log('URL text:', urlText);
  if (urlText === '') {
    alert('Please enter at least one URL');
    return;
  }

  // One URL per line; surrounding whitespace and blank lines are dropped.
  const urls = urlText
    .split('\n')
    .map((line) => line.trim())
    .filter((line) => line !== '');
  const xhrFilter = fieldValue('xhrFilter');
  const downloadFolder = fieldValue('downloadFolder');
  console.log('Starting queue with:', { urls, xhrFilter, downloadFolder });

  try {
    const response = await chrome.runtime.sendMessage({
      action: 'startQueue',
      urls,
      xhrFilter,
      downloadFolder,
    });
    console.log('Start queue response:', response);
  } catch (error) {
    console.error('Error starting queue:', error);
    alert('Error starting queue: ' + error.message);
  }

  updateUI();
});
// "Stop": ask the background script to halt the queue, then refresh the view.
document.getElementById('stopBtn').addEventListener('click', async () => {
  await chrome.runtime.sendMessage({ action: 'stopQueue' });
  updateUI();
});

// "Clear Queue": after confirmation, reset the background queue and empty the URL box.
document.getElementById('clearBtn').addEventListener('click', async () => {
  if (!confirm('Clear queue?')) {
    return;
  }
  await chrome.runtime.sendMessage({ action: 'clearAll' });
  document.getElementById('urlInput').value = '';
  updateUI();
});
/**
 * Fetch the queue status from the background script and render it: status
 * text, progress, download count, button enablement, countdown line and the
 * per-URL list.
 *
 * This runs once a second, so a failed or empty status reply (for example
 * while the background script is unavailable) is logged/ignored and the
 * current view is left as it is, instead of throwing on every tick.
 *
 * @returns {Promise<void>}
 */
async function updateUI() {
  let response;
  try {
    response = await chrome.runtime.sendMessage({ action: 'getStatus' });
  } catch (error) {
    console.error('Could not fetch status:', error);
    return;
  }
  // No usable status: keep whatever is currently displayed.
  if (!response || !Array.isArray(response.queue)) {
    return;
  }

  const { status, queue, currentIndex, downloadCount, countdown } = response;
  document.getElementById('status').textContent = status;
  document.getElementById('progress').textContent = `${currentIndex}/${queue.length}`;
  document.getElementById('captured').textContent = `${downloadCount} files`;
  document.getElementById('startBtn').disabled = status !== 'idle';
  document.getElementById('stopBtn').disabled = status === 'idle';

  // Update countdown
  const countdownEl = document.getElementById('countdown');
  if (countdown > 0 && status === 'waiting') {
    countdownEl.textContent = `Next page in ${countdown}s`;
  } else if (status === 'loading') {
    countdownEl.textContent = 'Loading page...';
  } else {
    countdownEl.textContent = '';
  }

  // Update URL list
  const urlList = document.getElementById('urlList');
  urlList.innerHTML = '';
  queue.forEach((item, index) => {
    const div = document.createElement('div');
    div.className = 'url-item';
    if (item.completed) {
      div.classList.add('completed');
      div.textContent = `✓ ${item.url}`;
    } else if (index === currentIndex) {
      div.classList.add('active');
      div.textContent = `→ ${item.url}`;
    } else {
      div.classList.add('pending');
      div.textContent = ` ${item.url}`;
    }
    urlList.appendChild(div);
  });
}
// Refresh the view once per second for as long as this page is open...
setInterval(() => {
  updateUI();
}, 1000);
// ...and once right away so it does not start out stale.
updateUI();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment