Last active
March 21, 2025 11:45
-
-
Save natzir/c36e770205858a2f6922ed90d1226a7c to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
javascript:(function () {
  /*
   * Wayback Machine URL extractor bookmarklet.
   *
   * Queries the Wayback Machine timemap endpoint (through a list of public
   * CORS proxies) for every archived HTML capture under the current page's
   * origin, then downloads the result as a CSV file.
   *
   * Only block comments are used and every statement is terminated
   * explicitly, so the code does not depend on line breaks when it is
   * pasted into a bookmark's URL field.
   */

  /* Full passes over the proxy list before giving up. */
  const MAX_RETRIES = 3;
  /* Pause between failed attempts, in milliseconds. */
  const RETRY_DELAY = 2e3;
  /* Upper bound for a single proxy request, in milliseconds (5 minutes). */
  const TIMEOUT_MS = 3e5;

  const url = window.location.origin;
  const domain = new URL(url).hostname;

  /*
   * Each server-side restriction needs its own "filter=" parameter; the
   * MIME type restriction is therefore passed as a second filter.
   */
  const archiveAPI =
    "https://web.archive.org/web/timemap/json?url=" +
    encodeURIComponent(url) +
    "&matchType=prefix&collapse=urlkey&output=json" +
    "&fl=original,mimetype,timestamp,groupcount" +
    "&filter=statuscode:200&filter=mimetype:text/html";

  /* Proxies are tried in this order on every pass. */
  const proxyOptions = [
    "https://api.allorigins.win/get?disableCache=true&url=" + encodeURIComponent(archiveAPI),
    "https://corsproxy.io/?" + encodeURIComponent(archiveAPI),
    "https://cors-proxy.taskcluster.net/" + encodeURIComponent(archiveAPI)
  ];

  const loadingMessage = document.createElement("div");

  /* Replaces the banner content; callers pass static markup only. */
  function updateLoadingMessage(html) {
    loadingMessage.innerHTML = html;
  }

  /* Detaches the banner; harmless if it is already detached. */
  function removeLoadingMessage() {
    loadingMessage.remove();
  }

  /* Resolves after the given number of milliseconds. */
  function delay(ms) {
    return new Promise(function (resolve) {
      setTimeout(resolve, ms);
    });
  }

  /*
   * fetch() with a deadline.
   *
   * Resolves with the Response, or rejects with a timeout Error once
   * timeoutMs elapses. On timeout the underlying request is aborted, and
   * the timer is always cleared once the race settles.
   */
  async function fetchWithTimeout(resource, options = {}, timeoutMs = TIMEOUT_MS) {
    const controller = new AbortController();
    let timer;
    const deadline = new Promise(function (resolve, reject) {
      timer = setTimeout(function () {
        /* Reject first so callers see the timeout message, not AbortError. */
        reject(new Error("Request timeout - Wayback Machine might be busy"));
        controller.abort();
      }, timeoutMs);
    });
    try {
      const request = fetch(resource, Object.assign({}, options, { signal: controller.signal }));
      return await Promise.race([request, deadline]);
    } finally {
      clearTimeout(timer);
    }
  }

  /*
   * Walks the proxy list up to MAX_RETRIES times and returns the parsed
   * timemap payload from the first proxy that answers successfully.
   * Waits RETRY_DELAY after every failure. Throws once all passes fail.
   */
  async function tryProxies(attempt = 0, proxyIndex = 0) {
    const baseMessage =
      "Please wait: Contacting Wayback Machine...<br><br>" +
      "This might take several minutes. The Wayback Machine servers can be slow at times.<br><br>" +
      "Don't close this tab.";

    while (attempt < MAX_RETRIES) {
      while (proxyIndex < proxyOptions.length) {
        const progress =
          `<br><small>Attempt ${attempt + 1}/${MAX_RETRIES} using proxy ${proxyIndex + 1}/${proxyOptions.length}</small>`;
        try {
          const proxyUrl = proxyOptions[proxyIndex];
          updateLoadingMessage(baseMessage + "<br><small>Sending request and waiting for response...</small>" + progress);
          const response = await fetchWithTimeout(proxyUrl);
          if (!response.ok) {
            throw new Error(`HTTP error! status: ${response.status}`);
          }
          updateLoadingMessage(baseMessage + "<br><small>Processing response data...</small>" + progress);
          const payload = await response.json();
          /* allorigins wraps the upstream body as a JSON string in "contents". */
          if (proxyUrl.includes("allorigins.win")) {
            if (!payload.contents) {
              throw new Error("Empty response from proxy");
            }
            return JSON.parse(payload.contents);
          }
          return payload;
        } catch (error) {
          console.warn(`Error with proxy ${proxyIndex + 1}:`, error);
          updateLoadingMessage(baseMessage + "<br><small>Connection issue. Waiting to retry...</small>" + progress);
          await delay(RETRY_DELAY);
        }
        proxyIndex += 1;
      }
      proxyIndex = 0;
      attempt += 1;
    }
    throw new Error("Maximum number of retries exceeded. The Wayback Machine may be experiencing high traffic.");
  }

  /* Quotes one CSV field, doubling embedded quotes so the row stays valid. */
  function csvField(value) {
    return '"' + String(value).replace(/"/g, '""') + '"';
  }

  loadingMessage.innerText =
    "Please wait: Connecting to Wayback Machine...\nThis might take a few minutes. Don't close this tab.";
  Object.assign(loadingMessage.style, {
    position: "fixed",
    top: "10px",
    left: "50%",
    transform: "translateX(-50%)",
    backgroundColor: "#000",
    color: "#fff",
    padding: "15px 20px",
    borderRadius: "5px",
    zIndex: "9999",
    textAlign: "center",
    maxWidth: "400px",
    lineHeight: "1.5",
    fontFamily: "Arial, sans-serif"
  });
  document.body.appendChild(loadingMessage);

  tryProxies()
    .then(function (rows) {
      /* The first row is the column header, so fewer than two rows means no captures. */
      if (!Array.isArray(rows) || rows.length < 2) {
        removeLoadingMessage();
        alert("No archived URLs found on Archive.org for this domain. The site might not be archived or might be using robots.txt to prevent archiving.");
        return;
      }

      updateLoadingMessage("Success! Preparing CSV file with archived URLs...");

      /* Keep the client-side MIME check in case the server-side filter is ignored. */
      const htmlRows = rows.slice(1).filter(function (row) {
        return row[1] === "text/html";
      });
      const csvBody = htmlRows
        .map(function (row) {
          return [domain, row[0], row[1], row[2], row[3]].map(csvField).join(",") + "\n";
        })
        .join("");
      const csv = "Domain,URL,MIME Type,Timestamp,Group Count\n" + csvBody;

      const blob = new Blob([csv], { type: "text/csv" });
      const objectUrl = URL.createObjectURL(blob);
      const link = document.createElement("a");
      link.href = objectUrl;
      link.download = "archive_urls_" + domain + ".csv";
      document.body.appendChild(link);
      link.click();
      document.body.removeChild(link);
      /* Release the blob once the browser has had time to start the download. */
      setTimeout(function () {
        URL.revokeObjectURL(objectUrl);
      }, 1e3);

      removeLoadingMessage();
      alert(`Success! Downloaded ${htmlRows.length} archived URLs for ${domain}.\n\nThe CSV file contains all HTML pages that Wayback Machine has archived for this domain.`);
    })
    .catch(function (error) {
      removeLoadingMessage();
      console.error("Error fetching data from Archive.org:", error);
      alert("Error: " + error.message + "\n\nYou can try accessing the Wayback Machine directly at:\nhttps://web.archive.org/web/*/" + url);
    });
})();
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Wayback Machine URL Extractor Bookmarklet
This bookmarklet extracts archived URLs from the Wayback Machine for any website you're visiting.
What It Does
How to Use
Tips