natzir · March 21, 2025 11:45 · natzir · Mar 19, 2025
diff --git a/archive_org_url_extractor.js b/archive_org_url_extractor.js
 javascript:(function () {
   /*
    * Archive.org URL extractor bookmarklet.
    *
    * Asks the Wayback Machine timemap endpoint (through a list of public CORS
    * proxies) for every capture under the current page origin, keeps the rows
    * whose MIME type is text/html, and downloads them as a CSV file.
    * Progress is shown in a fixed banner at the top of the page; the final
    * outcome (success, nothing found, or error) is reported with alert().
    *
    * NOTE(review): only block comments, explicit semicolons and single-line
    * string literals are used, so that collapsing the line breaks (as
    * presumably happens when this text is pasted into a bookmark URL field)
    * does not change its meaning. Confirm in the target browsers.
    */

   /* Number of full passes over the proxy list before giving up. */
   const MAX_RETRIES = 3;
   /* Pause between two consecutive attempts, in milliseconds. */
   const RETRY_DELAY = 2e3;
   /* Upper bound for a single proxy request, in milliseconds. */
   const TIMEOUT_MS = 3e5;

   /* Opaque origins (file:, about:, data:) serialise as "null"; feeding that
      to new URL() would throw before any feedback could be shown. */
   if (!/^https?:$/.test(window.location.protocol)) {
     alert("Error: this bookmarklet only works on http(s) pages.");
     return;
   }

   const url = window.location.origin;
   const domain = new URL(url).hostname;

   /* Each server-side restriction needs its own filter= key; the client-side
      MIME check further down is kept as a safety net. */
   const archiveAPI =
     "https://web.archive.org/web/timemap/json?url=" + encodeURIComponent(url) +
     "&matchType=prefix&collapse=urlkey&output=json" +
     "&fl=original,mimetype,timestamp,groupcount" +
     "&filter=statuscode:200&filter=mimetype:text/html";
   const encodedAPI = encodeURIComponent(archiveAPI);

   const ALLORIGINS_PREFIX = "https://api.allorigins.win/";
   const proxyOptions = [
     ALLORIGINS_PREFIX + "get?disableCache=true&url=" + encodedAPI,
     "https://corsproxy.io/?" + encodedAPI,
     "https://cors-proxy.taskcluster.net/" + encodedAPI
   ];

   const loadingMessage = document.createElement("div");

   /**
    * Replace the banner content.
    * @param {string} html Markup built only from the static strings in this script.
    */
   function updateLoadingMessage(html) {
     loadingMessage.innerHTML = html;
   }

   /** Detach the banner from the page if it is still attached. */
   function removeLoadingMessage() {
     if (document.body.contains(loadingMessage)) {
       document.body.removeChild(loadingMessage);
     }
   }

   /**
    * @param {number} ms Milliseconds to wait.
    * @returns {Promise<void>} Resolves after the given pause.
    */
   function delay(ms) {
     return new Promise((resolve) => setTimeout(resolve, ms));
   }

   /**
    * fetch() with an upper time bound on receiving the response headers.
    * On timeout the request is aborted instead of being left running, and the
    * timer is always cleared once the race is decided.
    * @param {string} resource URL to request.
    * @param {object} [options] Extra fetch options; any signal in it is replaced.
    * @param {number} [timeoutMs] Time limit in milliseconds.
    * @returns {Promise<Response>} The fetch response.
    * @throws {Error} "Request timeout ..." when the limit is hit, or the fetch error.
    */
   async function fetchWithTimeout(resource, options = {}, timeoutMs = TIMEOUT_MS) {
     const controller = new AbortController();
     let timerId;
     const timeout = new Promise((resolve, reject) => {
       timerId = setTimeout(() => {
         controller.abort();
         reject(new Error("Request timeout - Wayback Machine might be busy"));
       }, timeoutMs);
     });
     const request = fetch(resource, Object.assign({}, options, { signal: controller.signal }));
     try {
       return await Promise.race([request, timeout]);
     } finally {
       clearTimeout(timerId);
     }
   }

   /**
    * Perform one request through one proxy and return the decoded timemap.
    * @param {string} proxyUrl Entry of proxyOptions.
    * @param {string} baseMessage Banner text shared by all attempts.
    * @param {string} progress Banner suffix naming the attempt and proxy.
    * @returns {Promise<*>} The parsed JSON payload of the Wayback Machine.
    * @throws {Error} On timeout, non-2xx status, invalid JSON or an empty proxy envelope.
    */
   async function requestViaProxy(proxyUrl, baseMessage, progress) {
     updateLoadingMessage(baseMessage + "<br><small>Sending request and waiting for response...</small>" + progress);
     const response = await fetchWithTimeout(proxyUrl);
     if (!response.ok) {
       throw new Error(`HTTP error! status: ${response.status}`);
     }
     updateLoadingMessage(baseMessage + "<br><small>Processing response data...</small>" + progress);
     const payload = await response.json();
     /* Match on the proxy prefix rather than anywhere in the URL, because the
        URL also embeds the encoded address of the page being inspected. */
     if (!proxyUrl.startsWith(ALLORIGINS_PREFIX)) {
       return payload;
     }
     /* This proxy wraps the upstream body as a string in a "contents" field. */
     if (!payload.contents) {
       throw new Error("Empty response from proxy");
     }
     return JSON.parse(payload.contents);
   }

   /**
    * Try every proxy in order, up to MAX_RETRIES passes over the whole list,
    * pausing RETRY_DELAY milliseconds after each failure.
    * @returns {Promise<*>} The first successfully decoded timemap.
    * @throws {Error} When every attempt on every proxy has failed.
    */
   async function tryProxies() {
     const baseMessage = "Please wait: Contacting Wayback Machine...<br><br>This might take several minutes. The Wayback Machine servers can be slow at times.<br><br>Don't close this tab.";
     for (let attempt = 0; attempt < MAX_RETRIES; attempt++) {
       for (let proxyIndex = 0; proxyIndex < proxyOptions.length; proxyIndex++) {
         const progress = `<br><small>Attempt ${attempt + 1}/${MAX_RETRIES} using proxy ${proxyIndex + 1}/${proxyOptions.length}</small>`;
         try {
           return await requestViaProxy(proxyOptions[proxyIndex], baseMessage, progress);
         } catch (error) {
           console.warn(`Error with proxy ${proxyIndex + 1}:`, error);
           updateLoadingMessage(baseMessage + "<br><small>Connection issue. Waiting to retry...</small>" + progress);
           await delay(RETRY_DELAY);
         }
       }
     }
     throw new Error("Maximum number of retries exceeded. The Wayback Machine may be experiencing high traffic.");
   }

   /**
    * Quote one CSV field, doubling embedded double quotes so that a quote
    * inside an archived URL cannot break the row.
    * @param {*} value Field value; null and undefined become an empty field.
    * @returns {string} The quoted field.
    */
   function csvField(value) {
     return '"' + String(value == null ? "" : value).replace(/"/g, '""') + '"';
   }

   /**
    * Offer the given text as a file download through a temporary link.
    * @param {string} csvText File content.
    * @param {string} filename Suggested file name.
    */
   function downloadCsv(csvText, filename) {
     const objectUrl = URL.createObjectURL(new Blob([csvText], { type: "text/csv" }));
     const link = document.createElement("a");
     link.href = objectUrl;
     link.download = filename;
     document.body.appendChild(link);
     link.click();
     document.body.removeChild(link);
     /* Release the blob, but only after the click has had time to start the download. */
     setTimeout(() => URL.revokeObjectURL(objectUrl), 1000);
   }

   /** Fetch the timemap, export the HTML captures and report the outcome. */
   async function run() {
     try {
       const rows = await tryProxies();
       /* The first row of the timemap is the column header. */
       const htmlRows = Array.isArray(rows)
         ? rows.slice(1).filter((row) => row[1] === "text/html")
         : [];
       if (htmlRows.length === 0) {
         removeLoadingMessage();
         alert("No archived URLs found on Archive.org for this domain. The site might not be archived or might be using robots.txt to prevent archiving.");
         return;
       }
       /* Update the banner while it is still attached; it is removed once the download is triggered. */
       updateLoadingMessage("Success! Preparing CSV file with archived URLs...");
       const header = "Domain,URL,MIME Type,Timestamp,Group Count\n";
       const lines = htmlRows.map((row) =>
         [domain, row[0], row[1], row[2], row[3]].map(csvField).join(",") + "\n"
       );
       downloadCsv(header + lines.join(""), "archive_urls_" + domain + ".csv");
       removeLoadingMessage();
       alert(`Success! Downloaded ${htmlRows.length} archived URLs for ${domain}.\n\nThe CSV file contains all HTML pages that Wayback Machine has archived for this domain.`);
     } catch (error) {
       removeLoadingMessage();
       console.error("Error fetching data from Archive.org:", error);
       alert("Error: " + error.message + "\n\nYou can try accessing the Wayback Machine directly at:\nhttps://web.archive.org/web/*/" + url);
     }
   }

   loadingMessage.innerText = "Please wait: Connecting to Wayback Machine...\nThis might take a few minutes. Don't close this tab.";
   Object.assign(loadingMessage.style, {
     position: "fixed",
     top: "10px",
     left: "50%",
     transform: "translateX(-50%)",
     backgroundColor: "#000",
     color: "#fff",
     padding: "15px 20px",
     borderRadius: "5px",
     zIndex: "9999",
     textAlign: "center",
     maxWidth: "400px",
     lineHeight: "1.5",
     fontFamily: "Arial, sans-serif"
   });
   document.body.appendChild(loadingMessage);

   /* run() handles its own failures; nothing is returned so that the IIFE
      evaluates to undefined, as the original did. */
   run();
 })();