Created
June 29, 2026 13:17
-
-
Save billdueber/32c44f71ff474628126753f57ab65934 to your computer and use it in GitHub Desktop.
context-mode output sample
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Diagnostic script: finds every U+FFFD replacement character in the test
// capture and prints the text around it, together with the corresponding
// region of the reference capture when that region can be located.
const fs = require('fs');

// Written as an escape so the script keeps working even if this file is
// saved or pasted through an encoding that mangles the literal character.
const REPLACEMENT_CHAR = '\uFFFD';

// Number of characters of context shown before each replacement character.
const CONTEXT_LEN = 40;

const testHtml = fs.readFileSync('captures/test/index.html', 'utf8');
const referenceHtml = fs.readFileSync('captures/reference/index.html', 'utf8');

/**
 * Locate, in the reference capture, the text surrounding the replacement
 * character found at `index` of the test capture.
 *
 * The search key is the k characters before `index` joined directly to the
 * k characters after it (the replacement character itself is skipped), so a
 * hit means the reference contains those two halves back to back. Shorter
 * keys are tried progressively (k = 20, 18, ..., 6) until one matches.
 *
 * @param {number} index - Position of a replacement character in the test capture.
 * @returns {number} Offset of the first matching key in the reference capture, or -1.
 */
function findContextInReference(index) {
  for (let k = 20; k >= 6; k -= 2) {
    const key =
      testHtml.slice(Math.max(0, index - k), index) +
      testHtml.slice(index + 1, index + 1 + k);
    const pos = referenceHtml.indexOf(key);
    if (pos >= 0) {
      return pos;
    }
  }
  return -1;
}

// Collect the offset of every replacement character in the test capture.
const replacementIndexes = [];
for (
  let i = testHtml.indexOf(REPLACEMENT_CHAR);
  i !== -1;
  i = testHtml.indexOf(REPLACEMENT_CHAR, i + 1)
) {
  replacementIndexes.push(i);
}
console.log('replacement count', replacementIndexes.length);

for (const i of replacementIndexes) {
  const pre = testHtml.slice(Math.max(0, i - CONTEXT_LEN), i);
  // The trailing context ends at i + CONTEXT_LEN, so it is one character
  // shorter than the leading context; kept as-is to preserve the output.
  const post = testHtml.slice(i + 1, i + CONTEXT_LEN);
  const pos = findContextInReference(i);

  console.log('---');
  console.log('test ctx', JSON.stringify(`${pre}[${REPLACEMENT_CHAR}]${post}`));
  if (pos >= 0) {
    console.log(
      'ref ctx ',
      JSON.stringify(referenceHtml.slice(Math.max(0, pos - 40), pos + 80)),
    );
  } else {
    // Fallback: report where each half of the context occurs on its own
    // (-1 when that half is absent from the reference capture).
    const p = referenceHtml.indexOf(pre.slice(-20));
    const q = referenceHtml.indexOf(post.slice(0, 20));
    console.log('match fail', p, q);
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment