Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Select an option

  • Save billdueber/32c44f71ff474628126753f57ab65934 to your computer and use it in GitHub Desktop.

Select an option

Save billdueber/32c44f71ff474628126753f57ab65934 to your computer and use it in GitHub Desktop.
context-mode output sample
const fs=require('fs');
const t=fs.readFileSync('captures/test/index.html','utf8');
const r=fs.readFileSync('captures/reference/index.html','utf8');
const idxs=[];
for(let i=0;i<t.length;i++) if(t[i]==='�') idxs.push(i);
console.log('replacement count',idxs.length);
for(const i of idxs){
const pre=t.slice(Math.max(0,i-40),i);
const post=t.slice(i+1,i+40);
const marker=pre+post;
let pos=-1;
// try find using trimmed context pieces
for(let k=20;k>=6 && pos<0;k-=2){
const key=t.slice(Math.max(0,i-k),i)+t.slice(i+1,i+1+k);
pos=r.indexOf(key);
}
let expected='?';
if(pos>=0){
// find char in ref between matching halves
// brute around pos
const around=r.slice(Math.max(0,pos-5),pos+90);
// estimate expected at split point: char at pos + leftLen maybe unknown; just inspect nearest non-ascii around there
const near=r.slice(Math.max(0,pos-10),pos+60);
const m=near.match(/[\u0080-\uFFFF]/);
if(m) expected=m[0];
}
console.log('---');
console.log('test ctx',JSON.stringify(pre+'[�]'+post));
if(pos>=0){
console.log('ref ctx ',JSON.stringify(r.slice(Math.max(0,pos-40),pos+80)));
} else {
// fallback by searching pre and post separately
const p=r.indexOf(pre.slice(-20));
const q=r.indexOf(post.slice(0,20));
console.log('match fail',p,q);
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment