Skip to content

Instantly share code, notes, and snippets.

@mgng
Created November 12, 2009 07:30
Show Gist options
  • Save mgng/232689 to your computer and use it in GitHub Desktop.
Save mgng/232689 to your computer and use it in GitHub Desktop.
選択範囲を圧縮して書き換えるやつ
// DocCompRep: adds a fixed-position button to the page; clicking it replaces
// the current text selection with a shortened version of the selected text.
var DocCompRep = {
  // Conditional-compilation trick: an engine that honours `@cc_on` evaluates
  // `!false` (true); every other engine sees a comment followed by `false`.
  isMSIE : /*@cc_on!@*/false,

  /**
   * Escape the characters that are significant in HTML so that `str` can be
   * handed to an HTML-consuming API and still be rendered as literal text.
   * @param {string} str Plain text.
   * @returns {string} HTML-escaped text.
   */
  escapeHTML : function(str){
    return String(str)
      .replace(/&/g, '&amp;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;')
      .replace(/"/g, '&quot;')
      .replace(/'/g, '&#39;');
  },

  /**
   * Replace the current selection through the legacy `document.selection`
   * API.
   * @param {string} str Plain text that replaces the selection.
   */
  doIE : function(str){
    var range = document.selection.createRange();
    // The text originates from the page selection, so it is escaped before
    // being pasted as HTML; otherwise text that merely looks like markup
    // would be parsed as markup.
    // NOTE(review): errors were already swallowed here in the original,
    // presumably because pasteHTML can throw for some selection types. Kept
    // as a deliberate best-effort; confirm.
    try { range.pasteHTML(this.escapeHTML(str)); } catch (e) {}
  },

  /**
   * Replace the current selection through the DOM Selection/Range API.
   * @param {string} str Plain text that replaces the selection.
   */
  doFF : function(str){
    var sel = window.getSelection();
    // getRangeAt(0) throws when the selection holds no range at all.
    if (!sel || sel.rangeCount === 0) { return; }
    var range = sel.getRangeAt(0);
    range.deleteContents();
    // Insert as a text node (not via innerHTML) so the replacement can never
    // be interpreted as markup.
    var textNode = document.createTextNode(str);
    var cont = range.startContainer;
    var offset = range.startOffset;
    switch (cont.nodeType) {
      case 1: // Element node
        // `|| null` appends when the offset points past the last child.
        cont.insertBefore(textNode, cont.childNodes[offset] || null);
        break;
      case 3: // Text node
        var tail = cont.splitText(offset);
        tail.parentNode.insertBefore(textNode, tail);
        break;
    }
  },

  // Tokenizer pattern. Alternatives, in priority order: kanji run, hiragana
  // run, katakana run, half-width katakana run, ASCII alphanumeric run,
  // full-width alphanumeric run, punctuation run, bracketed groups (the
  // brackets and everything between them form one token), whitespace run.
  // Characters matched by no alternative are skipped by the tokenizer.
  // Written as a regex literal: the previous string form lost its
  // backslashes, turning `\(.*?\)` into an always-empty capture group (which
  // made every later alternative unreachable), `\[.*?\]` into a character
  // class, `\.\-\_` into the range `.-_`, and `\s` into the letter `s`.
  // NOTE(review): the full-width alternatives (\uFF..) restore what look
  // like normalized-away duplicates in the original pattern; confirm.
  reg1 : /[一-龠々〆ヵヶ]+|[ぁ-ん]+|[ァ-ヴー]+|[\uFF65-\uFF9F]+|[a-zA-Z0-9]+|[\uFF21-\uFF3A\uFF41-\uFF5A\uFF10-\uFF19]+|[,.\-_~^=@'、。!\uFF01?\uFF1F・]+|\uFF08.*?\uFF09|「.*?」|『.*?』|【.*?】|〈.*?〉|\(.*?\)|\[.*?\]|\{.*?\}|<.*?>|[\s\u3000]+/g,

  // Words after which the text is cut into chunks before tokenizing, so a
  // kana run never continues past one of them. Longer entries come first so
  // they win over their own prefixes.
  reg2 : /(でなければ|そういえば|そういや|どうにも|こうにも|について|そりゃ|ぐらい|くらい|ながら|ならば|までを|までの|なのか|として|です|ます|つつ|だに|まで|とは|とて|なら|から|して|だけ|より|にて|ほど|など|って|では)/g,

  /**
   * Strip leading and trailing whitespace, including the ideographic space
   * U+3000.
   * @param {string} str
   * @returns {string}
   */
  trim : function(str){
    return str.replace(/^[\s\u3000]+|[\s\u3000]+$/g, '');
  },

  /**
   * Split text into tokens: the text is first cut after every `reg2` word,
   * then each chunk is tokenized with `reg1`.
   * @param {string} str Text to tokenize.
   * @returns {string[]} Non-empty, trimmed tokens in source order.
   */
  morpheme : function(str){
    var chunks = [];
    var tokens = [];
    var cutter = this.reg2;
    var last = 0;
    var m, end, i, n, found, t;

    // Cut by match position instead of inserting a "|" marker, so a literal
    // "|" in the input is no longer mistaken for a chunk boundary.
    cutter.lastIndex = 0;
    while ((m = cutter.exec(str)) !== null) {
      end = m.index + m[0].length;
      chunks.push(str.slice(last, end));
      last = end;
    }
    chunks.push(str.slice(last));

    for (i = 0; i < chunks.length; i++) {
      found = chunks[i].match(this.reg1);
      if (!found) { continue; }
      for (n = 0; n < found.length; n++) {
        t = this.trim(found[n]);
        // Whitespace-only matches trim down to '' and are dropped.
        if (t !== '') { tokens.push(t); }
      }
    }
    return tokens;
  },

  /**
   * Shorten text by chaining token groups. The first `c_len` tokens are
   * emitted, then the function repeatedly looks (from the current position
   * onward) for occurrences of the last emitted token, jumps to the token
   * following one of them, and emits `c_len` tokens from there. The jump
   * target is chosen deterministically: the last candidate when there are
   * at most three, otherwise the middle one.
   * @param {string} s Source text.
   * @param {number} [c_len=2] Number of tokens emitted per step.
   * @returns {string} Shortened text; '' for blank input; the joined tokens
   *   unchanged when there are no more than `c_len` of them.
   */
  markov : function(s, c_len){
    s = this.trim(s);
    c_len = c_len || 2;
    if (s === '') { return ''; }

    var s_ls = this.morpheme(s);
    var s_ls_ln = s_ls.length;
    if (s_ls_ln <= c_len) { return s_ls.join(''); }

    // `false` is an end-of-input sentinel; real tokens are non-empty strings.
    s_ls.push(false);
    s_ls_ln++;

    var rt = [];
    var lw = '';
    var c_idx = 0;
    var i, j, idxs, idx_ln;

    for (i = 0; i < c_len; i++) {
      rt.push(s_ls[i]);
      lw = s_ls[i];
    }

    for (i = 0; i < s_ls_ln; i++) {
      // Collect the positions that directly follow an occurrence of `lw`.
      idxs = [];
      for (j = c_idx; j < s_ls_ln; j++) {
        if (s_ls[j] === lw && s_ls[j + 1]) {
          idxs.push(j + 1);
        }
      }
      idx_ln = idxs.length;
      if (idx_ln === 0) {
        return rt.join('');
      }
      c_idx = (idx_ln <= 3) ? idxs[idx_ln - 1] : idxs[Math.ceil(idx_ln / 2) - 1];
      for (j = c_idx; j < c_idx + c_len; j++) {
        if (s_ls[j] === false) { return rt.join(''); }
        rt.push(s_ls[j]);
        lw = s_ls[j];
      }
    }
    return rt.join('');
  },

  /**
   * Create the trigger button and append it to `document.body`.
   */
  init : function(){
    var d = document;
    var b = d.createElement('input');
    b.type = 'button';
    b.value = '圧縮';
    b.style.zIndex = 9999;
    b.style.position = 'fixed';
    b.style.top = '2em';
    b.style.left = '50%';
    b.style.display = 'block';
    b.onclick = function(){ DocCompRep.run(); };
    d.body.appendChild(b);
  },

  /**
   * Read the selected text, shorten it with `markov`, and write the result
   * back over the selection. Does nothing when the shortened text is empty.
   */
  run : function(){
    var s = (window.getSelection ? window.getSelection() : document.selection.createRange().text) + '';
    s = this.markov(s);
    if (s === '') { return; }
    if (this.isMSIE) {
      this.doIE(s);
    } else {
      this.doFF(s);
    }
  }
};
// Runs as soon as the script is evaluated: creates the button and appends it
// to document.body.
DocCompRep.init();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment