Last active
March 30, 2025 10:33
-
-
Save journey-ad/77b0516db6e509e520e1d2d3a67279ef to your computer and use it in GitHub Desktop.
搜狗scel词库转txt
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// ==UserScript==
// @name         搜狗scel词库转txt
// @namespace    https://github.com/journey-ad
// @version      0.1
// @description  搜狗scel词库转txt并下载
// @author       journey-ad
// @license      MIT
// @match        *://pinyin.sogou.com/dict/*
// @icon         https://www.google.com/s2/favicons?domain=pinyin.sogou.com
// @grant        none
// ==/UserScript==
(function() {
'use strict';
/**
 * Decode a little-endian UTF-16 string from a byte range of an ArrayBuffer.
 *
 * Decoding stops at the first 0x0000 code unit or at the end of the range,
 * whichever comes first.
 *
 * @param {ArrayBuffer} buffer - Source bytes.
 * @param {number} offset - Byte offset of the first code unit.
 * @param {number} length - Number of bytes available for the string.
 * @returns {string} The decoded text (empty when `length` is 0).
 * @throws {RangeError} If `offset`/`length` fall outside `buffer`.
 */
function readUtf16Str(buffer, offset, length) {
  const view = new DataView(buffer, offset, length);
  // Only whole 2-byte code units can be decoded. A dangling odd byte is
  // ignored, because reading a uint16 there would run past the view and throw.
  const wholeBytes = length - (length % 2);
  const chars = [];
  for (let pos = 0; pos < wholeBytes; pos += 2) {
    const codeUnit = view.getUint16(pos, true);
    if (codeUnit === 0) break; // NUL terminator
    chars.push(String.fromCharCode(codeUnit));
  }
  return chars.join('');
}
/**
 * Read one unsigned 16-bit little-endian integer from an ArrayBuffer.
 *
 * @param {ArrayBuffer} buffer - Source bytes.
 * @param {number} offset - Byte offset of the value.
 * @returns {number} The value in the range 0..65535.
 * @throws {RangeError} If the two bytes at `offset` are not inside `buffer`.
 */
function readUint16(buffer, offset) {
  return new DataView(buffer).getUint16(offset, true);
}
/**
 * Determine where the word records start, based on the byte at offset 4.
 *
 * 0x44 maps to offset 0x2628 and 0x45 maps to offset 0x26c4. Any other value,
 * or a buffer too short to contain that byte, yields -1.
 *
 * @param {ArrayBuffer} buffer - Raw file contents.
 * @returns {number} Byte offset of the first record, or -1 when unrecognised.
 */
function getHzOffset(buffer) {
  const MASK_POSITION = 4;
  // A buffer that ends before the mask byte cannot be a recognised file.
  // Report it through the same -1 sentinel instead of throwing.
  if (buffer.byteLength <= MASK_POSITION) return -1;

  switch (new DataView(buffer).getUint8(MASK_POSITION)) {
    case 0x44:
      return 0x2628;
    case 0x45:
      return 0x26c4;
    default:
      return -1;
  }
}
/**
 * Read the pinyin table into an index -> syllable lookup object.
 *
 * Parsing starts 4 bytes after offset 0x1540. Each entry is laid out as:
 *   uint16 index, uint16 byte length, UTF-16LE text of that byte length.
 * Reading stops after the entry whose text is "zuo" (presumably the last
 * syllable of the table - TODO confirm), or when fewer than 4 bytes remain.
 *
 * @param {ArrayBuffer} buffer - Raw file contents.
 * @returns {Object<number, string>} Syllables keyed by their table index.
 */
function getPyMap(buffer) {
  const TABLE_START = 0x1540 + 4;
  const ENTRY_HEADER_BYTES = 4;

  const pyMap = {};
  let cursor = TABLE_START;
  while (cursor + ENTRY_HEADER_BYTES <= buffer.byteLength) {
    const index = readUint16(buffer, cursor);
    const textBytes = readUint16(buffer, cursor + 2);
    const syllable = readUtf16Str(buffer, cursor + ENTRY_HEADER_BYTES, textBytes);

    pyMap[index] = syllable;
    cursor += ENTRY_HEADER_BYTES + textBytes;

    if (syllable === "zuo") break;
  }
  return pyMap;
}
/**
 * Extract every word stored in the record area of the file.
 *
 * Starting at `hzOffset`, the data is read as a sequence of groups:
 *   uint16 wordCount, uint16 byte length of the pinyin index list,
 *   the index list itself, then `wordCount` word entries.
 * Each word entry is: uint16 byte length, UTF-16LE text, then 12 further
 * bytes that are skipped (presumably per-word extension data - not verified
 * here).
 *
 * Only the words are returned, so the pinyin index list is skipped rather
 * than decoded.
 *
 * @param {ArrayBuffer} buffer - Raw file contents.
 * @param {number} hzOffset - Byte offset of the first group.
 * @param {Object<number, string>} pyMap - Pinyin lookup table. Accepted for
 *   call compatibility; not consulted because the output contains words only.
 * @returns {string[]} Words in file order.
 * @throws {RangeError} If a word entry extends past the end of `buffer`.
 */
function getRecords(buffer, hzOffset, pyMap) {
  const GROUP_HEADER_BYTES = 4;
  const WORD_TRAILER_BYTES = 12;

  const records = [];
  let cursor = hzOffset;
  while (cursor + GROUP_HEADER_BYTES <= buffer.byteLength) {
    const wordCount = readUint16(buffer, cursor);
    const pyIndexBytes = readUint16(buffer, cursor + 2);
    // Indices are 2 bytes each. Round an odd byte count up so the cursor
    // advances in whole indices.
    cursor += GROUP_HEADER_BYTES + 2 * Math.ceil(pyIndexBytes / 2);

    for (let i = 0; i < wordCount; i++) {
      const wordBytes = readUint16(buffer, cursor);
      records.push(readUtf16Str(buffer, cursor + 2, wordBytes));
      cursor += 2 + wordBytes + WORD_TRAILER_BYTES;
    }
  }
  return records;
}
/**
 * Convert a downloaded .scel buffer to plain text and trigger a browser
 * download of the result (one word per line).
 *
 * @param {ArrayBuffer} buffer - Raw .scel file contents.
 * @param {string} filename - Original file name. A trailing ".scel" extension
 *   is replaced by ".txt"; if it is absent, ".txt" is appended.
 * @returns {void} Nothing. An unsupported file is reported with a console
 *   warning and no download is started.
 */
function convertScelToTxt(buffer, filename) {
  // Delay before releasing the blob URL. Deferring is the conservative choice
  // so the download has been handed to the browser before the URL goes away.
  const REVOKE_DELAY_MS = 10000;

  const hzOffset = getHzOffset(buffer);
  if (hzOffset < 0) {
    console.warn("不支持的scel格式", filename);
    return;
  }

  const pyMap = getPyMap(buffer);
  const records = getRecords(buffer, hzOffset, pyMap);
  const blob = new Blob([records.join("\n")], { type: "text/plain" });
  const objectUrl = URL.createObjectURL(blob);

  // A temporary anchor with the `download` attribute is the way to save a
  // blob under a chosen file name.
  const link = document.createElement("a");
  link.href = objectUrl;
  // Swap the extension only when it really is the suffix of the name.
  link.download = `${filename.replace(/\.scel$/i, "")}.txt`;
  document.body.appendChild(link);
  link.click();
  document.body.removeChild(link);

  // Release the blob, which would otherwise stay alive for the page's lifetime.
  setTimeout(() => URL.revokeObjectURL(objectUrl), REVOKE_DELAY_MS);
}
/**
 * Download a .scel dictionary and hand it to the converter.
 *
 * The output file name is taken from the URL's `name` query parameter and
 * falls back to "dict.scel" when that parameter is missing or empty.
 *
 * @param {string} url - Absolute URL of the .scel download.
 * @returns {Promise<void>} Settles once conversion has been attempted.
 *   Network, HTTP and parsing failures are logged to the console, not
 *   rethrown.
 * @throws {TypeError} Synchronously, if `url` is not a valid absolute URL.
 */
function fetchAndConvertScel(url) {
  const filename = new URL(url).searchParams.get('name') || "dict.scel";

  return fetch(url)
    .then((res) => {
      // fetch() only rejects on network errors. Without this check, an HTTP
      // error page would be parsed as if it were a dictionary.
      if (!res.ok) {
        throw new Error(`HTTP ${res.status}`);
      }
      return res.arrayBuffer();
    })
    .then((buffer) => {
      convertScelToTxt(buffer, filename);
    })
    .catch((err) => console.error("下载失败", err));
}
/**
 * Add a "下载txt" link next to a dictionary download link. Clicking it
 * downloads the .scel file behind `aTag` and saves it as text.
 *
 * @param {HTMLAnchorElement} aTag - The page's existing .scel download link.
 * @returns {void}
 */
function addDownloadButton(aTag) {
  const downloadBtn = document.createElement("a");
  downloadBtn.textContent = "下载txt";
  downloadBtn.href = "javascript:;";
  downloadBtn.style.cssText = "position:relative;z-index:99;background:none;height:auto;font-size:14px;color:hotpink;display:inline-flex;justify-content:center;";
  downloadBtn.onclick = (event) => {
    event.preventDefault();
    // Read href at click time so the current link target is used.
    fetchAndConvertScel(aTag.href);
  };
  // appendChild takes a single argument, so the button always becomes the
  // last child of the link's parent.
  aTag.parentNode.appendChild(downloadBtn);
}
/**
 * Find every dictionary download link on the page (absolute https and
 * protocol-relative forms) and attach a text-download button to each.
 *
 * @returns {void}
 */
function processLinks() {
  const selector = [
    "a[href^='https://pinyin.sogou.com/d/dict/download_cell.php']",
    "a[href^='//pinyin.sogou.com/d/dict/download_cell.php']",
  ].join(",");

  document.querySelectorAll(selector).forEach((link) => addDownloadButton(link));
}
processLinks();
})();
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Script converted from https://github.com/lewangdev/scel2txt