Skip to content

Instantly share code, notes, and snippets.

@sorrycc
Created June 13, 2022 14:40
Show Gist options
  • Save sorrycc/2adc1e97da9884ee58df3e92032ddda3 to your computer and use it in GitHub Desktop.
Save sorrycc/2adc1e97da9884ee58df3e92032ddda3 to your computer and use it in GitHub Desktop.
import invariant from 'tiny-invariant';
import 'zx/globals';
import { event, info } from '../logger';
// Patched: use domino-ext instead of domino in turndown.umd.js
// ref: https://github.com/fgnass/domino/issues/146
// @ts-ignore
import TurndownService from 'turndown/lib/turndown.umd.js';
/**
 * Entry point: converts the HTML returned by `require('pb').get('html')`
 * (the clipboard's HTML flavor) to Markdown and writes it to
 * `../../docs/<file>.md`, resolved relative to this script's directory.
 *
 * Required flag: `--file <name>` — base name (without extension) of the
 * Markdown document to create or overwrite.
 *
 * Throws (via `invariant`) when `--file` is missing or when no HTML string is
 * available to convert.
 */
(async () => {
  info(argv);
  invariant(argv.file, 'file is required');

  // Why not https://github.com/sindresorhus/clipboardy?
  // Because it only supports plain text, while this script needs HTML.
  const html = require('pb').get('html');
  info(`html: ${html}`);
  // Turndown only accepts a string (or DOM node); an empty payload would
  // silently produce an empty document, so fail loudly instead.
  invariant(
    typeof html === 'string' && html.length > 0,
    'no HTML found on the clipboard',
  );

  // For offline debugging, replace the clipboard read above with a literal
  // such as the following (a snippet copied from the clipboardy README):
  // const html = `<meta charset='utf-8'><h2 dir="auto">Install</h2><div class="highlight highlight-source-shell"><pre>npm install clipboardy</pre></div><br class="Apple-interchange-newline">`;

  const markdown = new TurndownService({
    headingStyle: 'atx',
    codeBlockStyle: 'fenced',
    // fence: '```````',
  }).turndown(html);
  info(`markdown: ${markdown}`);

  // Trim every line individually.
  // NOTE(review): this also strips indentation inside fenced code blocks and
  // nested lists — confirm that this is intended.
  const trimmedMarkdown = markdown
    .split('\n')
    .map((line: string) => line.trim())
    .join('\n');

  const file = argv.file;
  const mdFile = path.join(__dirname, `../../docs/${file}.md`);
  fs.writeFileSync(mdFile, trimmedMarkdown, 'utf-8');
  event(`✅ DONE`);
})();
import invariant from 'tiny-invariant';
import 'zx/globals';
import { event, info } from '../logger';
/** Working directory at start-up; relative input paths are resolved against it. */
const cwd = process.cwd();

/** URL that translation requests are POSTed to. */
const API_URL = 'http://localhost:8080/translate';

/** JSON file, stored next to this script, that persists earlier translations. */
const cacheFile = path.join(__dirname, 'translate.cache.json');

/** Parsed content of the cache file, or an empty object when the file does not exist yet. */
const cacheData = (() => {
  if (!fs.existsSync(cacheFile)) {
    return {};
  }
  const raw = fs.readFileSync(cacheFile, 'utf-8');
  return JSON.parse(raw);
})();

/** In-memory view of the cache: source text → translated text. */
const cache = new Map(Object.entries(cacheData));
/**
 * Stores one translation in the in-memory cache and immediately rewrites the
 * whole cache file (pretty-printed JSON, 2-space indent).
 *
 * @param key - Cache key (the source text).
 * @param value - Translated text to remember for that key.
 */
const saveCache = (key: string, value: string) => {
  cache.set(key, value);
  const snapshot = Object.fromEntries(cache);
  const serialized = JSON.stringify(snapshot, null, 2);
  fs.writeFileSync(cacheFile, serialized, 'utf-8');
};
/**
 * Returns a promise that resolves (with no value) after roughly `ms` milliseconds.
 *
 * @param ms - Time to wait, in milliseconds.
 */
const delay = (ms: number) =>
  new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
/**
 * Translates `text` by POSTing it to `API_URL`, consulting the persistent
 * cache first and storing every fresh result in it.
 *
 * @param opts.text - Text to translate; must be shorter than 5000 characters.
 * @param opts.sourceLang - Sent as `source_lang`; defaults to `'auto'`.
 * @param opts.targetLang - Sent as `target_lang`; defaults to `'ZH'`.
 * @returns `data`: the translated text; `cache`: `true` when it came from the
 *   cache and no request was made.
 * @throws (via `invariant`) when the text is too long, the service does not
 *   answer with HTTP 200, or the response carries no usable `data` string.
 */
const translate = async (opts: {
  text: string;
  sourceLang?: string;
  targetLang?: string;
}): Promise<{ data: string; cache: boolean }> => {
  const { text, sourceLang = 'auto', targetLang = 'ZH' } = opts;
  invariant(text.length < 5000, `Too long... ${text.length}`);

  // A translation is only valid for the language pair it was requested with,
  // so the pair has to be part of the cache key. The default pair keeps the
  // bare text as its key so cache files written by earlier versions stay valid.
  const isDefaultPair = sourceLang === 'auto' && targetLang === 'ZH';
  const cacheKey = isDefaultPair
    ? text
    : `${sourceLang}\u0000${targetLang}\u0000${text}`;

  const cached = cache.get(cacheKey);
  if (typeof cached === 'string') {
    info(`Cache hit.`);
    return { data: cached, cache: true };
  }

  const res = await fetch(API_URL, {
    method: 'POST',
    // Without this header a string body is sent as text/plain even though it
    // contains JSON.
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({
      text,
      source_lang: sourceLang,
      target_lang: targetLang,
    }),
  });
  invariant(res.status === 200, `${res.status} ${res.statusText}`);

  const json = await res.json();
  const { data } = json;
  invariant(
    typeof data === 'string' && data.length > 0,
    `data is null, ${JSON.stringify(json)}`,
  );
  saveCache(cacheKey, data);
  return { data, cache: false };
};
/**
 * Entry point: writes a bilingual copy of a Markdown file.
 *
 * The file named by the first positional argument is split into paragraphs
 * (pieces separated by one blank line). Paragraphs that start with a code
 * fence, consist of a single image, or are blank are copied through as-is;
 * all other paragraphs are translated in batches through `translate`. The
 * output contains every original paragraph, each translated one immediately
 * followed by its translation, and is written to `<input>.translated.md`.
 *
 * Optional flags `--sourceLang` / `--targetLang` are forwarded to `translate`.
 *
 * Throws (via `invariant`) when the argument is missing, the file does not
 * exist, or the service returns a different number of paragraphs than it was
 * sent.
 */
(async () => {
  type ChunkType = 'CODE_BLOCK' | 'TEXT' | 'IMG' | 'BLANK';
  interface Chunk {
    readonly source: string;
    readonly type: ChunkType;
    translated?: string;
  }

  info(argv);
  // Validate up front; `path.isAbsolute(undefined)` would otherwise throw an
  // unhelpful TypeError.
  const file = String(argv._[0] ?? '');
  invariant(file, 'file is required');
  const absFile = path.isAbsolute(file) ? file : path.join(cwd, file);

  const SEPARATOR = '\n\n';
  // Kept below the 5000-character ceiling enforced by `translate`.
  const CHAR_COUNT_LIMIT = 4000;
  const IMG_PATTERN = /^!\[(.+)?]\(.+?\)$/;

  // NOTE(review): a fenced code block that itself contains blank lines is cut
  // into several paragraphs by SEPARATOR; only the first starts with ``` so
  // the remainder is still classified as TEXT and sent for translation.
  const classify = (source: string): ChunkType => {
    if (source.trim() === '') return 'BLANK';
    if (source.startsWith('```')) return 'CODE_BLOCK';
    if (IMG_PATTERN.test(source)) return 'IMG';
    return 'TEXT';
  };

  // read file
  invariant(fs.existsSync(absFile), `File not found: ${absFile}`);
  const text = fs.readFileSync(absFile, 'utf-8').trim();
  info(`text length: ${text.length}`);

  // One record per paragraph, in document order. An array (rather than a map
  // keyed by paragraph text) keeps repeated paragraphs as separate entries.
  const chunks: Chunk[] = text
    .split(SEPARATOR)
    .map((source) => ({ source, type: classify(source) }));

  // Group translatable paragraphs into batches whose joined length (paragraph
  // characters plus the separators between them) stays within the limit.
  const blocks: Chunk[][] = [];
  let currBlock: Chunk[] = [];
  let currCharCount = 0;
  for (const chunk of chunks) {
    if (chunk.type !== 'TEXT') continue;
    const charCount = chunk.source.length;
    const separatorCost = currBlock.length > 0 ? SEPARATOR.length : 0;
    const wouldOverflow =
      currCharCount + separatorCost + charCount > CHAR_COUNT_LIMIT;
    // Never flush an empty batch: an oversized first paragraph simply becomes
    // a batch of its own.
    if (wouldOverflow && currBlock.length > 0) {
      blocks.push(currBlock);
      currBlock = [chunk];
      currCharCount = charCount;
    } else {
      currBlock.push(chunk);
      currCharCount += separatorCost + charCount;
    }
  }
  if (currBlock.length > 0) {
    blocks.push(currBlock);
  }
  info(`block length: ${blocks.length}`);

  // translate
  const blockLength = blocks.length;
  for (const [index, block] of blocks.entries()) {
    const blockText = block.map((chunk) => chunk.source).join(SEPARATOR);
    info(`[${index + 1}/${blockLength}] Translating block...`);
    const { data: translatedText, cache: fromCache } = await translate({
      text: blockText,
      sourceLang: argv.sourceLang,
      targetLang: argv.targetLang,
    });
    const translatedParts = translatedText.split(SEPARATOR);
    // The translation must come back with the same paragraph structure,
    // otherwise it cannot be mapped onto the original paragraphs.
    invariant(
      block.length === translatedParts.length,
      `translated length not match`,
    );
    block.forEach((chunk, i) => {
      chunk.translated = translatedParts[i];
    });
    // Pause 1-3 s between real requests; skipped after cache hits and after
    // the final batch.
    if (!fromCache && index < blockLength - 1) {
      const delayTime = 1000 + Math.floor(Math.random() * 2000);
      info(`delay ${delayTime}ms...`);
      await delay(delayTime);
    }
  }

  // merge: original paragraph first, translation (if any) right after it
  const mergedArr: string[] = [];
  for (const { source, type, translated } of chunks) {
    mergedArr.push(source);
    if (type === 'TEXT' && translated !== undefined) {
      mergedArr.push(translated);
    }
  }
  info(`Merged.`);

  // write new file — only a trailing `.md` is rewritten; any other name gets
  // the suffix appended so the input file can never be overwritten.
  const absNewFile = /\.md$/.test(absFile)
    ? absFile.replace(/\.md$/, '.translated.md')
    : `${absFile}.translated.md`;
  fs.writeFileSync(absNewFile, mergedArr.join(SEPARATOR), 'utf-8');
  event(`Translated to ${absNewFile}`);
})();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment