Last active
December 17, 2024 09:16
-
-
Save CN-CODEGOD/6e43d6077e930f71cc70c03f9622ada5 to your computer and use it in GitHub Desktop.
Windows 中文乱码的疑难杂症
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Windows 常见的 encoding 有:cp936、UTF-8 | |
| 确保你使用的字符编码与来源的字符编码相同 | |
| powershell一般使用的是utf-8,确保你来源和终端encoding相同即可 | |
| 如果来源是 JavaScript,可以用 | |
| iconv-lite 模块进行 encoding 转码 | |
| 实例: | |
| cp936 转 utf-8: | |
/**
 * Runs the sibling PowerShell script `crawl1.ps1` with a URL taken from the
 * command line, decodes the script's raw output from the platform encoding,
 * parses it as HTML and prints the page title.
 *
 * Usage: node <this-file> <url>
 */
const { execFile } = require('node:child_process');
const os = require('node:os');
const path = require('node:path');
const { argv } = require('node:process');
const iconv = require('iconv-lite');
const { JSDOM } = require('jsdom');

// Encoding the child process output is assumed to use: cp936 (GBK) on Windows,
// UTF-8 everywhere else.
const encoding = os.platform() === 'win32' ? 'cp936' : 'utf-8';

// Crawled HTML easily exceeds Node's default 1 MiB child-process output limit.
const MAX_OUTPUT_BYTES = 16 * 1024 * 1024;

// Resolve the script path and read the URL argument.
const scriptPath = path.join(__dirname, 'crawl1.ps1');
const url = argv[2];
if (!url) {
  console.error('请提供一个URL作为参数');
  process.exit(1);
}

/**
 * Decodes raw bytes produced by the child process into a JavaScript string.
 *
 * @param {Buffer} raw - Bytes captured from the child's stdout or stderr.
 * @returns {string} Text decoded with the platform encoding chosen above.
 */
const decodeOutput = (raw) => iconv.decode(raw, encoding);

// Pass the arguments as an array (no shell) so that a URL containing shell
// metacharacters such as `&` or `;` can neither break nor inject commands.
// `encoding: 'buffer'` hands us the raw bytes, so no lossy string round-trip
// is needed before decoding.
execFile(
  'powershell',
  ['-ExecutionPolicy', 'Bypass', '-File', scriptPath, url],
  { encoding: 'buffer', maxBuffer: MAX_OUTPUT_BYTES },
  (error, stdout, stderr) => {
    if (error) {
      // error.message embeds stderr decoded with the wrong charset, so prefer
      // the raw stderr bytes; fall back to the message (e.g. spawn failures)
      // when the child wrote nothing.
      const details = stderr.length > 0 ? decodeOutput(stderr) : error.message;
      console.error('错误:', details);
      return;
    }
    if (stderr.length > 0) {
      // Any stderr output is treated as a failure, as in the original script.
      console.error('标准错误:', decodeOutput(stderr));
      return;
    }

    // Decode the script output and parse it as HTML.
    const decodedOutput = decodeOutput(stdout);
    try {
      const dom = new JSDOM(decodedOutput);
      console.log(`网页标题: ${dom.window.document.title}`);
    } catch (err) {
      // err.message is already a proper JS string; re-decoding it as cp936
      // would corrupt it.
      console.error('解析失败:', err.message);
    }
  },
);
| 常见encoding: | |
| utf-8,cp936(GBK) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment