badbye · July 7, 2023 11:01
diff --git a/rmd-translate.R b/rmd-translate.R
 library(httr2)
 library(knitr)

 # Azure OpenAI completions endpoint and API key used by davinci_003_translate().
 # Each value is taken from the environment variable of the same name when it
 # is set, so real credentials do not have to be written into this script; the
 # original placeholder values remain as fallbacks.
 AZURE_API_URL <- Sys.getenv(
   "AZURE_API_URL",
   unset = "https://xxx.openai.azure.com/openai/deployments/davinci-003/completions?api-version=2022-12-01"
 )
 AZURE_API_KEY <- Sys.getenv("AZURE_API_KEY", unset = "xxx")
 # Prompt template for the completions endpoint; the single `%s` is filled with
 # the text to translate via sprintf(). The Chinese instructions tell the model
 # to translate the given markdown line by line into fluent Chinese, to leave
 # lines that start with `<--` and end with `-->` untranslated, and to keep
 # inline `r ...` code and \ref(...) references as they are. A worked Human/AI
 # example follows the instructions.
 # NOTE(review): the prompt spells the marker `<--`, while is_commented()
 # checks for `<!--` -- confirm which one is intended.
 PROMPT = "你是一个优秀的文学家和工程师。你将逐行翻译 Human 给定的 markdown 格式的文本，返回流畅易懂的中文。如果给定的内容中某一行以`<--`开头，以`-->`结尾，则忽略该行不翻译。\n行内的文本遇到以下规则时，... 所表示的内容也不翻译：\n1. `r ...`\n2. \\ref(...)\n\nHuman: China is the greatest country in the world. ^[You have to admit it.]\n<-- The US may disagree -->\nsee table \\ref(tb), its population is `r 10**8`.\nAI: 中国是世界上最伟大的国家。 ^[你必须承认这个事实。]\n<-- The US may disagree -->\n见表 \\ref(tb)， 它的人口数量是 `r 10**8`。\n\nHuman: %s\nAI: "

  
 # Wrap one line in an HTML comment (`<!-- ... -->`). A line that
 # is_commented() already recognises as a comment is returned unchanged.
 commented <- function(line) {
   if (!is_commented(line)) {
     return(paste0('<!-- ', line, ' -->'))
   }
   line
 }
 # Test whether each element of `line` is an HTML comment line.
 #
 # line: character vector (a single string works as before).
 # Returns a logical vector of the same length: TRUE where the text, after
 # trimming surrounding whitespace, starts with `<!--` and ends with `-->`.
 is_commented <- function(line) {
   trimmed <- trimws(line)
   # Elementwise `&` so a whole vector of lines can be tested in one call;
   # scalar `&&` rejects inputs longer than one element in R >= 4.3.
   startsWith(trimmed, '<!--') & endsWith(trimmed, '-->')
 }
 # Translate the non-comment lines of `content`.
 # The text is split on newlines, lines flagged by is_commented() are dropped,
 # and the remainder is re-joined and handed to davinci_003_translate().
 translate <- function(content) {
   pieces <- strsplit(content, '\n')[[1]]
   hidden <- vapply(pieces, is_commented, logical(1), USE.NAMES = FALSE)
   to_send <- paste(pieces[!hidden], collapse = '\n')
   davinci_003_translate(to_send)
 }
 # Send `text` to the Azure OpenAI completions endpoint and return the
 # completion generated for the translation prompt.
 #
 # text: string substituted into the `%s` slot of PROMPT.
 # Returns the `text` field of the first entry in the response's `choices`.
 # Side effect: prints the token usage reported in the response via cat().
 davinci_003_translate <- function(text) {
   payload <- list(
     prompt = sprintf(PROMPT, text),
     max_tokens = 2000,
     temperature = 0.9,
     frequency_penalty = 0,
     presence_penalty = 0,
     top_p = 1,
     best_of = 1,
     # stop sequences: the turn markers used by the dialogue in PROMPT
     stop = c("Human:", "AI:")
   )
   req <- request(AZURE_API_URL)
   req <- req_headers(
     req,
     "Content-Type" = "application/json",
     "api-key" = AZURE_API_KEY
   )
   resp <- req_perform(req_body_json(req, payload))
   # Parse the body once and reuse it for both the usage log and the result.
   json_resp <- resp_body_json(resp)
   usage <- json_resp$usage
   cat(sprintf('; total_tokens usage: %s; prompt_tokens: %s; completion_tokens: %s',
               usage$total_tokens,
               usage$prompt_tokens,
               usage$completion_tokens))
   json_resp$choices[[1]]$text
 }

 # S3 generic: hand a parsed chunk to the process_chunk method registered for
 # the class of `x`.
 process_chunk <- function(x) UseMethod('process_chunk')
 # process_chunk() method for fenced code chunks (class 'block').
 #
 # Rebuilds the chunk as "```{engine options}" + body + "```", using the
 # chunk's original option string (x$params.src). For 'block2' chunks the
 # body is prose: every original line is kept, wrapped in an HTML comment, and
 # the translation is appended after it. Chunks of any other engine are
 # emitted with their code unchanged.
 process_chunk.block <- function(x) {
   params <- opts_chunk$merge(x$params)
   code <- knit_code$get(params$label)
   # The engine is one of the merged chunk options held in `params`.
   engine <- tolower(params$engine)
   if (engine == 'block2') {
     translated_code <- translate(paste(code, collapse = '\n'))
     # keep the original code and comment it
     hidden_code <- vapply(code, commented, character(1), USE.NAMES = FALSE)
     code <- c(hidden_code, '\n', translated_code)
   }
   sprintf("```{%s %s}\n%s\n```",
           engine,
           x$params.src,
           paste(code, collapse = '\n'))
 }
 # process_chunk() method for the text between code chunks (class 'inline').
 # Emits every original line wrapped in an HTML comment, followed on a new
 # line by the translation of the whole text.
 process_chunk.inline <- function(x) {
   original_text <- x$input
   kept_lines <- sapply(strsplit(original_text, '\n')[[1]], commented)
   hidden_original <- paste(kept_lines, collapse = '\n')
   translation <- translate(original_text)
   sprintf('%s\n%s', hidden_original, translation)
 }

 # Translate an R Markdown file chunk by chunk and write the result to a new
 # file.
 #
 # file_path: path of the source document.
 # The output path replaces a trailing `.Rmd` with `-trans.Rmd`; when the path
 # has no such suffix, `-trans.Rmd` is appended so the input is never
 # overwritten. Progress and the output path are printed with cat().
 translate_rmd <- function(file_path) {
   # Escape the dot and anchor at the end so only the extension is replaced.
   translated_file_path <- sub('\\.Rmd$', '-trans.Rmd', file_path,
                               ignore.case = TRUE)
   if (identical(translated_file_path, file_path)) {
     translated_file_path <- paste0(file_path, '-trans.Rmd')
   }
   lines <- xfun::read_utf8(file_path)
   # Reset knitr's global option/code stores before splitting the document.
   opts_chunk$restore()
   knit_code$restore()
   opts_knit$restore()

   chunks <- knitr:::split_file(lines, patterns = all_patterns$md)
   translated_chunks <- character(length = length(chunks))
   t0 <- Sys.time()
   # seq_along() yields no iterations for an empty document (1:0 would not).
   for (i in seq_along(chunks)) {
     chunk <- chunks[[i]]
     if (inherits(chunk, 'inline') && !any(nzchar(trimws(chunk$input)))) {
       # Whitespace-only text: nothing to translate, so skip the API call.
       translated_chunks[i] <- '\n'
     } else {
       translated_chunks[i] <- process_chunk(chunk)
     }
     t <- Sys.time()
     cat(sprintf('\nprogress: %s/%s; time cost: %s secs', i, length(chunks),
                 round(as.numeric(t - t0, units = "secs"), 2)))
   }
   # Each chunk is processed exactly once, in the loop above; the collected
   # results are written as UTF-8 regardless of the session locale.
   xfun::write_utf8(translated_chunks, translated_file_path)
   cat(sprintf("\nwrite to file: %s", translated_file_path))
 }

 # Script entry point: translate the placeholder document 'xxx.Rmd'.
 translate_rmd("xxx.Rmd")
	library(httr2)
	library(knitr)

	# Azure OpenAI completions endpoint and API key used by davinci_003_translate().
	# Each value is taken from the environment variable of the same name when it
	# is set, so real credentials do not have to be written into this script; the
	# original placeholder values remain as fallbacks.
	AZURE_API_URL <- Sys.getenv(
	  "AZURE_API_URL",
	  unset = "https://xxx.openai.azure.com/openai/deployments/davinci-003/completions?api-version=2022-12-01"
	)
	AZURE_API_KEY <- Sys.getenv("AZURE_API_KEY", unset = "xxx")
	# Prompt template for the completions endpoint; the single `%s` is filled with
	# the text to translate via sprintf(). The Chinese instructions tell the model
	# to translate the given markdown line by line into fluent Chinese, to leave
	# lines that start with `<--` and end with `-->` untranslated, and to keep
	# inline `r ...` code and \ref(...) references as they are. A worked Human/AI
	# example follows the instructions.
	# NOTE(review): the prompt spells the marker `<--`, while is_commented()
	# checks for `<!--` -- confirm which one is intended.
	PROMPT = "你是一个优秀的文学家和工程师。你将逐行翻译 Human 给定的 markdown 格式的文本，返回流畅易懂的中文。如果给定的内容中某一行以`<--`开头，以`-->`结尾，则忽略该行不翻译。\n行内的文本遇到以下规则时，... 所表示的内容也不翻译：\n1. `r ...`\n2. \\ref(...)\n\nHuman: China is the greatest country in the world. ^[You have to admit it.]\n<-- The US may disagree -->\nsee table \\ref(tb), its population is `r 108`.\nAI: 中国是世界上最伟大的国家。 ^[你必须承认这个事实。]\n<-- The US may disagree -->\n见表 \\ref(tb)，它的人口数量是 `r 108`。\n\nHuman: %s\nAI: "


	# Wrap one line in an HTML comment (`<!-- ... -->`). A line that
	# is_commented() already recognises as a comment is returned unchanged.
	commented <- function(line) {
	  if (!is_commented(line)) {
	    return(paste0('<!-- ', line, ' -->'))
	  }
	  line
	}
	# Test whether each element of `line` is an HTML comment line.
	#
	# line: character vector (a single string works as before).
	# Returns a logical vector of the same length: TRUE where the text, after
	# trimming surrounding whitespace, starts with `<!--` and ends with `-->`.
	is_commented <- function(line) {
	  trimmed <- trimws(line)
	  # Elementwise `&` so a whole vector of lines can be tested in one call;
	  # scalar `&&` rejects inputs longer than one element in R >= 4.3.
	  startsWith(trimmed, '<!--') & endsWith(trimmed, '-->')
	}
	# Translate the non-comment lines of `content`.
	# The text is split on newlines, lines flagged by is_commented() are dropped,
	# and the remainder is re-joined and handed to davinci_003_translate().
	translate <- function(content) {
	  pieces <- strsplit(content, '\n')[[1]]
	  hidden <- vapply(pieces, is_commented, logical(1), USE.NAMES = FALSE)
	  to_send <- paste(pieces[!hidden], collapse = '\n')
	  davinci_003_translate(to_send)
	}
	# Send `text` to the Azure OpenAI completions endpoint and return the
	# completion generated for the translation prompt.
	#
	# text: string substituted into the `%s` slot of PROMPT.
	# Returns the `text` field of the first entry in the response's `choices`.
	# Side effect: prints the token usage reported in the response via cat().
	davinci_003_translate <- function(text) {
	  payload <- list(
	    prompt = sprintf(PROMPT, text),
	    max_tokens = 2000,
	    temperature = 0.9,
	    frequency_penalty = 0,
	    presence_penalty = 0,
	    top_p = 1,
	    best_of = 1,
	    # stop sequences: the turn markers used by the dialogue in PROMPT
	    stop = c("Human:", "AI:")
	  )
	  req <- request(AZURE_API_URL)
	  req <- req_headers(
	    req,
	    "Content-Type" = "application/json",
	    "api-key" = AZURE_API_KEY
	  )
	  resp <- req_perform(req_body_json(req, payload))
	  # Parse the body once and reuse it for both the usage log and the result.
	  json_resp <- resp_body_json(resp)
	  usage <- json_resp$usage
	  cat(sprintf('; total_tokens usage: %s; prompt_tokens: %s; completion_tokens: %s',
	              usage$total_tokens,
	              usage$prompt_tokens,
	              usage$completion_tokens))
	  json_resp$choices[[1]]$text
	}

	# S3 generic: hand a parsed chunk to the process_chunk method registered for
	# the class of `x`.
	process_chunk <- function(x) UseMethod('process_chunk')
	# process_chunk() method for fenced code chunks (class 'block').
	#
	# Rebuilds the chunk as "```{engine options}" + body + "```", using the
	# chunk's original option string (x$params.src). For 'block2' chunks the
	# body is prose: every original line is kept, wrapped in an HTML comment, and
	# the translation is appended after it. Chunks of any other engine are
	# emitted with their code unchanged.
	process_chunk.block <- function(x) {
	  params <- opts_chunk$merge(x$params)
	  code <- knit_code$get(params$label)
	  # The engine is one of the merged chunk options held in `params`.
	  engine <- tolower(params$engine)
	  if (engine == 'block2') {
	    translated_code <- translate(paste(code, collapse = '\n'))
	    # keep the original code and comment it
	    hidden_code <- vapply(code, commented, character(1), USE.NAMES = FALSE)
	    code <- c(hidden_code, '\n', translated_code)
	  }
	  sprintf("```{%s %s}\n%s\n```",
	          engine,
	          x$params.src,
	          paste(code, collapse = '\n'))
	}
	# process_chunk() method for the text between code chunks (class 'inline').
	# Emits every original line wrapped in an HTML comment, followed on a new
	# line by the translation of the whole text.
	process_chunk.inline <- function(x) {
	  original_text <- x$input
	  kept_lines <- sapply(strsplit(original_text, '\n')[[1]], commented)
	  hidden_original <- paste(kept_lines, collapse = '\n')
	  translation <- translate(original_text)
	  sprintf('%s\n%s', hidden_original, translation)
	}

	# Translate an R Markdown file chunk by chunk and write the result to a new
	# file.
	#
	# file_path: path of the source document.
	# The output path replaces a trailing `.Rmd` with `-trans.Rmd`; when the path
	# has no such suffix, `-trans.Rmd` is appended so the input is never
	# overwritten. Progress and the output path are printed with cat().
	translate_rmd <- function(file_path) {
	  # Escape the dot and anchor at the end so only the extension is replaced.
	  translated_file_path <- sub('\\.Rmd$', '-trans.Rmd', file_path,
	                              ignore.case = TRUE)
	  if (identical(translated_file_path, file_path)) {
	    translated_file_path <- paste0(file_path, '-trans.Rmd')
	  }
	  lines <- xfun::read_utf8(file_path)
	  # Reset knitr's global option/code stores before splitting the document.
	  opts_chunk$restore()
	  knit_code$restore()
	  opts_knit$restore()

	  chunks <- knitr:::split_file(lines, patterns = all_patterns$md)
	  translated_chunks <- character(length = length(chunks))
	  t0 <- Sys.time()
	  # seq_along() yields no iterations for an empty document (1:0 would not).
	  for (i in seq_along(chunks)) {
	    chunk <- chunks[[i]]
	    if (inherits(chunk, 'inline') && !any(nzchar(trimws(chunk$input)))) {
	      # Whitespace-only text: nothing to translate, so skip the API call.
	      translated_chunks[i] <- '\n'
	    } else {
	      translated_chunks[i] <- process_chunk(chunk)
	    }
	    t <- Sys.time()
	    cat(sprintf('\nprogress: %s/%s; time cost: %s secs', i, length(chunks),
	                round(as.numeric(t - t0, units = "secs"), 2)))
	  }
	  # Each chunk is processed exactly once, in the loop above; the collected
	  # results are written as UTF-8 regardless of the session locale.
	  xfun::write_utf8(translated_chunks, translated_file_path)
	  cat(sprintf("\nwrite to file: %s", translated_file_path))
	}

	# Script entry point: translate the placeholder document 'xxx.Rmd'.
	translate_rmd("xxx.Rmd")