Skip to content

Instantly share code, notes, and snippets.

@PhotonQuantum
Created June 6, 2021 17:46
Show Gist options
  • Save PhotonQuantum/a814b540dd2bc8569c03de05789f0148 to your computer and use it in GitHub Desktop.
Save PhotonQuantum/a814b540dd2bc8569c03de05789f0148 to your computer and use it in GitHub Desktop.
#![feature(bool_to_option)]
use serde::Deserialize;
use std::path::PathBuf;
use regex::Regex;
use std::fs::{read_dir, File};
use opencc_rust::*;
/// One poem record as deserialized from a corpus JSON file.
///
/// Each corpus file is parsed as a JSON array of these records. Field names
/// must match the JSON keys exactly, since no serde renaming is applied.
#[derive(Debug, Deserialize)]
struct Poetry {
    /// Value of the `author` key.
    author: String,
    /// Value of the `paragraphs` key; every element is searched individually.
    paragraphs: Vec<String>,
    /// Value of the `title` key.
    title: String,
}
/// Scans the poetry corpus for poems with a line that contains the characters
/// 光, 量 and 子 in that order, and pretty-prints every such poem.
///
/// Every file in `../chinese-poetry/json` whose name matches
/// `poet.<word>.<digits>.json` is parsed as a JSON array of [`Poetry`]
/// records. Each paragraph is converted with the OpenCC `T2S` configuration
/// before it is tested against the search pattern. A matching poem is printed
/// once, no matter how many of its paragraphs match.
///
/// A progress percentage is printed before every tenth file.
///
/// # Panics
///
/// Panics, naming the offending path, if the directory cannot be read, a file
/// cannot be opened, or a file is not valid JSON of the expected shape. Also
/// panics if the OpenCC converter cannot be created.
fn main() {
    let dir = PathBuf::from("../chinese-poetry/json");
    let file_pattern =
        Regex::new(r"poet\.\w*\.\d*\.json").expect("file-name pattern is a valid regex");
    let find_pattern = Regex::new(r".*光.*量.*子.*").expect("search pattern is a valid regex");
    let converter =
        OpenCC::new(DefaultConfig::T2S).expect("failed to initialise the OpenCC T2S converter");

    let files: Vec<_> = read_dir(&dir)
        .unwrap_or_else(|e| panic!("cannot read directory {}: {}", dir.display(), e))
        .map(|entry| {
            entry.unwrap_or_else(|e| panic!("cannot read an entry of {}: {}", dir.display(), e))
        })
        .filter(|entry| {
            // A non-UTF-8 file name can never match the pattern, so skip it
            // instead of aborting the whole scan.
            entry
                .file_name()
                .to_str()
                .map_or(false, |name| file_pattern.is_match(name))
        })
        .collect();

    let total = files.len();
    for (index, file) in files.iter().enumerate() {
        if index % 10 == 0 {
            println!("Progress: {}", index as f32 / total as f32 * 100.0);
        }

        let path = file.path();
        let handle = File::open(&path)
            .unwrap_or_else(|e| panic!("cannot open {}: {}", path.display(), e));
        // serde_json reads in small pieces; buffering avoids a syscall per read.
        let poets: Vec<Poetry> = serde_json::from_reader(std::io::BufReader::new(handle))
            .unwrap_or_else(|e| panic!("cannot parse {}: {}", path.display(), e));

        for poet in &poets {
            // `any` stops at the first hit, so a poem with several matching
            // paragraphs is still reported only once.
            let is_hit = poet
                .paragraphs
                .iter()
                .any(|paragraph| find_pattern.is_match(&converter.convert(paragraph)));
            if is_hit {
                println!("{:#?}", poet);
            }
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment