使用 `npm i pdf-stream-cli` 安装 PDF 转文本工具。
需要 jq 工具，macOS 可以使用 `brew install jq` 安装。
在账单目录执行以下命令：只保留 height 等于 7.99 的文本项，并把结果合并成一个 JSON 数组写入 2022-d.json：
ls | xargs -I{} pdf-stream-cli -t json {} | jq '.[].textContent|select(.!=null) | .items[]|select(.height==7.99)'|jq -s '.' > 2022-d.json
CSV 文件使用 `|` 分隔各列，而不是逗号，因为部分字段值本身可能包含逗号，会与逗号分隔符冲突。
以下 Java 代码需要 Hutool 依赖。
/**
 * Converts the extracted statement text items into a pipe-delimited CSV file.
 *
 * <p>Reads the JSON array stored in {@code 2022-d.json}, takes the {@code str} property of each
 * element, drops the values listed in {@code skipData}, and groups the remaining values six at
 * a time into rows. For every row the two date columns (0 and 1) are prefixed with
 * {@code 2022/} when they are not blank, and commas are removed from the two amount columns
 * (3 and 5). The header line and all rows are printed to standard output and written to
 * {@code 2022.csv} using {@code "| "} as the column separator.
 *
 * @throws IOException declared by the signature; NOTE(review): the Hutool file helpers used
 *         here appear to report I/O failures through their own unchecked exception — confirm
 * @throws IllegalStateException if the number of remaining values is not a multiple of six,
 *         which means the extracted columns cannot be grouped into complete rows
 */
@Test
public void readJson() throws IOException {
    final int columnCount = 6;
    final String yearPrefix = "2022/";
    final String delimiter = "| ";

    // Table header labels (Chinese and English); they are filtered out so only cell values remain.
    List<String> skipData = Arrays.asList("人民币账户 RMB A/C",
            "交易日", "记账日", "交易摘要", "人民币金额", "卡号末四位", "交易地金额",
            "SOLD", "POSTED", "DESCRIPTION", "RMB AMOUNT", "CARD NO(Last 4digits)", "Original Tran Amount"
    );
    String json = FileUtil.readUtf8String("/Users/jianyun/Downloads/账单/2022-d.json");
    JSONArray objects = JSONUtil.parseArray(json);

    List<String> data = new ArrayList<>();
    for (Object object : objects) {
        String str = BeanUtil.getProperty(object, "str");
        // A missing "str" becomes an empty cell: the column position is kept and the
        // later per-column string operations cannot hit a null.
        str = str == null ? "" : StrUtil.trim(str);
        if (skipData.contains(str)) {
            continue;
        }
        data.add(str);
    }

    // Fail early with a readable message instead of an index error deep inside the row loop.
    if (data.size() % columnCount != 0) {
        throw new IllegalStateException("Expected a multiple of " + columnCount
                + " values but found " + data.size()
                + "; the extracted items cannot be grouped into complete rows");
    }

    List<List<String>> split = CollectionUtil.split(data, columnCount);
    List<String> out = new ArrayList<>(split.size() + 1);
    out.add(StrUtil.join(delimiter, "交易日", "记账日", "交易摘要", "人民币金额", "卡号末四位", "交易地金额"));
    for (List<String> strings : split) {
        // Columns 0 and 1 hold dates without a year; prepend it unless the cell is blank.
        for (int dateColumn : new int[]{0, 1}) {
            String date = strings.get(dateColumn);
            if (StrUtil.isNotBlank(date)) {
                strings.set(dateColumn, yearPrefix + date);
            }
        }
        // Columns 3 and 5 hold amounts; strip the comma grouping separators.
        for (int amountColumn : new int[]{3, 5}) {
            strings.set(amountColumn, strings.get(amountColumn).replace(",", ""));
        }
        out.add(StrUtil.join(delimiter, strings));
    }
    System.out.println(out);
    FileUtil.writeLines(out, "/Users/jianyun/Downloads/账单/2022.csv", StandardCharsets.UTF_8);
}