// Migrate front matter so that keys Zola does not allow at the top level
// end up under `taxonomies` or `extra`.
import {
  extract,
  test as containsFrontmatter,
} from "https://deno.land/[email protected]/encoding/front_matter/any.ts";
import { walk } from "https://deno.land/[email protected]/fs/mod.ts";
import { stringify } from "npm:[email protected]";

// Write the post back out with YAML front matter between --- delimiters.
async function writeFile(path: string, attrs: { [key: string]: any }, body: string) {
  await Deno.writeTextFile(path, `---\n${stringify(attrs)}\n---\n\n${body}`);
}

// Keys Zola accepts at the top level of page front matter.
const permittedTopLevelKeys = new Set(["title", "description", "updated", "weight", "draft", "slug", "path", "aliases", "in_search_index", "template", "taxonomies", "extra", "date"]);
const taxonomies = new Set(["tags"]);

// Set difference: elements of setA that are not in setB.
function difference<T>(setA: Set<T>, setB: Set<T>): Set<T> {
  const _difference = new Set(setA);
  for (const elem of setB) {
    _difference.delete(elem);
  }
  return _difference;
}

for await (const entry of walk("./content/articles", { includeDirs: false })) {
  if (!entry.path.includes("_index")) {
    console.log(entry.path);
    const str = await Deno.readTextFile(entry.path);
    let post;
    if (containsFrontmatter(str)) {
      post = extract(str);
    } else {
      post = { body: str, attrs: {} };
    }
    if (!post.attrs.extra) {
      post.attrs.extra = {};
    }
    if (!post.attrs.taxonomies) {
      post.attrs.taxonomies = {};
    }
    // Move every non-permitted top-level key into `taxonomies` (if it is a
    // known taxonomy) or `extra`.
    const diff = difference(new Set(Object.keys(post.attrs)), permittedTopLevelKeys);
    if (diff.size > 0) {
      for (const elem of diff) {
        if (taxonomies.has(elem)) {
          post.attrs.taxonomies[elem] = post.attrs[elem];
        } else {
          post.attrs.extra[elem] = post.attrs[elem];
        }
        delete post.attrs[elem];
      }
    }
    await writeFile(entry.path, post.attrs, post.body);
  }
}
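For anyone running this: the script needs Deno's file-system permissions, so something like deno run --allow-read --allow-write migrate_taxonomies.ts should work (the filename is just an example; the npm:yaml import also requires a Deno version that supports npm: specifiers).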
It looks like there is a different path to import from for TOML front matter: https://deno.land/[email protected]/encoding/front_matter/toml.ts
Ah, it still uses --- as the delimiter though 😅
Do you mean changing the import on L4 (https://gist.github.com/dustinknopoff/0913e25d059f111f57045c904de25980#file-migratetotaxonomies-ts-L4)? I had the same result. (I am trying to understand Deno, as I am not very versed in JS.)
And Zola behaves like Hugo here: it expects what sits between the --- delimiters to be YAML:
Error: Failed to serve the site
Error: Error when parsing front matter of section `xxxxcontent/posts/2020/09/08/assassin-royal/index.md`
Error: Reason: YAML deserialize error: Error("invalid type: string \"date = 2020-09-08T16:18:51+02:00 title = \\\"Lire le cycle de l'Assassin Royal, c'est compliqué\\\"\\n[taxonomies] tags = [\\\"livre\\\", \\\"un\\\", \\\"deux\\\", \\\"trois\\\", \\\"quatre\\\", \\\"cing\\\", \\\"six\\\", \\\"sept\\\", \\\"huit\\\", \\\"neuf\\\", \\\"dix\\\", \\\"etc...\\\"] categories = [\\\"test\\\", \\\"autre catégorie\\\", \\\"un\\\", \\\"deux\\\", \\\"trois\\\", \\\"quatre\\\", \\\"cing\\\", \\\"six\\\", \\\"sept\\\", \\\"huit\\\", \\\"neuf\\\", \\\"dix\\\", \\\"etc...\\\"]\\n[extra] twitter = \\\"https://twitter.com/jpcaruana/status/1303356472705921026\\\"\", expected struct PageFrontMatter", line: 2, column: 1)
looks like a bug to me
Here's an alternate version which overrides the std lib front matter module to use +++ as the delimiter instead.
NOTE: This will convert your front matter into YAML (which is still valid for Zola).
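To make that concrete, here is roughly what the conversion does to a hypothetical post (key names and values are made up; the exact YAML layout depends on the yaml package):

+++
title = "My post"
tags = ["livre", "avis"]
twitter = "https://twitter.com/example"
+++

becomes

---
title: My post
extra:
  twitter: https://twitter.com/example
taxonomies:
  tags:
    - livre
    - avis
---

The full script: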
import {
Extractor,
Extract,
Format,
Parser,
test as _test,
} from "https://deno.land/[email protected]/encoding/front_matter/mod.ts";
import { parse } from "https://deno.land/[email protected]/encoding/toml.ts";
import { walk } from "https://deno.land/[email protected]/fs/mod.ts";
import { stringify } from "npm:[email protected]"
// Pull the front matter block out of `str` with the given regex, parse it with
// `parse`, and return the parsed attrs plus the remaining body.
function _extract<T>(
str: string,
rx: RegExp,
parse: Parser,
): Extract<T> {
const match = rx.exec(str);
if (!match || match.index !== 0) {
throw new TypeError("Unexpected end of input");
}
const frontMatter = match.at(-1)?.replace(/^\s+|\s+$/g, "") || "";
const attrs = parse(frontMatter) as T;
const body = str.replace(match[0], "");
return { frontMatter, body, attrs };
}
// Look at the first line of the file to decide which front matter format it uses.
function recognize(str: string, formats?: Format[]): Format {
if (!formats) {
formats = Object.keys(MAP_FORMAT_TO_RECOGNIZER_RX) as Format[];
}
const [firstLine] = str.split(/(\r?\n)/);
for (const format of formats) {
if (format === Format.UNKNOWN) {
continue;
}
if (MAP_FORMAT_TO_RECOGNIZER_RX[format].test(firstLine)) {
return format;
}
}
return Format.UNKNOWN;
}
// Build an extract() function that only understands the formats passed in.
function createExtractor(
formats: Partial<Record<Format, Parser>>,
): Extractor {
const formatKeys = Object.keys(formats) as Format[];
return function extract<T>(str: string): Extract<T> {
const format = recognize(str, formatKeys);
const parser = formats[format];
if (format === Format.UNKNOWN || !parser) {
throw new TypeError(`Unsupported front matter format`);
}
return _extract(str, MAP_FORMAT_TO_EXTRACTOR_RX[format], parser);
};
}
type Delimiter = string | [begin: string, end: string];
function getBeginToken(delimiter: Delimiter): string {
return Array.isArray(delimiter) ? delimiter[0] : delimiter;
}
function getEndToken(delimiter: Delimiter): string {
return Array.isArray(delimiter) ? delimiter[1] : delimiter;
}
// Build the recognizer regex (matches the opening delimiter) and the extractor
// regex (captures everything between the delimiters) for the given delimiters.
function createRegExp(...dv: Delimiter[]): [RegExp, RegExp] {
const beginPattern = "(" + dv.map(getBeginToken).join("|") + ")";
const pattern = "^(" +
"\\ufeff?" + // Maybe byte order mark
beginPattern +
"$([\\s\\S]+?)" +
"^(?:" + dv.map(getEndToken).join("|") + ")\\s*" +
"$" +
(Deno.build.os === "windows" ? "\\r?" : "") +
"(?:\\n)?)";
return [
new RegExp("^" + beginPattern + "$", "im"),
new RegExp(pattern, "im"),
];
}
// The actual override: recognize TOML front matter delimited by +++
// (the std lib version expects ---, as discussed above).
const [RX_RECOGNIZE_TOML, RX_TOML] = createRegExp(
["\\+\\+\\+", "\\+\\+\\+"],
"= toml =",
);
const MAP_FORMAT_TO_RECOGNIZER_RX: Omit<
Record<Format, RegExp>,
Format.UNKNOWN
> = {
[Format.TOML]: RX_RECOGNIZE_TOML,
};
const MAP_FORMAT_TO_EXTRACTOR_RX: Omit<Record<Format, RegExp>, Format.UNKNOWN> =
{
[Format.TOML]: RX_TOML,
};
// Front matter extractor that only handles TOML.
const extract = createExtractor({
[Format.TOML]: parse as Parser,
});
// Returns true if `str` starts with a front matter block in a supported format.
function test(str: string, formats?: Format[]): boolean {
if (!formats) {
formats = Object.keys(MAP_FORMAT_TO_EXTRACTOR_RX) as Format[];
}
for (const format of formats) {
if (format === Format.UNKNOWN) {
throw new TypeError("Unable to test for unknown front matter format");
}
const match = MAP_FORMAT_TO_EXTRACTOR_RX[format].exec(str);
if (match?.index === 0) {
return true;
}
}
return false;
}
// Write the post back out with YAML front matter between --- delimiters.
async function writeFile(path: string, attrs: { [key: string]: any }, body: string) {
await Deno.writeTextFile(path, `---\n${stringify(attrs)}\n---\n\n${body}`)
}
// Keys Zola accepts at the top level of page front matter.
const permittedTopLevelKeys = new Set(["title", "description", "updated", "weight", "draft", "slug", "path", "aliases", "in_search_index", "template", "taxonomies", "extra", "date"])
const taxonomies = new Set(["tags"])
function difference<T>(setA: Set<T>, setB: Set<T>): Set<T> {
const _difference = new Set(setA);
for (const elem of setB) {
_difference.delete(elem);
}
return _difference;
}
// Walk the current directory and only process paths containing "sample".
for await (const entry of walk("./", { includeDirs: false })) {
if (entry.path.includes("sample")) {
console.log(entry.path);
const str = await Deno.readTextFile(entry.path);
let post;
if (test(str)) {
post = extract(str);
} else {
post = { body: str, attrs: {} }
}
if (!post.attrs.extra) {
post.attrs.extra = {}
}
if (!post.attrs.taxonomies) {
post.attrs.taxonomies = {}
}
const diff = difference(new Set(Object.keys(post.attrs)), permittedTopLevelKeys)
if (diff.size > 0) {
for (const elem of diff) {
if (taxonomies.has(elem)) {
post.attrs.taxonomies[elem] = post.attrs[elem]
} else {
post.attrs.extra[elem] = post.attrs[elem]
}
delete post.attrs[elem]
}
}
await writeFile(entry.path, post.attrs, post.body)
}
}
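This runs the same way as the first script (e.g. deno run --allow-read --allow-write with whatever filename you saved it under); note that the walk at the bottom currently starts at ./ and only rewrites paths containing "sample", so adjust both before pointing it at real content.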
Thanks for the upgrade. I still have issues with the script, as it fails on regular, previously working posts:
content/posts/2013/03/04/afpy.md
error: Uncaught Error: Parse error on line 1, column 26: Unexpected character: "+"
throw new TOMLParseError(message);
^
at parse (https://deno.land/[email protected]/encoding/_toml/parser.ts:890:13)
at _extract (file:///xxxmigrate_taxonomies.ts:21:19)
at extract (file:///xxx/migrate_taxonomies.ts:59:16)
at file:///Usersjxxxmigrate_taxonomies.ts:153:20
I issued a PR on deno
Great! Thanks for sharing.
Sadly it does not detect TOML front matter contained within +++ rather than --- (as described in https://gohugo.io/content-management/front-matter/#front-matter-formats).