Last active
October 12, 2024 10:05
-
-
Save dustinknopoff/0913e25d059f111f57045c904de25980 to your computer and use it in GitHub Desktop.
This script expects to be run from the top-level directory of a Zola project: `deno run --allow-read=. --allow-write=. migrateToTaxonomies.ts`
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import { | |
extract, | |
test as containsFrontmatter, | |
} from "https://deno.land/[email protected]/encoding/front_matter/any.ts"; | |
import { walk } from "https://deno.land/[email protected]/fs/mod.ts"; | |
import { stringify } from "npm:[email protected]" | |
/**
 * Overwrites `path` with a `---`-delimited front matter block followed by the body.
 *
 * @param path  File to overwrite.
 * @param attrs Front matter attributes, serialised with the imported `stringify`.
 * @param body  Content written after the front matter, separated by a blank line.
 */
// deno-lint-ignore no-explicit-any
async function writeFile(path: string, attrs: { [key: string]: any }, body: string): Promise<void> {
  await Deno.writeTextFile(path, `---\n${stringify(attrs)}\n---\n\n${body}`);
}
// Keys that may stay at the top level of a page's front matter; every other
// key is relocated by the migration loop.
const permittedTopLevelKeys = new Set([
  "title",
  "description",
  "updated",
  "weight",
  "draft",
  "slug",
  "path",
  "aliases",
  "in_search_index",
  "template",
  "taxonomies",
  "extra",
  "date",
]);

// Non-permitted keys listed here are moved under `taxonomies`; all others go
// under `extra`.
const taxonomies = new Set(["tags"]);
/**
 * Returns a new set holding the elements of `setA` that are absent from `setB`.
 * Neither input is modified; the result keeps `setA`'s iteration order.
 *
 * @param setA Set to take elements from.
 * @param setB Set of elements to exclude.
 * @returns The elements of `setA` not contained in `setB`.
 */
function difference<T>(setA: Set<T>, setB: Set<T>): Set<T> {
  const remaining = new Set<T>();
  for (const elem of setA) {
    if (!setB.has(elem)) {
      remaining.add(elem);
    }
  }
  return remaining;
}
// Migration loop: for every Markdown file under ./content/articles, move each
// non-permitted top-level front matter key under `taxonomies` (if listed in
// the `taxonomies` set) or under `extra`, then rewrite the file in place.
//
// `exts` restricts the walk to Markdown files so that other files living next
// to the articles (images, attachments, ...) are never read as text and
// overwritten with a front matter block.
for await (
  const entry of walk("./content/articles", { includeDirs: false, exts: [".md"] })
) {
  // Any path containing "_index" is left untouched.
  if (entry.path.includes("_index")) {
    continue;
  }
  console.log(entry.path);
  const str = await Deno.readTextFile(entry.path);

  // A file without front matter is treated as having no attributes at all.
  const post: { body: string; attrs: Record<string, unknown> } = containsFrontmatter(str)
    ? extract<Record<string, unknown>>(str)
    : { body: str, attrs: {} };
  const attrs = post.attrs;

  // Make sure both destination tables exist before keys are moved into them.
  if (!attrs.extra) {
    attrs.extra = {};
  }
  if (!attrs.taxonomies) {
    attrs.taxonomies = {};
  }
  const extraTable = attrs.extra as Record<string, unknown>;
  const taxonomyTable = attrs.taxonomies as Record<string, unknown>;

  const diff = difference(new Set(Object.keys(attrs)), permittedTopLevelKeys);
  for (const elem of diff) {
    const destination = taxonomies.has(elem) ? taxonomyTable : extraTable;
    destination[elem] = attrs[elem];
    delete attrs[elem];
  }

  await writeFile(entry.path, attrs, post.body);
}
Here's an alternate version that overrides the std lib front matter extractor to use `+++`
as the delimiter instead.
NOTE: This will convert your front matter to YAML (which is still valid for Zola).
import {
Extractor,
Extract,
Format,
Parser,
test as _test,
} from "https://deno.land/[email protected]/encoding/front_matter/mod.ts";
import { parse } from "https://deno.land/[email protected]/encoding/toml.ts";
import { walk } from "https://deno.land/[email protected]/fs/mod.ts";
import { stringify } from "npm:[email protected]"
/**
 * Splits `str` into its front matter and body using `rx`, and parses the
 * front matter with `parse`.
 *
 * @param str   Full document text.
 * @param rx    Regex whose match must start at index 0; its last capture group
 *              is taken as the raw front matter.
 * @param parse Parser applied to the trimmed front matter text.
 * @returns The trimmed front matter text, the remaining body and the parsed attributes.
 * @throws {TypeError} If `rx` does not match at the very start of `str`.
 */
function _extract<T>(
  str: string,
  rx: RegExp,
  parse: Parser,
): Extract<T> {
  const found = rx.exec(str);
  if (found === null || found.index !== 0) {
    throw new TypeError("Unexpected end of input");
  }

  // The last capture group holds the text between the delimiters.
  const rawBlock = found.at(-1);
  const frontMatter = rawBlock ? rawBlock.replace(/^\s+|\s+$/g, "") : "";
  const attrs = parse(frontMatter) as T;

  // The match starts at index 0, so the body is everything after it.
  const body = str.slice(found[0].length);

  return { frontMatter, body, attrs };
}
/**
 * Determines the front matter format of `str` from its first line.
 *
 * @param str     Document text.
 * @param formats Formats to try, in order; defaults to every key of
 *                `MAP_FORMAT_TO_RECOGNIZER_RX`.
 * @returns The first format whose recognizer regex matches the first line,
 *          or `Format.UNKNOWN` when none does.
 */
function recognize(str: string, formats?: Format[]): Format {
  const candidates = formats ||
    (Object.keys(MAP_FORMAT_TO_RECOGNIZER_RX) as Format[]);
  const firstLine = str.split(/(\r?\n)/)[0];

  const matched = candidates.find((format) =>
    // UNKNOWN has no recognizer and is never a valid answer from the search.
    format !== Format.UNKNOWN &&
    MAP_FORMAT_TO_RECOGNIZER_RX[format].test(firstLine)
  );

  return matched ?? Format.UNKNOWN;
}
/**
 * Builds an extractor that supports exactly the formats given in `formats`.
 *
 * @param formats Map from format to the parser used for that format.
 * @returns A function that recognizes the format of a document, then extracts
 *          and parses its front matter. It throws a `TypeError` when the
 *          format is unknown or has no parser in `formats`.
 */
function createExtractor(
  formats: Partial<Record<Format, Parser>>,
): Extractor {
  const supported = Object.keys(formats) as Format[];

  function extract<T>(str: string): Extract<T> {
    const detected = recognize(str, supported);
    const parser = detected === Format.UNKNOWN ? undefined : formats[detected];
    if (!parser) {
      throw new TypeError(`Unsupported front matter format`);
    }
    // `detected` cannot be UNKNOWN here, since that case leaves `parser` undefined.
    return _extract(
      str,
      MAP_FORMAT_TO_EXTRACTOR_RX[detected as Exclude<Format, Format.UNKNOWN>],
      parser,
    );
  }

  return extract;
}
/** A delimiter is either one token used on both sides, or a `[begin, end]` pair. */
type Delimiter = string | [begin: string, end: string];

/**
 * Returns the opening token of `delimiter`: the string itself, or the first
 * element of a `[begin, end]` pair.
 */
function getBeginToken(delimiter: Delimiter): string {
  if (Array.isArray(delimiter)) {
    const [begin] = delimiter;
    return begin;
  }
  return delimiter;
}
/**
 * Returns the closing token of `delimiter`: the string itself, or the second
 * element of a `[begin, end]` pair.
 */
function getEndToken(delimiter: Delimiter): string {
  if (Array.isArray(delimiter)) {
    const [, end] = delimiter;
    return end;
  }
  return delimiter;
}
/**
 * Builds the two regexes used for one front matter format.
 *
 * @param dv Delimiters accepted for the format; each is a regex-source
 *           fragment (or a `[begin, end]` pair of fragments).
 * @returns A pair `[recognizer, extractor]`, both case-insensitive and
 *          multiline: the recognizer matches a line consisting of a begin
 *          token; the extractor matches the whole front matter block and
 *          captures the text between the delimiters in its last group.
 */
function createRegExp(...dv: Delimiter[]): [RegExp, RegExp] {
  const beginAlternatives = dv.map(getBeginToken).join("|");
  const endAlternatives = dv.map(getEndToken).join("|");
  const beginPattern = `(${beginAlternatives})`;

  // An optional carriage return is only allowed for when running on Windows.
  const carriageReturn = Deno.build.os === "windows" ? "\\r?" : "";

  const extractorSource = [
    "^(",
    "\\ufeff?", // Maybe byte order mark
    beginPattern,
    "$([\\s\\S]+?)",
    `^(?:${endAlternatives})\\s*`,
    "$",
    carriageReturn,
    "(?:\\n)?)",
  ].join("");

  const recognizer = new RegExp(`^${beginPattern}$`, "im");
  const extractor = new RegExp(extractorSource, "im");
  return [recognizer, extractor];
}
// Recognizer/extractor regex pair for TOML front matter. Two delimiter styles
// are accepted: a `+++` ... `+++` pair (escaped, since the tokens are regex
// source fragments) and a single `= toml =` token on both sides.
const [RX_RECOGNIZE_TOML, RX_TOML] = createRegExp(
["\\+\\+\\+", "\\+\\+\\+"],
"= toml =",
);
// Regexes tested against the first line of a document by `recognize`.
// Only the TOML entry is registered here.
const MAP_FORMAT_TO_RECOGNIZER_RX: Omit<
Record<Format, RegExp>,
Format.UNKNOWN
> = {
[Format.TOML]: RX_RECOGNIZE_TOML,
};
// Regexes used by `_extract` and `test` to match the whole front matter block.
// Only the TOML entry is registered here.
const MAP_FORMAT_TO_EXTRACTOR_RX: Omit<Record<Format, RegExp>, Format.UNKNOWN> =
{
[Format.TOML]: RX_TOML,
};
// Extractor used by the migration loop: TOML front matter parsed with the
// imported `parse`.
const extract = createExtractor({
[Format.TOML]: parse as Parser,
});
/**
 * Reports whether `str` starts with a front matter block in one of `formats`.
 *
 * @param str     Document text.
 * @param formats Formats to check, in order; defaults to every key of
 *                `MAP_FORMAT_TO_EXTRACTOR_RX`.
 * @returns `true` as soon as one format's extractor regex matches at index 0,
 *          otherwise `false`.
 * @throws {TypeError} If `Format.UNKNOWN` is reached before a format matches.
 */
function test(str: string, formats?: Format[]): boolean {
  const candidates = formats ||
    (Object.keys(MAP_FORMAT_TO_EXTRACTOR_RX) as Format[]);

  return candidates.some((format) => {
    if (format === Format.UNKNOWN) {
      throw new TypeError("Unable to test for unknown front matter format");
    }
    const found = MAP_FORMAT_TO_EXTRACTOR_RX[format].exec(str);
    return found?.index === 0;
  });
}
/**
 * Rewrites the file at `path` as a `---`-delimited front matter block
 * (produced by the imported `stringify`), a blank line, and then `body`.
 *
 * @param path  File to overwrite.
 * @param attrs Front matter attributes to serialise.
 * @param body  Content placed after the front matter.
 */
// deno-lint-ignore no-explicit-any
async function writeFile(path: string, attrs: { [key: string]: any }, body: string) {
  const document = ["---", stringify(attrs), "---", "", body].join("\n");
  await Deno.writeTextFile(path, document);
}
// Front matter keys allowed to remain at the top level; the migration loop
// relocates every key that is not in this set.
const permittedTopLevelKeys = new Set([
  "title",
  "description",
  "updated",
  "weight",
  "draft",
  "slug",
  "path",
  "aliases",
  "in_search_index",
  "template",
  "taxonomies",
  "extra",
  "date",
]);

// Relocated keys found in this set go under `taxonomies`; the rest go under `extra`.
const taxonomies = new Set(["tags"]);
/**
 * Computes the set difference `setA \ setB` without modifying either input.
 *
 * @param setA Set to take elements from.
 * @param setB Set of elements to leave out.
 * @returns A new set with the elements of `setA` that `setB` does not contain,
 *          in `setA`'s iteration order.
 */
function difference<T>(setA: Set<T>, setB: Set<T>): Set<T> {
  return new Set([...setA].filter((member) => !setB.has(member)));
}
// Migration loop: for every walked file whose path contains "sample", move
// each non-permitted top-level front matter key under `taxonomies` (if listed
// in the `taxonomies` set) or under `extra`, then rewrite the file in place.
// NOTE(review): the "./" root and the "sample" filter look like test settings
// rather than a real Zola content path - confirm before running on a site.
for await (const { path } of walk("./", { includeDirs: false })) {
  if (!path.includes("sample")) {
    continue;
  }
  console.log(path);
  const source = await Deno.readTextFile(path);

  // A file without recognised front matter is treated as having no attributes.
  const parsed = test(source) ? extract(source) : { body: source, attrs: {} };
  const attrs: Record<string, unknown> = parsed.attrs;

  // Make sure both destination tables exist before keys are moved into them.
  if (!attrs.extra) {
    attrs.extra = {};
  }
  if (!attrs.taxonomies) {
    attrs.taxonomies = {};
  }
  const extraTable = attrs.extra as Record<string, unknown>;
  const taxonomyTable = attrs.taxonomies as Record<string, unknown>;

  // Same keys, in the same order, as the set difference against the permitted keys.
  const strayKeys = Object.keys(attrs).filter((key) => !permittedTopLevelKeys.has(key));
  for (const key of strayKeys) {
    const destination = taxonomies.has(key) ? taxonomyTable : extraTable;
    destination[key] = attrs[key];
    delete attrs[key];
  }

  await writeFile(path, attrs, parsed.body);
}
Thanks for the upgrade. I still have issues with the script, as regular working posts fail.
content/posts/2013/03/04/afpy.md
error: Uncaught Error: Parse error on line 1, column 26: Unexpected character: "+"
throw new TOMLParseError(message);
^
at parse (https://deno.land/[email protected]/encoding/_toml/parser.ts:890:13)
at _extract (file:///xxxmigrate_taxonomies.ts:21:19)
at extract (file:///xxx/migrate_taxonomies.ts:59:16)
at file:///Usersjxxxmigrate_taxonomies.ts:153:20
I issued a PR on deno
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
looks like a bug to me
denoland/std#3094