Skip to content

Instantly share code, notes, and snippets.

@psyomn
Created July 31, 2023 03:52
Show Gist options
  • Save psyomn/55b5711d14ac763259cacfbbf7ce9c81 to your computer and use it in GitHub Desktop.
Save psyomn/55b5711d14ac763259cacfbbf7ce9c81 to your computer and use it in GitHub Desktop.
cmus music player cache extractor/converter
/*
** cce converts a cmus cache to another format for export.
**
** Copyright (C) 2023 Simon Symeonidis
**
** This program is free software: you can redistribute it and/or modify
** it under the terms of the GNU General Public License as published by
** the Free Software Foundation, either version 3 of the License, or
** (at your option) any later version.
**
** This program is distributed in the hope that it will be useful,
** but WITHOUT ANY WARRANTY; without even the implied warranty of
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
** GNU General Public License for more details.
**
** You should have received a copy of the GNU General Public License
** along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
/*
* Adapted from the source code in cmus:
* - https://github.com/cmus/cmus
* - https://github.com/cmus/cmus/blob/master/cache.c
*
* To run, you can do the following:
*
* for raw output:
* ./cce -f cache -r
* for json output:
* ./cce -f cache -j
* with my current collection:
* $ ./cce -j -f cache | jq -c . | wc -l
* 2206
*
* be warned: the json is not stress tested, so it might break on your setup.
*
* I couldn't find any tool that does this, so I decided it might be a fun
* little program to write, and to learn how cmus does it (and an excuse to use
* mmap and other system stuff).
*
* The overall format seems to be something like this:
*
* [header]
* [entry ]
* [entry ]
* [... ]
* [entry ]
*
* The header section seems to be 3 magic bytes, accompanied by 5 bytes to be
* used to denote version number, and other properties:
*
* [header ] = [CTC 0x0d 0x00 0x00 0x00 0x01]
*
* an entry is simply:
*
* u32 size (full size of struct including field)
* i32 play_count
* i64 mtime
* i32 duration
* i32 bitrate
* i32 bpm
* u8[52] reserved bytes (set to 0xff by default)
* u8[] strings: a variable sized array of strings, delimited by 0's.
*
* strings is specified to be first a triad:
* filename, codec, codec profile
* -> and after those, key/value pairs.
*
* For example: mysong.mp3
*
* /path/to/song.mp3[0x00] filename
* mp3[0x00] codec
* CBR[0x00] codec profile
* artist[0x00] key
* Awesome[0x20]Artist[0x00] value
* title[0x00] key
* the[0x20]song[0x00] value
* ...
*
* The length of the strings area is the entry's size field minus the
* compile-time known size of the fixed part of the struct.
*
* A final note: everything is aligned to 8 bytes. That is, each new entry
* starts at the next offset that is a multiple of 8.
* Here's some data to exemplify this statement:
*
* raw size: 320 [00000140]; aligned size: 320 (padded 0)
* raw size: 352 [00000160]; aligned size: 352 (padded 0)
* x raw size: 324 [00000144]; aligned size: 328 (padded 4)
* x raw size: 306 [00000132]; aligned size: 312 (padded 6)
* x raw size: 324 [00000144]; aligned size: 328 (padded 4)
* x raw size: 314 [0000013a]; aligned size: 320 (padded 6)
* x raw size: 330 [0000014a]; aligned size: 336 (padded 6)
* x raw size: 322 [00000142]; aligned size: 328 (padded 6)
* x raw size: 324 [00000144]; aligned size: 328 (padded 4)
* x raw size: 316 [0000013c]; aligned size: 320 (padded 4)
* x raw size: 378 [0000017a]; aligned size: 384 (padded 6)
*
* The raw sizes are the size element in the entry struct, which include the
* variable itself. Because of this alignment, it would be erroneous to skip to
* the next entry by simply adding the size to the pointer. The correct way
* would be to calculate some sort of padding (in our case these are zero
* bytes), to make the pointer land on a multiple of 8 index.
*
* SPDX-License-Identifier: GPL-3.0-or-later
*/
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <getopt.h>
#include <linux/limits.h> /* PATH_MAX */
/* Cache format versions this tool knows about. check_version() compares the
 * first byte of cache_header.version against these values. */
enum cache_versions {
VERSION_13 = 0x0d,
};
/* Output formats. main() maps each value to the matching export_* function. */
enum export_type {
RAW, /* handled by export_raw */
DEBUG, /* handled by export_debug */
JSON, /* handled by export_json */
};
/* Run-time options, filled in from the command line by init_session_or_die().
 * The single global instance `sess` starts with an empty path and RAW output. */
struct session {
/* path of the cache file to open (argument of -f) */
char filepath[PATH_MAX];
/* selected output format */
enum export_type et;
} sess = { .filepath = {0}, .et = RAW };
/* Number of reserved bytes at the end of the fixed part of an entry. This
 * might vary per cache version; if the reader of this code is not the author,
 * it is advised to read the cmus source file cache.c. */
#define CACHE_RESERVED_SZ 52
/* Taken from cache.c, and necessary here: rounds `size` up to the next
 * multiple of sizeof(long), which gives the distance from the start of one
 * entry to the start of the next.
 * NOTE(review): the 8-byte alignment described in the file header assumes
 * sizeof(long) == 8 -- confirm on the target platform. */
#define ALIGN(size) (((size) + sizeof(long) - 1) & ~(sizeof(long) - 1))
/* File header found at the very start of the cache; consult cache.c in
 * project cmus for the authoritative layout. */
struct cache_header {
/* magic bytes identifying the file */
uint8_t magic[3];
/* first byte is the format version (see enum cache_versions); the meaning of
 * the remaining bytes is not interpreted by this tool */
uint8_t version[5];
};
/* One cache record as it is laid out in the file: a fixed-size part followed
 * by a variable-length area of NUL-delimited strings. */
struct entry {
/* full size of the record in bytes, including this field and the strings */
uint32_t size;
int32_t play_count;
int64_t mtime;
int32_t duration;
int32_t bitrate;
int32_t bpm;
/* reserved bytes; not interpreted here beyond printing first/last in raw mode */
uint8_t _reserved[CACHE_RESERVED_SZ];
/* (size - sizeof(struct entry)) bytes of strings, each terminated by 0 */
char strings[];
};
/*
 * Print the command-line synopsis to stdout.
 *
 * name: program name shown in the synopsis line (normally argv[0]).
 */
static void usage(const char *name)
{
    /* Synopsis line: the only part that depends on the program name. */
    printf("usage:\n %s [options] -f <cmus-cachefile>\n", name);

    /* Fixed list of options. */
    fputs("options are: \n", stdout);
    fputs(" -j for json \n", stdout);
    fputs(" -r for raw \n", stdout);
    fputs(" -d for debug \n", stdout);
    fputs(" -l to print license \n", stdout);
}
/*
 * Parse the command line into *ss, or terminate the process.
 *
 * ss:   session to fill in (file path and export type).
 * argc: argument count as passed to main().
 * argv: argument vector as passed to main().
 *
 * Exits with EXIT_FAILURE (after printing usage or an error) when no
 * arguments are given, an unknown option is seen, the path given to -f does
 * not fit in ss->filepath, or no -f option was supplied. Exits with
 * EXIT_SUCCESS after printing the license notice for -l.
 */
static void init_session_or_die(struct session *ss, int argc, char *argv[])
{
    if (argc <= 1) {
        usage(argv[0]);
        exit(EXIT_FAILURE);
    }

    int opt = 0;
    /* 'd' and 'l' are advertised by usage() and must be in the optstring,
     * otherwise getopt() rejects them as unknown options. */
    while ((opt = getopt(argc, argv, "f:jrdl")) != -1) {
        switch (opt) {
        case 'f': {
            /* snprintf always NUL-terminates; strncpy would not when the
             * argument is PATH_MAX bytes or longer. */
            const int n = snprintf(ss->filepath, sizeof ss->filepath, "%s", optarg);
            if (n < 0 || (size_t)n >= sizeof ss->filepath) {
                fprintf(stderr, "error: path given to -f is too long\n");
                exit(EXIT_FAILURE);
            }
            break;
        }
        case 'j':
            ss->et = JSON;
            break;
        case 'r':
            ss->et = RAW;
            break;
        case 'd':
            ss->et = DEBUG;
            break;
        case 'l':
            printf("cce  Copyright (C) 2023 Simon Symeonidis\n"
                   "This program is free software: you can redistribute it and/or modify\n"
                   "it under the terms of the GNU General Public License as published by\n"
                   "the Free Software Foundation, either version 3 of the License, or\n"
                   "(at your option) any later version.\n"
                   "This program comes WITHOUT ANY WARRANTY.\n"
                   "See <https://www.gnu.org/licenses/>.\n");
            exit(EXIT_SUCCESS);
            break;
        default:
            usage(argv[0]);
            exit(EXIT_FAILURE);
            break;
        }
    }

    /* Every mode needs an input file; fail early with a clear message. */
    if (ss->filepath[0] == '\0') {
        fprintf(stderr, "error: no cache file given (-f)\n");
        usage(argv[0]);
        exit(EXIT_FAILURE);
    }
}
/*
 * Print one entry in a human-readable form: the numeric fields in decimal
 * and hex, followed by the strings area with every NUL shown as a newline.
 *
 * e: entry to print; e->size bytes starting at e must be readable.
 */
static void export_raw(const struct entry *const e)
{
    /* Arguments are cast so that each one matches its conversion specifier
     * exactly; passing int64_t to %lu or uint32_t to %d is not portable. */
    printf(
        "================================================\n"
        "size of struct: %16u " "[%04x]\n"
        "mtime : %16lld " "[%08llx]\n"
        "duration : %16d " "[%04x]\n"
        "bitrate : %16d " "[%04x]\n"
        "bpm : %16d " "[%04x]\n"
        "reserved : first: [%02x] last: [%02x]\n",
        (unsigned)e->size, (unsigned)e->size,
        (long long)e->mtime, (unsigned long long)e->mtime,
        (int)e->duration, (unsigned)e->duration,
        (int)e->bitrate, (unsigned)e->bitrate,
        (int)e->bpm, (unsigned)e->bpm,
        (unsigned)e->_reserved[0], (unsigned)e->_reserved[CACHE_RESERVED_SZ - 1]
    );

    /* A size smaller than the fixed part would wrap around to a huge
     * unsigned value; treat such an entry as having no strings. */
    const size_t all_strings_sz =
        e->size > sizeof(struct entry) ? e->size - sizeof(struct entry) : 0;

    for (size_t i = 0; i < all_strings_sz; ++i) {
        const unsigned char c = (unsigned char)e->strings[i];
        putchar(c == 0 ? '\n' : c);
    }
    putchar('\n');
}
/*
 * Print one entry as a single-line JSON object on stdout:
 *   {"size": .., "mtime": .., "duration": .., "bitrate": .., "bpm": ..,
 *    "strings": ["..", ".."]}
 *
 * e: entry to print; e->size bytes starting at e must be readable.
 *
 * Each NUL-delimited string of the entry becomes one element of "strings".
 * Double quotes, backslashes and control characters are escaped so that the
 * output stays valid JSON; 0x0d and 0x0e are dropped, as before.
 */
static void export_json(const struct entry *const e)
{
    /* Arguments are cast so that each one matches its conversion specifier. */
    printf(
        "{"
        "\"size\": %u, "
        "\"mtime\": %lld, "
        "\"duration\": %d, "
        "\"bitrate\": %d, "
        "\"bpm\": %d, ",
        (unsigned)e->size,
        (long long)e->mtime,
        (int)e->duration,
        (int)e->bitrate,
        (int)e->bpm
    );

    /* A size smaller than the fixed part would wrap around to a huge
     * unsigned value; treat such an entry as having no strings. */
    const size_t all_strings_sz =
        e->size > sizeof(struct entry) ? e->size - sizeof(struct entry) : 0;

    int in_string = 0;   /* an opening quote has been printed, not yet closed */
    int any_emitted = 0; /* at least one array element has been started */

    fputs("\"strings\": [", stdout);
    for (size_t i = 0; i < all_strings_sz; ++i) {
        const unsigned char c = (unsigned char)e->strings[i];

        if (!in_string) {
            if (any_emitted) {
                putchar(',');
            }
            putchar('"');
            in_string = 1;
            any_emitted = 1;
        }

        switch (c) {
        case 0: /* end of the current string */
            putchar('"');
            in_string = 0;
            break;
        case '"':
            fputs("\\\"", stdout);
            break;
        case '\\': /* an unescaped backslash would corrupt the JSON */
            fputs("\\\\", stdout);
            break;
        case 0x0a:
            fputs("\\n", stdout);
            break;
        case 0x09:
            fputs("\\t", stdout);
            break;
        case 0x0e:
        case 0x0d: /* ignore */
            break;
        default:
            if (c < 0x20) {
                /* remaining control characters must be \u-escaped */
                printf("\\u%04x", (unsigned)c);
            } else {
                putchar(c);
            }
            break;
        }
    }

    /* Close a final string that was not NUL-terminated inside the entry. */
    if (in_string) {
        putchar('"');
    }
    fputs("]}\n", stdout);
}
/*
 * Print the raw and aligned size of one entry, to illustrate the padding
 * between entries. Lines for entries that need padding start with 'x'.
 *
 * e: entry whose size field is inspected.
 */
static void export_debug(const struct entry *const e)
{
    /* ALIGN() yields a size_t, so do all the arithmetic in size_t and print
     * with %zu / %zx instead of the mismatched %d / %ld. */
    const size_t raw = e->size;
    const size_t aligned = ALIGN(raw);

    printf("%c raw size: %zu [%08zx]; aligned size: %zu (padded %zu)\n",
           raw != aligned ? 'x' : ' ',
           raw,
           raw,
           aligned,
           aligned - raw);
}
/*
 * Sanity-check the cache header and warn on stderr about anything unexpected.
 * Only warnings are printed; the caller continues either way.
 *
 * header: header found at the start of the cache file.
 */
static void check_version(const struct cache_header *const header) {
    /* The file header comment documents the magic as the three bytes "CTC". */
    if (memcmp(header->magic, "CTC", sizeof header->magic) != 0) {
        fprintf(stderr, "warning: unexpected magic bytes, this may not be a cmus cache\n");
    }

    const unsigned version = header->version[0];
    if (version != VERSION_13) {
        /* newline added so the warning does not run into later output */
        fprintf(stderr, "warning: unknown version %u (%x)\n", version, version);
    }
}
int main(int argc, char *argv[])
{
init_session_or_die(&sess, argc, argv);
int ret = EXIT_SUCCESS;
int fd = open(sess.filepath, O_RDONLY);
if (fd < 0) {
perror("open");
return EXIT_FAILURE;
}
struct stat st = {0};
if (stat(sess.filepath, &st) < 0) {
perror("stat");
ret = EXIT_FAILURE;
goto cleanup_fd;
}
const size_t cache_sz = st.st_size;
uint8_t *buf = mmap(NULL, cache_sz, PROT_READ, MAP_PRIVATE, fd, 0);
if (buf == NULL) {
perror("mmap");
ret = EXIT_FAILURE;
goto cleanup_fd;
}
struct cache_header *header = (struct cache_header*) buf;
check_version(header);
buf += sizeof(struct cache_header);
const uint8_t *start_buf = buf;
void (*export_fn)(const struct entry *const);
switch (sess.et) {
case RAW:
export_fn = export_raw;
break;
case JSON:
export_fn = export_json;
break;
case DEBUG:
export_fn = export_debug;
break;
default:
fprintf(stderr, "ureachable\n");
abort();
};
while ((size_t)(buf - start_buf) + 8 < cache_sz) {
struct entry *ent = (struct entry *)buf;
export_fn(ent);
buf += ALIGN(ent->size);
}
munmap(buf, cache_sz);
cleanup_fd:
close(fd);
return ret;
}
# Compiler flags for cce: strict C11 with a broad set of warnings treated as
# errors, debug info and -O2 with LTO.
# -Wcast-align is listed once (it was duplicated), and the plain -g that
# followed -ggdb3 is dropped because the last -g option given is the one GCC
# uses, which silently replaced -ggdb3.
CFLAGS=-std=c11 \
    -pedantic \
    -march=native \
    -Wall \
    -Wextra \
    -Werror \
    -Wcast-align \
    -Wwrite-strings \
    -Wformat=2 \
    -Wswitch-default \
    -Wswitch-enum \
    -Wpointer-arith \
    -Wbad-function-cast \
    -Wstrict-overflow=5 \
    -Wstrict-prototypes \
    -Winline \
    -Wundef \
    -Wnested-externs \
    -Wcast-qual \
    -Wshadow \
    -Wlogical-op \
    -Wfloat-equal \
    -Wredundant-decls \
    -Wold-style-definition \
    -fno-omit-frame-pointer \
    -ffloat-store \
    -fno-common \
    -fstrict-aliasing \
    -ggdb3 \
    -pipe \
    -O2 \
    -flto
# None of these targets produce a file named after themselves.
.PHONY: all run run-json build

# Default target: compile the extractor.
all: build

# Run with the default (raw) output; builds first, like run-json.
run: build
	./cce -f cache

# Run with JSON output.
run-json: build
	./cce -j -f cache

# Compile the single source file into the cce binary.
build:
	gcc $(CFLAGS) cmus-cache-extract.c -o cce
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment