Skip to content

Instantly share code, notes, and snippets.

@smoser
Last active May 23, 2025 09:56
Show Gist options
  • Save smoser/083195bbc2a5ddc9517bdee2d8e4dd9e to your computer and use it in GitHub Desktop.
Save smoser/083195bbc2a5ddc9517bdee2d8e4dd9e to your computer and use it in GitHub Desktop.
apksql - populate sql database with apk data

apksql - populate sql database with apk data

package main
import (
"archive/tar"
"compress/gzip"
"crypto/sha256"
"database/sql"
"encoding/hex"
"fmt"
"io"
"log"
"os"
"strings"
_ "modernc.org/sqlite"
)
var fileTypeMap = map[byte]string{
tar.TypeReg: "file",
tar.TypeRegA: "file",
tar.TypeDir: "dir",
tar.TypeSymlink: "symlink",
tar.TypeLink: "hardlink",
tar.TypeChar: "char",
tar.TypeBlock: "block",
tar.TypeFifo: "fifo",
// tar.TypeSocket: "socket",
}
const schema = `
CREATE TABLE IF NOT EXISTS origins (
id INTEGER PRIMARY KEY AUTOINCREMENT,
origin TEXT NOT NULL,
version TEXT NOT NULL,
license TEXT,
melange_yaml TEXT
);
CREATE TABLE IF NOT EXISTS packages (
id INTEGER PRIMARY KEY AUTOINCREMENT,
origin_id INTEGER NOT NULL,
name TEXT NOT NULL,
arch TEXT,
size INTEGER,
builddate TEXT,
gitcommit TEXT,
description TEXT,
FOREIGN KEY (origin_id) REFERENCES origins(id)
);
CREATE TABLE IF NOT EXISTS provides (
id INTEGER PRIMARY KEY AUTOINCREMENT,
package_id INTEGER NOT NULL,
value TEXT NOT NULL,
FOREIGN KEY (package_id) REFERENCES packages(id)
);
CREATE TABLE IF NOT EXISTS depends (
id INTEGER PRIMARY KEY AUTOINCREMENT,
package_id INTEGER NOT NULL,
value TEXT NOT NULL,
FOREIGN KEY (package_id) REFERENCES packages(id)
);
-- Files with metadata and links to packages and types
CREATE TABLE IF NOT EXISTS files (
id INTEGER PRIMARY KEY AUTOINCREMENT,
package_id INTEGER NOT NULL,
path TEXT NOT NULL,
ftype TEXT NOT NULL,
size INTEGER,
sha256 TEXT,
mode INTEGER,
uid TEXT,
gid TEXT,
target TEXT, -- for symlinks, hardlinks
FOREIGN KEY (package_id) REFERENCES packages(id)
);
CREATE TABLE IF NOT EXISTS xattrs (
file_id INTEGER NOT NULL,
key TEXT NOT NULL,
value TEXT NOT NULL,
FOREIGN KEY (file_id) REFERENCES files(id)
);
`
func main() {
if len(os.Args) != 3 {
log.Fatalf("Usage: %s <apk-file> <sqlite-db>", os.Args[0])
}
apkPath := os.Args[1]
dbPath := os.Args[2]
db, err := sql.Open("sqlite", dbPath)
if err != nil {
log.Fatalf("open DB: %v", err)
}
_, err = db.Exec("PRAGMA busy_timeout = 5000;")
if err != nil {
log.Fatalf("pragma failed: %v", err)
}
defer db.Close()
_, err = db.Exec(schema)
if err != nil {
log.Fatalf("failed init %s: %v", dbPath, err)
}
err = processApk(apkPath, db)
if err != nil {
log.Fatalf("process APK: %v", err)
}
}
func processApk(apkPath string, db *sql.DB) error {
f, err := os.Open(apkPath)
if err != nil {
return fmt.Errorf("open apk: %w", err)
}
defer f.Close()
gzf, err := gzip.NewReader(f)
if err != nil {
return fmt.Errorf("gunzip apk: %w", err)
}
defer gzf.Close()
tarReader := tar.NewReader(gzf)
var pkginfo string
var melangeYaml string
var originID, pkgID int64
var meta map[string]string
tx, err := db.Begin()
if err != nil {
return fmt.Errorf("begin failed: %w", err)
}
success := false
defer func() {
if !success {
tx.Rollback()
}
}()
for {
hdr, err := tarReader.Next()
if err == io.EOF {
break
}
if err != nil {
return fmt.Errorf("read tar: %w", err)
}
switch hdr.Name {
case ".PKGINFO":
data, err := io.ReadAll(tarReader)
if err != nil {
return fmt.Errorf("read .PKGINFO: %w", err)
}
pkginfo = string(data)
meta = parsePkgInfo(pkginfo)
log.Printf("looking for %s@%s", meta["origin"], meta["pkgver"])
if err := db.QueryRow("SELECT id from origins WHERE origin = ? AND version = ?",
meta["origin"], meta["pkgver"]).Scan(&originID); err != nil {
if err != sql.ErrNoRows {
return fmt.Errorf("select origin failed: %w", err)
}
// Insert origin
res, err := tx.Exec(`INSERT INTO origins (origin, version, license, melange_yaml) VALUES (?, ?, ?, ?)`,
meta["origin"], meta["pkgver"], meta["license"], melangeYaml)
if err != nil {
return fmt.Errorf("insert origin: %w", err)
}
originID, _ = res.LastInsertId()
}
log.Printf("checking %d/%s", originID, meta["pkgname"])
if err := db.QueryRow("SELECT id from packages WHERE origin_id = ? AND name = ?",
originID, meta["pkgname"]).Scan(&pkgID); err != nil {
if err != sql.ErrNoRows {
return fmt.Errorf("select packages failed: %w", err)
}
// Insert package
res, err := tx.Exec(`INSERT INTO packages
(origin_id, name, arch, description, size, builddate, gitcommit)
VALUES (?, ?, ?, ?, ?, ?, ?)`,
originID, meta["pkgname"], meta["arch"], meta["pkgdesc"], meta["size"], meta["builddate"],
meta["commit"])
if err != nil {
return fmt.Errorf("insert package: %w", err)
}
pkgID, _ = res.LastInsertId()
} else {
log.Printf("%s/%s already present", meta["pkgname"], meta["pkgver"])
return nil
}
log.Printf("inserting %s/%s", meta["pkgname"], meta["pkgver"])
// Insert depends
for _, dep := range metaList(pkginfo, "depend") {
_, err := tx.Exec(`INSERT INTO depends (package_id, value) VALUES (?, ?)`, pkgID, dep)
if err != nil {
return fmt.Errorf("insert depends: %w", err)
}
}
// Insert provides
for _, prov := range metaList(pkginfo, "provides") {
_, err := tx.Exec(`INSERT INTO provides (package_id, value) VALUES (?, ?)`, pkgID, prov)
if err != nil {
return fmt.Errorf("insert provides: %w", err)
}
}
case ".melange.yaml":
data, err := io.ReadAll(tarReader)
if err != nil {
return fmt.Errorf("read melange.yaml: %w", err)
}
melangeYaml = string(data)
if originID == 0 {
return fmt.Errorf("Did not find .PKGINFO before melange.yaml")
}
_, err = tx.Exec(`UPDATE origins set melange_yaml = ? where id = ?`, melangeYaml, originID)
if err != nil {
return fmt.Errorf("insert melange_yaml failed: %w", err)
}
default:
var shaSum string
if hdr.Typeflag == tar.TypeReg || hdr.Typeflag == tar.TypeRegA {
hasher := sha256.New()
if _, err := io.Copy(hasher, tarReader); err != nil {
return fmt.Errorf("hash file: %w", err)
}
shaSum = hex.EncodeToString(hasher.Sum(nil))
}
ftype := ""
switch hdr.Typeflag {
case tar.TypeReg:
ftype = "file"
case tar.TypeRegA:
ftype = "file"
case tar.TypeDir:
ftype = "dir"
case tar.TypeSymlink:
ftype = "symlink"
case tar.TypeLink:
ftype = "hardlink"
case tar.TypeChar:
ftype = "char"
case tar.TypeBlock:
ftype = "block"
case tar.TypeFifo:
ftype = "fifo"
default:
ftype = "unknown-%s"
}
if ftype == "dir" {
continue
}
res, err := tx.Exec(`
INSERT INTO files (
path, package_id, ftype, size, sha256, mode, uid, gid, target)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`,
hdr.Name,
pkgID,
ftype,
hdr.Size,
shaSum,
hdr.Mode,
hdr.Uid,
hdr.Gid,
hdr.Linkname,
)
if err != nil {
return fmt.Errorf("insert file: %w", err)
}
fileID, _ := res.LastInsertId()
prefix := "SCHILY.xattr."
log.Printf("got pax records: %v", hdr.PAXRecords)
for k, val := range hdr.PAXRecords {
if !strings.HasPrefix(k, prefix) {
continue
}
key := strings.SplitN(k[len(prefix):], "=", 2)[0]
_, err := tx.Exec(`
INSERT INTO xattrs (file_id, key, value) VALUES (?, ?, ?)`,
fileID, key, val)
if err != nil {
return fmt.Errorf("could not add xattr fileID=%d %s=%s: %v", fileID, key, val, err)
}
}
}
}
if err := tx.Commit(); err != nil {
return fmt.Errorf("failed to commit %s: %w", apkPath, err)
}
success = true
log.Printf("Processed APK: %s %s/%s-%s", apkPath, meta["origin"], meta["pkgname"], meta["pkgver"])
return nil
}
func getFileTypeID(db *sql.DB, flag byte) (int, error) {
name := fileTypeMap[flag]
var id int
err := db.QueryRow(`SELECT id FROM ftypes WHERE name = ?`, name).Scan(&id)
if err != nil {
return 0, fmt.Errorf("lookup ftype %q: %w", name, err)
}
return id, nil
}
func parsePkgInfo(content string) map[string]string {
meta := make(map[string]string)
lines := strings.Split(content, "\n")
for _, line := range lines {
if parts := strings.SplitN(line, " = ", 2); len(parts) == 2 {
meta[strings.ToLower(parts[0])] = parts[1]
}
}
return meta
}
func metaList(content, key string) []string {
var values []string
lines := strings.Split(content, "\n")
prefix := key + " = "
for _, line := range lines {
if strings.HasPrefix(line, prefix) {
values = append(values, strings.TrimPrefix(line, prefix))
}
}
return values
}
#!/bin/sh
#
pkgs="fping-5.1-r0.apk
fping-5.1-r1.apk
fping-doc-5.1-r0.apk
fping-doc-5.1-r1.apk
fping-5.2-r0.apk
fping-doc-5.2-r0.apk
fping-5.2-r1.apk
fping-doc-5.2-r1.apk
fping-doc-5.3-r0.apk
fping-5.3-r0.apk
fping-doc-5.3-r40.apk
fping-5.3-r40.apk
"
dl() {
local url="$1" out="$2"
curl -sL "$url" > "$out.$$" && mv "$out.$$" "$out" ||
{ rm -f "$out.$$"; return 1; }
}
mkdir -p apks || { echo "cant mkdir apks"; exit 1; }
bburl="https://packages.wolfi.dev/os/"
for arch in x86_64 aarch64; do
burl=$bburl/$arch
mkdir -p "apks/$arch"
for pkg in $pkgs; do
f="apks/$arch/$pkg"
[ -e "$f" ] && { echo "$f - existed"; continue; }
echo "$f"
dl "$burl/$pkg" "$f" || { echo "failed $f"; exit 1; }
done
done
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment