Last active
May 23, 2025 09:56
-
-
Save smoser/083195bbc2a5ddc9517bdee2d8e4dd9e to your computer and use it in GitHub Desktop.
apksql - populate sql database with apk data
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"archive/tar" | |
"compress/gzip" | |
"crypto/sha256" | |
"database/sql" | |
"encoding/hex" | |
"fmt" | |
"io" | |
"log" | |
"os" | |
"strings" | |
_ "modernc.org/sqlite" | |
) | |
var fileTypeMap = map[byte]string{ | |
tar.TypeReg: "file", | |
tar.TypeRegA: "file", | |
tar.TypeDir: "dir", | |
tar.TypeSymlink: "symlink", | |
tar.TypeLink: "hardlink", | |
tar.TypeChar: "char", | |
tar.TypeBlock: "block", | |
tar.TypeFifo: "fifo", | |
// tar.TypeSocket: "socket", | |
} | |
const schema = ` | |
CREATE TABLE IF NOT EXISTS origins ( | |
id INTEGER PRIMARY KEY AUTOINCREMENT, | |
origin TEXT NOT NULL, | |
version TEXT NOT NULL, | |
license TEXT, | |
melange_yaml TEXT | |
); | |
CREATE TABLE IF NOT EXISTS packages ( | |
id INTEGER PRIMARY KEY AUTOINCREMENT, | |
origin_id INTEGER NOT NULL, | |
name TEXT NOT NULL, | |
arch TEXT, | |
size INTEGER, | |
builddate TEXT, | |
gitcommit TEXT, | |
description TEXT, | |
FOREIGN KEY (origin_id) REFERENCES origins(id) | |
); | |
CREATE TABLE IF NOT EXISTS provides ( | |
id INTEGER PRIMARY KEY AUTOINCREMENT, | |
package_id INTEGER NOT NULL, | |
value TEXT NOT NULL, | |
FOREIGN KEY (package_id) REFERENCES packages(id) | |
); | |
CREATE TABLE IF NOT EXISTS depends ( | |
id INTEGER PRIMARY KEY AUTOINCREMENT, | |
package_id INTEGER NOT NULL, | |
value TEXT NOT NULL, | |
FOREIGN KEY (package_id) REFERENCES packages(id) | |
); | |
-- Files with metadata and links to packages and types | |
CREATE TABLE IF NOT EXISTS files ( | |
id INTEGER PRIMARY KEY AUTOINCREMENT, | |
package_id INTEGER NOT NULL, | |
path TEXT NOT NULL, | |
ftype TEXT NOT NULL, | |
size INTEGER, | |
sha256 TEXT, | |
mode INTEGER, | |
uid TEXT, | |
gid TEXT, | |
target TEXT, -- for symlinks, hardlinks | |
FOREIGN KEY (package_id) REFERENCES packages(id) | |
); | |
CREATE TABLE IF NOT EXISTS xattrs ( | |
file_id INTEGER NOT NULL, | |
key TEXT NOT NULL, | |
value TEXT NOT NULL, | |
FOREIGN KEY (file_id) REFERENCES files(id) | |
); | |
` | |
func main() { | |
if len(os.Args) != 3 { | |
log.Fatalf("Usage: %s <apk-file> <sqlite-db>", os.Args[0]) | |
} | |
apkPath := os.Args[1] | |
dbPath := os.Args[2] | |
db, err := sql.Open("sqlite", dbPath) | |
if err != nil { | |
log.Fatalf("open DB: %v", err) | |
} | |
_, err = db.Exec("PRAGMA busy_timeout = 5000;") | |
if err != nil { | |
log.Fatalf("pragma failed: %v", err) | |
} | |
defer db.Close() | |
_, err = db.Exec(schema) | |
if err != nil { | |
log.Fatalf("failed init %s: %v", dbPath, err) | |
} | |
err = processApk(apkPath, db) | |
if err != nil { | |
log.Fatalf("process APK: %v", err) | |
} | |
} | |
func processApk(apkPath string, db *sql.DB) error { | |
f, err := os.Open(apkPath) | |
if err != nil { | |
return fmt.Errorf("open apk: %w", err) | |
} | |
defer f.Close() | |
gzf, err := gzip.NewReader(f) | |
if err != nil { | |
return fmt.Errorf("gunzip apk: %w", err) | |
} | |
defer gzf.Close() | |
tarReader := tar.NewReader(gzf) | |
var pkginfo string | |
var melangeYaml string | |
var originID, pkgID int64 | |
var meta map[string]string | |
tx, err := db.Begin() | |
if err != nil { | |
return fmt.Errorf("begin failed: %w", err) | |
} | |
success := false | |
defer func() { | |
if !success { | |
tx.Rollback() | |
} | |
}() | |
for { | |
hdr, err := tarReader.Next() | |
if err == io.EOF { | |
break | |
} | |
if err != nil { | |
return fmt.Errorf("read tar: %w", err) | |
} | |
switch hdr.Name { | |
case ".PKGINFO": | |
data, err := io.ReadAll(tarReader) | |
if err != nil { | |
return fmt.Errorf("read .PKGINFO: %w", err) | |
} | |
pkginfo = string(data) | |
meta = parsePkgInfo(pkginfo) | |
log.Printf("looking for %s@%s", meta["origin"], meta["pkgver"]) | |
if err := db.QueryRow("SELECT id from origins WHERE origin = ? AND version = ?", | |
meta["origin"], meta["pkgver"]).Scan(&originID); err != nil { | |
if err != sql.ErrNoRows { | |
return fmt.Errorf("select origin failed: %w", err) | |
} | |
// Insert origin | |
res, err := tx.Exec(`INSERT INTO origins (origin, version, license, melange_yaml) VALUES (?, ?, ?, ?)`, | |
meta["origin"], meta["pkgver"], meta["license"], melangeYaml) | |
if err != nil { | |
return fmt.Errorf("insert origin: %w", err) | |
} | |
originID, _ = res.LastInsertId() | |
} | |
log.Printf("checking %d/%s", originID, meta["pkgname"]) | |
if err := db.QueryRow("SELECT id from packages WHERE origin_id = ? AND name = ?", | |
originID, meta["pkgname"]).Scan(&pkgID); err != nil { | |
if err != sql.ErrNoRows { | |
return fmt.Errorf("select packages failed: %w", err) | |
} | |
// Insert package | |
res, err := tx.Exec(`INSERT INTO packages | |
(origin_id, name, arch, description, size, builddate, gitcommit) | |
VALUES (?, ?, ?, ?, ?, ?, ?)`, | |
originID, meta["pkgname"], meta["arch"], meta["pkgdesc"], meta["size"], meta["builddate"], | |
meta["commit"]) | |
if err != nil { | |
return fmt.Errorf("insert package: %w", err) | |
} | |
pkgID, _ = res.LastInsertId() | |
} else { | |
log.Printf("%s/%s already present", meta["pkgname"], meta["pkgver"]) | |
return nil | |
} | |
log.Printf("inserting %s/%s", meta["pkgname"], meta["pkgver"]) | |
// Insert depends | |
for _, dep := range metaList(pkginfo, "depend") { | |
_, err := tx.Exec(`INSERT INTO depends (package_id, value) VALUES (?, ?)`, pkgID, dep) | |
if err != nil { | |
return fmt.Errorf("insert depends: %w", err) | |
} | |
} | |
// Insert provides | |
for _, prov := range metaList(pkginfo, "provides") { | |
_, err := tx.Exec(`INSERT INTO provides (package_id, value) VALUES (?, ?)`, pkgID, prov) | |
if err != nil { | |
return fmt.Errorf("insert provides: %w", err) | |
} | |
} | |
case ".melange.yaml": | |
data, err := io.ReadAll(tarReader) | |
if err != nil { | |
return fmt.Errorf("read melange.yaml: %w", err) | |
} | |
melangeYaml = string(data) | |
if originID == 0 { | |
return fmt.Errorf("Did not find .PKGINFO before melange.yaml") | |
} | |
_, err = tx.Exec(`UPDATE origins set melange_yaml = ? where id = ?`, melangeYaml, originID) | |
if err != nil { | |
return fmt.Errorf("insert melange_yaml failed: %w", err) | |
} | |
default: | |
var shaSum string | |
if hdr.Typeflag == tar.TypeReg || hdr.Typeflag == tar.TypeRegA { | |
hasher := sha256.New() | |
if _, err := io.Copy(hasher, tarReader); err != nil { | |
return fmt.Errorf("hash file: %w", err) | |
} | |
shaSum = hex.EncodeToString(hasher.Sum(nil)) | |
} | |
ftype := "" | |
switch hdr.Typeflag { | |
case tar.TypeReg: | |
ftype = "file" | |
case tar.TypeRegA: | |
ftype = "file" | |
case tar.TypeDir: | |
ftype = "dir" | |
case tar.TypeSymlink: | |
ftype = "symlink" | |
case tar.TypeLink: | |
ftype = "hardlink" | |
case tar.TypeChar: | |
ftype = "char" | |
case tar.TypeBlock: | |
ftype = "block" | |
case tar.TypeFifo: | |
ftype = "fifo" | |
default: | |
ftype = "unknown-%s" | |
} | |
if ftype == "dir" { | |
continue | |
} | |
res, err := tx.Exec(` | |
INSERT INTO files ( | |
path, package_id, ftype, size, sha256, mode, uid, gid, target) | |
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`, | |
hdr.Name, | |
pkgID, | |
ftype, | |
hdr.Size, | |
shaSum, | |
hdr.Mode, | |
hdr.Uid, | |
hdr.Gid, | |
hdr.Linkname, | |
) | |
if err != nil { | |
return fmt.Errorf("insert file: %w", err) | |
} | |
fileID, _ := res.LastInsertId() | |
prefix := "SCHILY.xattr." | |
log.Printf("got pax records: %v", hdr.PAXRecords) | |
for k, val := range hdr.PAXRecords { | |
if !strings.HasPrefix(k, prefix) { | |
continue | |
} | |
key := strings.SplitN(k[len(prefix):], "=", 2)[0] | |
_, err := tx.Exec(` | |
INSERT INTO xattrs (file_id, key, value) VALUES (?, ?, ?)`, | |
fileID, key, val) | |
if err != nil { | |
return fmt.Errorf("could not add xattr fileID=%d %s=%s: %v", fileID, key, val, err) | |
} | |
} | |
} | |
} | |
if err := tx.Commit(); err != nil { | |
return fmt.Errorf("failed to commit %s: %w", apkPath, err) | |
} | |
success = true | |
log.Printf("Processed APK: %s %s/%s-%s", apkPath, meta["origin"], meta["pkgname"], meta["pkgver"]) | |
return nil | |
} | |
func getFileTypeID(db *sql.DB, flag byte) (int, error) { | |
name := fileTypeMap[flag] | |
var id int | |
err := db.QueryRow(`SELECT id FROM ftypes WHERE name = ?`, name).Scan(&id) | |
if err != nil { | |
return 0, fmt.Errorf("lookup ftype %q: %w", name, err) | |
} | |
return id, nil | |
} | |
func parsePkgInfo(content string) map[string]string { | |
meta := make(map[string]string) | |
lines := strings.Split(content, "\n") | |
for _, line := range lines { | |
if parts := strings.SplitN(line, " = ", 2); len(parts) == 2 { | |
meta[strings.ToLower(parts[0])] = parts[1] | |
} | |
} | |
return meta | |
} | |
func metaList(content, key string) []string { | |
var values []string | |
lines := strings.Split(content, "\n") | |
prefix := key + " = " | |
for _, line := range lines { | |
if strings.HasPrefix(line, prefix) { | |
values = append(values, strings.TrimPrefix(line, prefix)) | |
} | |
} | |
return values | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/sh | |
# | |
pkgs="fping-5.1-r0.apk | |
fping-5.1-r1.apk | |
fping-doc-5.1-r0.apk | |
fping-doc-5.1-r1.apk | |
fping-5.2-r0.apk | |
fping-doc-5.2-r0.apk | |
fping-5.2-r1.apk | |
fping-doc-5.2-r1.apk | |
fping-doc-5.3-r0.apk | |
fping-5.3-r0.apk | |
fping-doc-5.3-r40.apk | |
fping-5.3-r40.apk | |
" | |
dl() { | |
local url="$1" out="$2" | |
curl -sL "$url" > "$out.$$" && mv "$out.$$" "$out" || | |
{ rm -f "$out.$$"; return 1; } | |
} | |
mkdir -p apks || { echo "cant mkdir apks"; exit 1; } | |
bburl="https://packages.wolfi.dev/os/" | |
for arch in x86_64 aarch64; do | |
burl=$bburl/$arch | |
mkdir -p "apks/$arch" | |
for pkg in $pkgs; do | |
f="apks/$arch/$pkg" | |
[ -e "$f" ] && { echo "$f - existed"; continue; } | |
echo "$f" | |
dl "$burl/$pkg" "$f" || { echo "failed $f"; exit 1; } | |
done | |
done |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment