Skip to content

Instantly share code, notes, and snippets.

@andreabedini
Last active February 12, 2025 08:31
Show Gist options
  • Save andreabedini/d103042d34308af4dafe274f0055dd47 to your computer and use it in GitHub Desktop.
Save andreabedini/d103042d34308af4dafe274f0055dd47 to your computer and use it in GitHub Desktop.
#!/usr/bin/env -S awk -f walkarray.awk -f getopt.awk --exec
# ghc-pkg.awk: Inspect GHC package information files.
#
# This script parses GHC package description files (InstalledPackageInfo files)
# and extracts information about packages. It supports specifying package
# databases to search, listing specific fields, and recursively processing
# dependencies.
#
# Usage: ghc-pkg.awk [options] <unit-id>...
#
# Options:
# --package-db <path> Add a package database to the search path.
# --field <field> Output only the specified field(s). Can be given
# multiple times.
# --package-id <id> Unit ID of a package to inspect. Can be given multiple times.
# --recurse Recursively process dependencies.
# --help Display this help message.
#
# Function: find_package_conf(unitid)
#
# Finds the .conf file for a given unit ID in the configured package databases.
#
# Args:
#   unitid: The unit ID to search for.
#
# Globals read:
#   package_dbs, package_dbs_count: the package database directories, indexed
#   0 .. package_dbs_count-1 in the order they were added.
#
# Globals written:
#   pkgroots[unitid]: the directory that contains the package database in
#   which the unit was found.
#
# Returns:
#   The path "<db>/<unitid>.conf" of the first database (in the order the
#   databases were added) that holds a readable, non-empty file for the unit.
#   If no database has one, prints "<unitid> not found" to stderr and exits
#   with status 1.
#
# The names after the extra spaces in the parameter list are local variables.
function find_package_conf(unitid,    db_idx, db, conf, junk, found, root) {
    # Walk the databases by index: a `for (x in array)` scan visits elements
    # in an unspecified order, which would make the lookup nondeterministic
    # when the same unit exists in more than one database.
    for (db_idx = 0; db_idx < package_dbs_count; db_idx++) {
        db = package_dbs[db_idx]
        conf = db "/" unitid ".conf"

        # Probe the file by reading one line, then always close it so the
        # main input loop later re-reads it from the start and no descriptor
        # is left open when the probe hits end-of-file.
        found = ((getline junk < conf) > 0)
        close(conf)
        if (!found)
            continue

        # The pkgroot is the directory containing the package database:
        # ignore trailing slashes, then drop the last path element.
        root = db
        sub(/\/+$/, "", root)
        if (!sub(/\/[^\/]*$/, "", root))
            root = "."          # relative database path with no directory part
        pkgroots[unitid] = root
        return conf
    }

    print unitid, "not found" > "/dev/stderr"
    exit 1
}
# Print the command-line help text to standard output.
# Every long option accepted by the option parser is listed here.
function usage() {
    print "Usage: ghc-pkg-inspect.awk [options]"
    print ""
    print "Options:"
    print "  --package-db <path>   Add a package database to the search path."
    print "  --global-package-db   Add GHC's global package database to the search path."
    print "  --clear-package-db    Forget all package databases given so far."
    print "  --package-id <id>     Unit ID of a package to inspect. Can be given"
    print "                        multiple times."
    print "  --field <field>       Output only the specified field(s). Can be given"
    print "                        multiple times."
    print "  --recurse             Recursively process dependencies."
    print "  --simple-output       Output only the field values and not the names."
    print "  --help                Display this help message."
}
# Parse the command line, then replace the program's arguments with the
# .conf files of the requested units so that awk's main loop reads them.
BEGIN {
    Opterr = 1  # Enable error messages from getopt

    # Option state.
    recurse = 0
    simple_output = 0
    delete fields
    fields_count = 0
    delete package_dbs
    package_dbs_count = 0
    delete package_ids
    package_ids_count = 0
    delete scheduled
    scheduled_count = 0
    delete pkgroots

    # Command-line options (long options only).
    _myshortopts = ""
    _mylongopts = "field:,recurse,simple-output,clear-package-db,global-package-db,package-db:,package-id:,help"
    ghc = "ghc"

    # ARGC is left untouched while parsing: getopt tracks its own position in
    # Optind, and counting consumed words by hand goes wrong for the
    # "--option=value" form, which uses one word instead of two.
    while ((_go_c = getopt(ARGC, ARGV, _myshortopts, _mylongopts)) != -1) {
        switch (_go_c) {
        case "field":
            fields[fields_count++] = Optarg
            break
        case "clear-package-db":
            delete package_dbs
            package_dbs_count = 0
            break
        case "global-package-db":
            # Ask ghc where its global package database lives.
            _ghc_cmd = ghc " --print-global-package-db"
            if ((_ghc_cmd | getline globaldb) <= 0) {
                print "Cannot run:", _ghc_cmd > "/dev/stderr"
                exit 1
            }
            close(_ghc_cmd)
            package_dbs[package_dbs_count++] = globaldb
            break
        case "package-db":
            package_dbs[package_dbs_count++] = Optarg
            break
        case "package-id":
            package_ids[package_ids_count++] = Optarg
            break
        case "recurse":
            recurse = 1
            break
        case "simple-output":
            simple_output = 1
            break
        case "help":
            usage()
            exit 0
        default:
            print "Unknown option:", _go_c > "/dev/stderr"
            exit 1
        }
    }

    # Words left after the options are unit IDs as well.
    for (_arg_idx = Optind; _arg_idx < ARGC; _arg_idx++) {
        package_ids[package_ids_count++] = ARGV[_arg_idx]
    }

    # Drop every original argument; only .conf paths are appended below.
    ARGC = 1

    # Validate before resolving, so a missing --package-db yields the usage
    # hint rather than a "not found" error from the lookup.
    if (package_dbs_count == 0 || package_ids_count == 0) {
        print "Specify --package-db and unit-id(s)." > "/dev/stderr"
        usage()
        exit 1
    }

    # Resolve unit IDs into file paths. schedule() appends each .conf file to
    # ARGV once, so a unit ID given twice is only processed once.
    for (_pid_idx = 0; _pid_idx < package_ids_count; _pid_idx++) {
        schedule(package_ids[_pid_idx])
    }
}
# Second start-up rule: prepare the state used while reading .conf files.
BEGIN {
    # Both tables are keyed by unit ID and start out empty.
    delete graph_key_order
    delete graph

    # Records are split on a colon together with any whitespace after it.
    FS = ":\\s*"
}
# Process InstalledPackageInfo files.
# Reset the per-file parse state before each input file is read.
BEGINFILE {
    key_order = ""  # keys of this file, in order of appearance
    delete ipi      # field name -> value for this file only
    in_layout = 0   # not inside a multi-line value
}
# Match lines with key-value pairs.
# A line that starts with "<key>:" begins a new field.
#
# The value is taken from the raw record rather than from $2: with the
# colon-based field separator, $2 stops at the next colon, which would
# truncate values such as URLs ("homepage: https://...").
/^[a-z\-_]+:/ {
    in_layout = 0
    key = $1
    key_order = (key_order ? key_order " " : " ") key

    # Everything after the first colon and the whitespace that follows it.
    value = $0
    sub(/^[^:]*:[[:space:]]*/, "", value)

    if (value != "") {
        ipi[key] = value
    } else {
        in_layout = 1   # the value follows on the next line(s)
        next            # nothing on this line for the continuation rule
    }
}
# Handle multi-line values.
# Continuation line of a multi-line value: append it, separated by a single
# space, to the value of the most recently seen key.
in_layout == 1 {
    sub(/^[ \t]+/, "")  # Remove leading whitespace

    # Test the length explicitly: a bare `if ($0)` treats a line that looks
    # like the number zero (e.g. "0") as false and would silently drop it.
    if (length($0) > 0) {
        ipi[key] = ((key in ipi) && ipi[key] != "" ? ipi[key] " " : "") $0
    }
}
# A whole .conf file has been read: file its fields in the dependency graph
# under the unit's own "id" and, when recursing, queue its dependencies.
ENDFILE {
    unit_id = ipi["id"]

    # Keep the order in which the keys appeared in this file.
    split(key_order, graph_key_order[unit_id], " ")

    # Expose the pkgroot recorded for this unit as an extra field.
    ipi["pkgroot"] = pkgroots[unit_id]

    # Copy every field into the graph node of this unit.
    for (key in ipi)
        graph[unit_id][key] = ipi[key]

    # Add the files of the units named in "depends" to the input list.
    if (recurse) {
        split(ipi["depends"], depends, " ")
        for (i in depends)
            schedule(depends[i])
    }
}
# Queue the .conf file of unit_id for reading, unless the unit was queued
# before. The file path is appended to ARGV so the main loop picks it up.
function schedule(unit_id) {
    if (unit_id in scheduled)
        return

    scheduled[unit_id] = 1
    scheduled_count++
    ARGV[ARGC++] = find_package_conf(unit_id)
}
# Depth-first visit used to order the dependency graph.
#
# Marks `node` as visited, first visits every not-yet-visited unit whose
# "depends" field lists `node`, and then appends `node` to the global array
# `sorted`. A unit therefore lands in `sorted` after the units that depend
# on it.
#
# Args:
#   node: unit ID to visit.
#
# Globals: reads `graph`; updates `visited` and `sorted`.
#
# The function is recursive, so its loop state must be local (the names after
# the extra spaces in the parameter list); with global loop variables a
# recursive call would overwrite the caller's loop key and dependency list.
function topological_sort(node,    n, deps, dep_count, k) {
    visited[node] = 1
    for (n in graph) {
        dep_count = split(graph[n]["depends"], deps, " ")
        for (k = 1; k <= dep_count; k++) {
            if (deps[k] == node && !(n in visited)) {
                topological_sort(n)
            }
        }
    }
    sorted[length(sorted)] = node
}
# All files have been read: order the units and print them.
END {
    delete sorted
    delete visited
    for (node in graph) {
        if (!(node in visited)) {
            topological_sort(node)
        }
    }

    # Output package information.
    # Walk `sorted` by index, in the order topological_sort() filled it; a
    # `for (i in sorted)` scan visits elements in an unspecified order and
    # would throw the computed ordering away. The loop variables have names
    # of their own because the functions called here assign to short global
    # names such as `i`.
    _sorted_count = length(sorted)
    for (_sorted_idx = 0; _sorted_idx < _sorted_count; _sorted_idx++) {
        unit_id = sorted[_sorted_idx]
        print_output(graph[unit_id])
    }
}
# Print one field of a unit, with every "${pkgroot}" in the value replaced.
#
# Args:
#   ipi:          field name -> value array of the unit.
#   key:          name of the field to print.
#   pkgroot_repl: pkgroot, already escaped for use as a gsub() replacement.
#
# Prints "<key>: <value>", or only "<value>" when the global simple_output
# flag is set.
function print_field(ipi, key, pkgroot_repl,    value) {
    value = ipi[key]
    gsub(/\${pkgroot}/, pkgroot_repl, value)
    if (simple_output) {
        print value
    } else {
        printf "%s: %s\n", key, value
    }
}

# Print the fields of one unit.
#
# Args:
#   ipi: field name -> value array of the unit (a node of `graph`).
#
# Globals read: pkgroots, fields, graph_key_order, simple_output.
#
# When fields were requested on the command line, exactly those are printed,
# in the order they were given; otherwise every field of the unit is printed
# in the order it appeared in the .conf file. Both lists are walked by index
# because a `for (i in array)` scan does not guarantee any order.
#
# The names after the extra spaces in the parameter list are local variables.
function print_output(ipi,    unit_id, pkgroot, idx, count) {
    unit_id = ipi["id"]
    pkgroot = pkgroots[unit_id] # Path to the package database where this unit was found.

    # In a gsub() replacement "&" stands for the matched text and "\" is an
    # escape character; escape both so the path is inserted literally.
    gsub(/[\\&]/, "\\\\&", pkgroot)

    count = length(fields)
    if (count > 0) {
        # fields[] is filled from index 0 by the option parser.
        for (idx = 0; idx < count; idx++) {
            print_field(ipi, fields[idx], pkgroot)
        }
    } else if (unit_id in graph_key_order) {
        # graph_key_order[unit_id] comes from split(), so it starts at 1.
        count = length(graph_key_order[unit_id])
        for (idx = 1; idx <= count; idx++) {
            print_field(ipi, graph_key_order[unit_id][idx], pkgroot)
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment