Skip to content

Instantly share code, notes, and snippets.

@ayghri
Last active August 7, 2023 12:55
Show Gist options
  • Save ayghri/d4dc677590c0ac608e8762b89231b7d1 to your computer and use it in GitHub Desktop.
Save ayghri/d4dc677590c0ac608e8762b89231b7d1 to your computer and use it in GitHub Desktop.
Path reduction algorithms for IDE

Shorten paths for duplicate filenames in an IDE (Neovim)

I was unsatisfied of how the IDE diffirentiates between opened files with the same name. So I came up with this. This was coded in lua with some Neovim API. So it would need some adjustments.

I'm aware that this an opinionated (why not show ? for hidden root folders) solution and there is probably some algorithm out there with a fancy name that does the same thing... so feel free to adjust it to your needs.

I've seen a VSCode solution would do

'a\b\c\d', 'a\f\b\c\d' -> 'a\b\...', '...\f\...'  vs 'a\...', 'f\...' (mine) 
'a\b\a',   'b\b\a'     -> 'a\...', 'b\b\...'      vs   'a\...' , 'b\...'

(Other than their running time of $k n^2$ where n the number of paths and k the average length. Mine is $nk$ that's like 2ms in practice for < 10 paths)

input = {
  ["0"] = "/home/f4",
  ["1"] = "/home/f1/f2",
  ["2"] = "/b2/home/f1/f2",
  ["3"] = "/b3/home/f1/f2",
  ["4"] = "/b1/b2/home/f1/f2",
  ["5"] = "/home/b2/f3",
  ["6"] = "/f3",
  ["7"] = "/home/b3/f3",
  ["8"] = "/b4/lome/f1/f2",
  ["9"] = "/b4/gome/f1/f2"
}

First, I did some endoding to translate the paths to short strings in reverse order:

input = {
  ["0"] = "KE   ",
  ["1"] = "DCE  ",
  ["2"] = "DCEJ ",
  ["3"] = "DCEG ",
  ["4"] = "DCEJI",
  ["5"] = "HJE  ",
  ["6"] = "H    ",
  ["7"] = "HGE  ",
  ["8"] = "DCFA ",
  ["9"] = "DCBA "
}
  • We recursively group by first letter, it the group has one element, then we stop,
  • otherwise we recurse:
    • If a group member reached its end we keep the first element and we go to differentiate between the other elements
    • Since all elements are unique, we reach a level where all groups have 1 element, so the recursion should terminate
-- encoded shorts
{
  firsts = { "D", "I", "K" },
  hash = {
    ["0"] = "K",
    ["1"] = "?B",
    ["2"] = "?BA",
    ["3"] = "?BH",
    ["4"] = "?BAJ",
    ["5"] = "IA",
    ["6"] = "I",
    ["7"] = "IH",
    ["8"] = "?G",
    ["9"] = "?F"
  }
}
-- reduce_paths(input)
{
  ["0"] = "f4",
  ["1"] = "home/?",
  ["2"] = "b2/home/?",
  ["3"] = "b3/home/?",
  ["4"] = "b1/b2/home/?",
  ["5"] = "b2/f3",
  ["6"] = "f3",
  ["7"] = "b3/f3",
  ["8"] = "lome/?",
  ["9"] = "gome/?"
}
local M = {}
-- Map intergers 1... to A....
local function int_to_char(i) return string.char(i + 64) end
local function group_by_first(b_to_s)
local starts = {}
for b, s in pairs(b_to_s) do
if #s == 0 then
error("empty string in hash_by_first" .. vim.inspect(b_to_s))
end
local c = s:sub(1, 1)
if starts[c] == nil then
starts[c] = { b }
else
table.insert(starts[c], b)
end
end
return starts
end
-- encodes paths as strings
local function hash_paths(paths, node_to_char, char_to_node)
local num_nodes = 1
local buf_hash = {}
local max_len = 0
local r
for buf, path in pairs(paths) do
r = ""
path = path .. "/"
for m in path:gmatch("(.-)/") do
if m ~= "" then
if not node_to_char[m] then
node_to_char[m] = int_to_char(num_nodes)
char_to_node[int_to_char(num_nodes)] = m
num_nodes = num_nodes + 1
end
r = node_to_char[m] .. r
end
end
buf_hash[buf] = r
max_len = math.max(#r, max_len)
end
for b, h in pairs(buf_hash) do
buf_hash[b] = h .. string.rep(" ", max_len - #h)
end
return buf_hash
end
-- convert strings back to paths
local function dehash(buf_hash, char_to_node)
local buf_path = {}
for b, h in pairs(buf_hash) do
local rs = h:reverse()
local p = {}
for i = 1, #rs do
if rs:sub(i, i) == "?" then
table.insert(p, "?")
else
table.insert(p, char_to_node[rs:sub(i, i)])
end
end
-- buf_path[b] = "/" .. table.concat(p, "/")
buf_path[b] = table.concat(p, "/")
end
return buf_path
end
-- show_first is you want the immediate parent to always be displayed
local function find_divergence(buf_hash, show_first)
-- group by first
local starts = group_by_first(buf_hash)
-- we use firts to detect the space in the encoding: space => end of the path
local unique = { hash = {}, firsts = {} }
for fr, bufs in pairs(starts) do
table.insert(unique.firsts, fr)
if #bufs == 1 then
local b = bufs[1]
unique.hash[b] = fr
else
local sub_buf_hash = {}
for _, bf in ipairs(bufs) do
sub_buf_hash[bf] = buf_hash[bf]:sub(2)
end
local sub_unique = find_divergence(sub_buf_hash)
local pad = "?"
-- if one of the group elements reached its end, we keep the last path
if vim.tbl_contains(sub_unique.firsts, " ") or show_first then
pad = fr
end
-- we add the pad
for b, u in pairs(sub_unique.hash) do
if u:sub(1, 1) == " " then
unique.hash[b] = pad
else
unique.hash[b] = pad .. u
end
-- collapse multiple ?
unique.hash[b] = unique.hash[b]:gsub("?+", "?")
end
end
end
return unique
end
M.reduce_paths = function(paths)
local node_to_char, char_to_node = {}, {}
local buf_hash = hash_paths(paths, node_to_char, char_to_node)
local unique = find_divergence(buf_hash)
return dehash(unique.hash, char_to_node)
end
return M
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment