Skip to content

Instantly share code, notes, and snippets.

@AspireOne
Last active January 6, 2025 17:14
Show Gist options
  • Save AspireOne/5b05c5ef5dfb0afbd857486e882f846e to your computer and use it in GitHub Desktop.
Save AspireOne/5b05c5ef5dfb0afbd857486e882f846e to your computer and use it in GitHub Desktop.
One-liners for checking text statistics inside of files
# All commands operate on the ./src directory and exclude node_modules, files whose name starts with a dot, and files whose immediate parent directory starts with a dot.
# Get the total number of characters in all matching files under ./src.
# -LiteralPath is used so that file names containing wildcard characters
# (for example "[id].tsx") are read as-is instead of being treated as patterns,
# which would make Get-Content silently return nothing for those files.
(
    Get-ChildItem -Path ./src -Recurse -File |
        Where-Object {
            $_.FullName -notmatch '\\node_modules\\' -and
            $_.Name -notmatch '^\.' -and
            $_.Directory.Name -notmatch '^\.'
        } |
        ForEach-Object { (Get-Content -LiteralPath $_.FullName -Raw).Length } |
        Measure-Object -Sum
).Sum
# Estimate the number of OpenAI tokens: total characters * 0.23129 tokens per character
# (roughly 1 token per 4.32 characters), rounded to a whole number.
# -LiteralPath is used so that file names containing wildcard characters
# (for example "[id].tsx") are read as-is instead of being treated as patterns.
[math]::Round((
    Get-ChildItem -Path ./src -Recurse -File |
        Where-Object {
            $_.FullName -notmatch '\\node_modules\\' -and
            $_.Name -notmatch '^\.' -and
            $_.Directory.Name -notmatch '^\.'
        } |
        ForEach-Object { (Get-Content -LiteralPath $_.FullName -Raw).Length } |
        Measure-Object -Sum
).Sum * 0.23129, 0)
# Get the total number of non-blank lines (empty and whitespace-only lines are not counted).
# -LiteralPath is used so that file names containing wildcard characters
# (for example "[id].tsx") are read as-is instead of being treated as patterns.
(
    Get-ChildItem -Path ./src -Recurse -File |
        Where-Object {
            $_.FullName -notmatch '\\node_modules\\' -and
            $_.Name -notmatch '^\.' -and
            $_.Directory.Name -notmatch '^\.'
        } |
        ForEach-Object {
            (Get-Content -LiteralPath $_.FullName | Where-Object { $_.Trim() }).Count
        } |
        Measure-Object -Sum
).Sum
# Count the matching files under ./src.
(
    Get-ChildItem -Path ./src -Recurse -File |
        Where-Object {
            $_.FullName -notmatch '\\node_modules\\' -and
            $_.Name -notmatch '^\.' -and
            $_.Directory.Name -notmatch '^\.'
        }
).Count
# Get the combined size of the matching files in megabytes, rounded to two decimals.
[math]::Round((
    Get-ChildItem -Path ./src -Recurse -File |
        Where-Object {
            $_.FullName -notmatch '\\node_modules\\' -and
            $_.Name -notmatch '^\.' -and
            $_.Directory.Name -notmatch '^\.'
        } |
        Measure-Object -Property Length -Sum
).Sum / 1MB, 2)
# Count the matching files per extension, most common extension first.
Get-ChildItem -Path ./src -Recurse -File |
    Where-Object {
        $_.FullName -notmatch '\\node_modules\\' -and
        $_.Name -notmatch '^\.' -and
        $_.Directory.Name -notmatch '^\.'
    } |
    Group-Object -Property Extension -NoElement |
    Sort-Object -Property Count -Descending
# Get the average number of lines per file (blank lines included), rounded to two decimals.
# -LiteralPath is used so that file names containing wildcard characters
# (for example "[id].tsx") are read as-is instead of being treated as patterns;
# otherwise such files would count as 0 lines and drag the average down.
[math]::Round((
    Get-ChildItem -Path ./src -Recurse -File |
        Where-Object {
            $_.FullName -notmatch '\\node_modules\\' -and
            $_.Name -notmatch '^\.' -and
            $_.Directory.Name -notmatch '^\.'
        } |
        ForEach-Object { (Get-Content -LiteralPath $_.FullName).Count } |
        Measure-Object -Average
).Average, 2)
# List the five largest matching files with their size in kilobytes (two decimals).
Get-ChildItem -Path ./src -Recurse -File |
    Where-Object {
        $_.FullName -notmatch '\\node_modules\\' -and
        $_.Name -notmatch '^\.' -and
        $_.Directory.Name -notmatch '^\.'
    } |
    Sort-Object -Property Length -Descending |
    Select-Object -First 5 -Property Name, @{ Name = 'SizeKB'; Expression = { [math]::Round($_.Length / 1KB, 2) } }
# Count the matching files whose last write time falls within the past 30 days.
(
    Get-ChildItem -Path ./src -Recurse -File |
        Where-Object {
            $_.FullName -notmatch '\\node_modules\\' -and
            $_.Name -notmatch '^\.' -and
            $_.Directory.Name -notmatch '^\.' -and
            $_.LastWriteTime -gt (Get-Date).AddDays(-30)
        }
).Count
####### BONUS: #######
# Copy the content of all text/source files under the current directory to the clipboard,
# wrapping each file in an XML-like tag: <file-NAME path="relative/path"> ... </file-NAME>.
# Skipped: node_modules, dist, build, coverage and dot-directories; dot-files;
# package-lock.json / yarn.lock; files of 512000 bytes or more; extensions not in the list below.
# The work runs inside & { } so the helper variables do not leak into the session.
& {
    # Root path without a trailing separator, so relative paths can be cut out by length.
    $root = (Get-Item -LiteralPath .).FullName.TrimEnd([char[]]'\/')
    $skipDir = '(^|[\\/])(node_modules|dist|build|coverage|\.[^\\/]*)($|[\\/])'
    $textExt = '^\.(txt|js|jsx|ts|tsx|md|yml|yaml|json|xml|html|css|scss|less|sh|bash|zsh|ps1|sql|graphql|vue|svelte|php|py|rb|java|cs|go|rs|dart|swift|kt|h|c|cpp|hpp)$'

    # Start with the root itself so top-level files are exported too, then add every
    # subdirectory whose path RELATIVE to the root has no excluded segment. Matching the
    # absolute path would wrongly exclude everything when an ancestor of the root is
    # named e.g. "build" or ".projects".
    $dirs = @(Get-Item -LiteralPath .) + @(
        Get-ChildItem -Path . -Directory -Recurse |
            Where-Object { $_.FullName.Substring($root.Length) -notmatch $skipDir }
    )

    $chunks = $dirs |
        Get-ChildItem -File |
        Where-Object {
            $_.Name -notmatch '^\.' -and
            $_.Name -notmatch '^(package-lock\.json|yarn\.lock)$' -and
            $_.Length -lt 512000 -and
            $_.Extension -match $textExt
        } |
        ForEach-Object {
            $file = $_
            # Read each file once; unreadable files are skipped. -LiteralPath prevents
            # names such as "[id].tsx" from being interpreted as wildcard patterns.
            try { $content = Get-Content -LiteralPath $file.FullName -Raw -ErrorAction Stop }
            catch { return }

            $rel = $file.FullName.Substring($root.Length).TrimStart([char[]]'\/')
            # Strip the JS/TS extension BEFORE removing non-identifier characters;
            # once the dot is gone the extension can no longer be recognised.
            $tagName = $rel -replace '\.(ts|js|tsx|jsx)$', '' -replace '[\\/]', '-' -replace '[^a-zA-Z0-9\-_]', '' -replace '^-', ''
            $path = $rel -replace '\\', '/'
            "`n<file-$tagName path=`"$path`">`n" + $content + "`n</file-$tagName>`n"
        }

    $chunks -join ''
} | Set-Clipboard
@AspireOne
Copy link
Author

# Copy the content of all text/source files under the current directory to the clipboard,
# wrapping each file in an XML-like tag: <file-NAME path="relative/path"> ... </file-NAME>.
# (Restored from a copy whose "$_", backslashes and backtick escapes had been stripped
# by markdown rendering, which made it invalid PowerShell.)
& {
    # Root path without a trailing separator, so relative paths can be cut out by length.
    $root = (Get-Item -LiteralPath .).FullName.TrimEnd([char[]]'\/')
    $skipDir = '(^|[\\/])(node_modules|dist|build|coverage|\.[^\\/]*)($|[\\/])'
    $textExt = '^\.(txt|js|jsx|ts|tsx|md|yml|yaml|json|xml|html|css|scss|less|sh|bash|zsh|ps1|sql|graphql|vue|svelte|php|py|rb|java|cs|go|rs|dart|swift|kt|h|c|cpp|hpp)$'

    # Include the root itself (for top-level files) plus every subdirectory whose path
    # relative to the root contains no excluded segment.
    $dirs = @(Get-Item -LiteralPath .) + @(
        Get-ChildItem -Path . -Directory -Recurse |
            Where-Object { $_.FullName.Substring($root.Length) -notmatch $skipDir }
    )

    $chunks = $dirs |
        Get-ChildItem -File |
        Where-Object {
            $_.Name -notmatch '^\.' -and
            $_.Name -notmatch '^(package-lock\.json|yarn\.lock)$' -and
            $_.Length -lt 512000 -and
            $_.Extension -match $textExt
        } |
        ForEach-Object {
            $file = $_
            # Read each file once; unreadable files are skipped. -LiteralPath prevents
            # names such as "[id].tsx" from being interpreted as wildcard patterns.
            try { $content = Get-Content -LiteralPath $file.FullName -Raw -ErrorAction Stop }
            catch { return }

            $rel = $file.FullName.Substring($root.Length).TrimStart([char[]]'\/')
            # Strip the JS/TS extension before removing non-identifier characters.
            $tagName = $rel -replace '\.(ts|js|tsx|jsx)$', '' -replace '[\\/]', '-' -replace '[^a-zA-Z0-9\-_]', '' -replace '^-', ''
            $path = $rel -replace '\\', '/'
            "`n<file-$tagName path=`"$path`">`n" + $content + "`n</file-$tagName>`n"
        }

    $chunks -join ''
} | Set-Clipboard

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment