Last active
January 6, 2025 17:14
-
-
Save AspireOne/5b05c5ef5dfb0afbd857486e882f846e to your computer and use it in GitHub Desktop.
One-liners for checking text statistics inside of files
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# All commands operate on the ./src directory and exclude node_modules and files (or parent folders) starting with a dot.
# Get the total number of characters across all files.
# - The node_modules pattern accepts both "\" and "/" so the filter also works in PowerShell on Linux/macOS.
# - -LiteralPath keeps file names such as "[id].tsx" from being interpreted as wildcard patterns.
(Get-ChildItem -Path ./src -Recurse -File |
    Where-Object {
        $_.FullName -notmatch '[\\/]node_modules[\\/]' -and
        $_.Name -notmatch '^\.' -and
        $_.Directory.Name -notmatch '^\.'
    } |
    ForEach-Object { (Get-Content -LiteralPath $_.FullName -Raw).Length } |
    Measure-Object -Sum).Sum
# Estimate the number of OpenAI tokens: total characters * 0.23129 (roughly 1 token per ~4.3 characters),
# rounded to a whole number.
# - The node_modules pattern accepts both "\" and "/" so the filter also works in PowerShell on Linux/macOS.
# - -LiteralPath keeps file names such as "[id].tsx" from being interpreted as wildcard patterns.
[math]::Round(((Get-ChildItem -Path ./src -Recurse -File |
    Where-Object {
        $_.FullName -notmatch '[\\/]node_modules[\\/]' -and
        $_.Name -notmatch '^\.' -and
        $_.Directory.Name -notmatch '^\.'
    } |
    ForEach-Object { (Get-Content -LiteralPath $_.FullName -Raw).Length } |
    Measure-Object -Sum).Sum * 0.23129), 0)
# Get the total number of non-blank lines (lines that are empty or whitespace-only are skipped).
# - The node_modules pattern accepts both "\" and "/" so the filter also works in PowerShell on Linux/macOS.
# - -LiteralPath keeps file names such as "[id].tsx" from being interpreted as wildcard patterns.
# - @() guarantees .Count is well-defined even when a file yields zero or one matching line.
(Get-ChildItem -Path ./src -Recurse -File |
    Where-Object {
        $_.FullName -notmatch '[\\/]node_modules[\\/]' -and
        $_.Name -notmatch '^\.' -and
        $_.Directory.Name -notmatch '^\.'
    } |
    ForEach-Object { @(Get-Content -LiteralPath $_.FullName | Where-Object { $_.Trim() }).Count } |
    Measure-Object -Sum).Sum
# Count the total number of files.
# - The node_modules pattern accepts both "\" and "/" so the filter also works in PowerShell on Linux/macOS.
# - @() guarantees .Count is well-defined even when zero or one file matches.
@(Get-ChildItem -Path ./src -Recurse -File |
    Where-Object {
        $_.FullName -notmatch '[\\/]node_modules[\\/]' -and
        $_.Name -notmatch '^\.' -and
        $_.Directory.Name -notmatch '^\.'
    }).Count
# Get the total size of all files in MB, rounded to two decimals.
# The node_modules pattern accepts both "\" and "/" so the filter also works in PowerShell on Linux/macOS.
[math]::Round((Get-ChildItem -Path ./src -Recurse -File |
    Where-Object {
        $_.FullName -notmatch '[\\/]node_modules[\\/]' -and
        $_.Name -notmatch '^\.' -and
        $_.Directory.Name -notmatch '^\.'
    } |
    Measure-Object -Property Length -Sum).Sum / 1MB, 2)
# Count files per extension, most common extension first.
# The node_modules pattern accepts both "\" and "/" so the filter also works in PowerShell on Linux/macOS.
Get-ChildItem -Path ./src -Recurse -File |
    Where-Object {
        $_.FullName -notmatch '[\\/]node_modules[\\/]' -and
        $_.Name -notmatch '^\.' -and
        $_.Directory.Name -notmatch '^\.'
    } |
    Group-Object Extension -NoElement |
    Sort-Object Count -Descending
# Get the average number of lines per file (blank lines included), rounded to two decimals.
# - The node_modules pattern accepts both "\" and "/" so the filter also works in PowerShell on Linux/macOS.
# - -LiteralPath keeps file names such as "[id].tsx" from being interpreted as wildcard patterns.
# - @() guarantees .Count is well-defined for empty and single-line files.
[math]::Round((Get-ChildItem -Path ./src -Recurse -File |
    Where-Object {
        $_.FullName -notmatch '[\\/]node_modules[\\/]' -and
        $_.Name -notmatch '^\.' -and
        $_.Directory.Name -notmatch '^\.'
    } |
    ForEach-Object { @(Get-Content -LiteralPath $_.FullName).Count } |
    Measure-Object -Average).Average, 2)
# Find the five largest files, showing each name and its size in KB (two decimals).
# The node_modules pattern accepts both "\" and "/" so the filter also works in PowerShell on Linux/macOS.
Get-ChildItem -Path ./src -Recurse -File |
    Where-Object {
        $_.FullName -notmatch '[\\/]node_modules[\\/]' -and
        $_.Name -notmatch '^\.' -and
        $_.Directory.Name -notmatch '^\.'
    } |
    Sort-Object Length -Descending |
    Select-Object -First 5 -Property Name, @{ N = 'SizeKB'; E = { [math]::Round($_.Length / 1KB, 2) } }
# Count files whose last write time falls within the last 30 days.
# - The node_modules pattern accepts both "\" and "/" so the filter also works in PowerShell on Linux/macOS.
# - @() guarantees .Count is well-defined even when zero or one file matches.
@(Get-ChildItem -Path ./src -Recurse -File |
    Where-Object {
        $_.FullName -notmatch '[\\/]node_modules[\\/]' -and
        $_.Name -notmatch '^\.' -and
        $_.Directory.Name -notmatch '^\.' -and
        $_.LastWriteTime -gt (Get-Date).AddDays(-30)
    }).Count
####### BONUS: ####### | |
# Copy the content of all text/source files under the current directory to the clipboard,
# wrapping each file in an XML-like tag: <file-NAME path="relative/path"> ... </file-NAME>.
#
# Selection rules:
# - Files directly in the current directory are included, as well as files in all subdirectories.
# - Directories named node_modules, dist, build, coverage, or starting with a dot are skipped.
#   The check runs on the path RELATIVE to the current directory, so the name of a parent folder
#   of the working directory (e.g. C:\build\app) can never exclude everything.
# - Dotfiles, package-lock.json, yarn.lock, files of 512000 bytes or more, and files whose
#   extension is not in the allow-list are skipped.
# - Files that cannot be read are skipped silently.
#
# Tag name: the relative path with a trailing .ts/.js/.tsx/.jsx removed, path separators turned
# into "-", and every character outside [a-zA-Z0-9-_] dropped. The extension is stripped FIRST,
# because once the dots are removed the extension pattern can no longer match.
#
# The whole pipeline runs inside "& { ... }" so the helper variables do not leak into the session.
(& {
    # Absolute path of the current directory without a trailing separator.
    $root = (Get-Item -LiteralPath .).FullName -replace '[\\/]+$', ''
    # Matches an excluded DIRECTORY segment anywhere in a relative path ("\" or "/" separators).
    $excludedDir = '(^|[\\/])(node_modules|dist|build|coverage|\.[^\\/]*)[\\/]'
    $textExtension = '^\.(txt|js|jsx|ts|tsx|md|yml|yaml|json|xml|html|css|scss|less|sh|bash|zsh|ps1|sql|graphql|vue|svelte|php|py|rb|java|cs|go|rs|dart|swift|kt|h|c|cpp|hpp)$'

    Get-ChildItem -LiteralPath $root -Recurse -File |
        Where-Object {
            $_.Name -notmatch '^\.' -and
            $_.Name -notmatch '^(package-lock\.json|yarn\.lock)$' -and
            $_.Length -lt 512000 -and
            $_.Extension -match $textExtension
        } |
        ForEach-Object {
            # Capture the file: inside a catch block $_ is the error record, not the file.
            $file = $_
            $relative = $file.FullName.Substring($root.Length) -replace '^[\\/]+', ''
            if ($relative -match $excludedDir) { return }

            # Read once and reuse the text; ReadAllText takes the path literally (no wildcard
            # expansion of names like "[id].tsx") and detects UTF-8/BOM encodings.
            try { $content = [System.IO.File]::ReadAllText($file.FullName) } catch { return }

            $tagName = $relative -replace '\.(ts|js|tsx|jsx)$', '' -replace '[\\/]', '-' -replace '[^a-zA-Z0-9\-_]', '' -replace '^-', ''
            $path = $relative -replace '\\', '/'
            "`n<file-$tagName path=`"$path`">`n" + $content + "`n</file-$tagName>`n"
        }
}) -join "" | Set-Clipboard
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
(Get-ChildItem -Path . -Directory -Recurse | Where-Object { $_.FullName -notmatch '(^|\\)(node_modules|dist|build|coverage|\.[^\\]*)($|\\)' -and $_.Name -notmatch '^\.' } | Get-ChildItem -File | Where-Object { $_.Name -notmatch '^\.' -and $_.Name -notmatch '^(package-lock\.json|yarn\.lock)$' -and $_.Length -lt 512000 -and $_.Extension -match '\.(txt|js|jsx|ts|tsx|md|yml|yaml|json|xml|html|css|scss|less|sh|bash|zsh|ps1|sql|graphql|vue|svelte|php|py|rb|java|cs|go|rs|dart|swift|kt|h|c|cpp|hpp|jsx?|tsx?)$' } | ForEach-Object { try { $null = [System.IO.File]::ReadAllText($_.FullName); $isText = $true } catch { $isText = $false }; if ($isText) { $tagName = ($_.FullName -replace [regex]::Escape((Get-Item .).FullName + "\"), "") -replace "[\\\/]", "-" -replace "[^a-zA-Z0-9\-_]", "" -replace "^-", "" -replace "\.ts$|\.js$|\.tsx$|\.jsx$", ""; $path = ($_.FullName -replace [regex]::Escape((Get-Item .).FullName + "\"), "") -replace "\\", "/"; "`n<file-$tagName path=`"$path`">`n" + (Get-Content $_.FullName -Raw) + "`n</file-$tagName>`n" } }) -join "" | Set-Clipboard