Last active
March 9, 2024 14:08
-
-
Save eugrus/152ef8382d2551f5dcbf7a834148ed48 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# searchindocs.ps1 - search documents that Word can open for a string and
# print each hit together with some surrounding text.
# Usage: searchindocs.ps1 <SearchedString> <SearchPath>

# Set error reporting
Set-StrictMode -Version latest

# Check if the required arguments are provided
if ($args.Count -ne 2) {
    Write-Host "Usage: searchindocs.ps1 <SearchedString> <SearchPath>"
    # Report the usage error to the caller; a bare "exit" would signal success.
    exit 1
}

# Retrieve command line arguments
$findtext = $args[0]   # text to look for
$path = $args[1]       # directory tree to search

# COM object for Word, kept invisible because the script only reads documents
$application = New-Object -ComObject Word.Application
$application.Visible = $false

# Number of context characters shown on each side of a match
$charactersAround = 30

# NOTE(review): $results is initialised here but nothing in the visible script
# reads or appends to it - confirm whether it can be removed.
$results = @()
# Searches every *.docx, *.doc, *.odt and *.rtf file below $path for $findtext
# and writes the file name, the search string and the surrounding text of the
# first hit in each file to the host.
#
# Reads the script-level variables $path, $findtext, $charactersAround and
# $application (the Word COM object). Takes no parameters and returns nothing.
Function GetStringMatch {
    # Escape the search string so it is matched literally, and allow *up to*
    # $charactersAround characters of context: a fixed count would miss hits
    # that sit close to the beginning or the end of the text.
    $context = ".{0,$charactersAround}"
    $pattern = $context + [regex]::Escape($findtext) + $context

    # Get all Word document files in the specified directory and subdirectories
    $files = Get-ChildItem $path -Include *.docx, *.doc, *.odt, *.rtf -Recurse |
        Where-Object { -not $_.PSIsContainer }

    # Loop through each Word document
    foreach ($file in $files) {
        $document = $null
        try {
            # Arguments: FileName, ConfirmConversions, ReadOnly.
            # No conversion prompt (Word is invisible) and read-only access,
            # because the document is only searched, never modified.
            $document = $application.Documents.Open($file.FullName, $false, $true)

            # Skip password protected documents
            if (!$document.HasPassword) {
                $range = $document.Content
                # Search for the word and extract surrounding text
                if ($range.Text -match $pattern) {
                    Write-Host "File: $($file.FullName)"
                    Write-Host "Match: $findtext"
                    Write-Host "TextAround: $($Matches[0])"
                    Write-Host
                }
            }
        }
        catch {
            # One unreadable file must not abort the whole search
            Write-Warning "Skipping $($file.FullName): $($_.Exception.Message)"
        }
        finally {
            # Always release the document, even when the search failed
            if ($null -ne $document) {
                $document.Close()
            }
        }
    }
}

try {
    GetStringMatch
}
finally {
    # Shut down the hidden Word instance; otherwise a WINWORD.EXE process is
    # left running after every invocation of the script.
    $application.Quit()
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# searchindocs.sh - search all ODT documents below a directory for a string
# and print each hit together with some surrounding text.
#
# Usage: searchindocs.sh <SearchedString> <SearchPath>
#   SearchedString  text to look for (matched literally)
#   SearchPath      directory that is searched recursively for *.odt files
#
# For every document containing the string, prints the file name, the string
# and the first hit with up to $charactersAround characters on either side.

# Check if the required arguments are provided
if [ "$#" -ne 2 ]; then
  echo "Usage: searchindocs.sh <SearchedString> <SearchPath>" >&2
  exit 1
fi

findtext="$1"
path="$2"

# Maximum number of context characters shown on each side of a match
charactersAround=30

# Use {0,N} rather than {N}: a fixed count would require exactly N characters
# before and after the hit and so miss matches near the start or end of the
# document text.
context=".{0,${charactersAround}}"

# Loop through each ODT document (NUL-delimited so any file name is safe)
while IFS= read -r -d '' file; do
  # Extract text from the ODT document. stdin is detached so the command
  # cannot consume the file list that the loop is reading.
  text=$(libreoffice --headless --cat "$file" </dev/null)

  # The quoted "$findtext" is matched literally; only the unquoted context
  # parts are interpreted as a regular expression.
  if [[ $text =~ ${context}"$findtext"${context} ]]; then
    printf 'File: %s\n' "$file"
    printf 'Match: %s\n' "$findtext"
    printf 'TextAround: %s\n' "${BASH_REMATCH[0]}"
    echo
  fi
done < <(find "$path" -type f -name "*.odt" -print0)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment