Last active
April 10, 2025 09:39
-
-
Save bouroo/ae9ad5ca25a1eb8f998e57543b163155 to your computer and use it in GitHub Desktop.
Find files that contains all keywords
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"bufio" | |
"context" | |
"encoding/csv" | |
"fmt" | |
"os" | |
"path/filepath" | |
"runtime" | |
"strings" | |
"sync" | |
"sync/atomic" | |
) | |
// Job represents a task to check a file with a given keyword set. | |
type Job struct { | |
filePath string | |
tableName string | |
operation string | |
keywords map[string]bool | |
} | |
// Result represents the outcome of processing a file. | |
type Result struct { | |
success bool // true if all keywords were found | |
tableName string | |
operation string | |
keywordsList string | |
workflowName string | |
errMsg string | |
} | |
// checkFileContainsKeywords checks if the file contains all the specified keywords. | |
func checkFileContainsKeywords(filePath string, keywords map[string]bool) (bool, error) { | |
file, err := os.Open(filePath) | |
if err != nil { | |
return false, err | |
} | |
defer file.Close() | |
// Clone the keywords map to track the remaining ones. | |
remaining := make(map[string]bool, len(keywords)) | |
for k, v := range keywords { | |
remaining[k] = v | |
} | |
scanner := bufio.NewScanner(file) | |
buf := make([]byte, 64*1024) | |
scanner.Buffer(buf, 10*1024*1024) | |
for scanner.Scan() { | |
line := scanner.Text() | |
// Remove any found keywords from the remaining set. | |
for k := range remaining { | |
if strings.Contains(line, k) { | |
delete(remaining, k) | |
} | |
} | |
// Early exit if no keywords remain. | |
if len(remaining) == 0 { | |
return true, nil | |
} | |
} | |
if err := scanner.Err(); err != nil { | |
return false, err | |
} | |
return len(remaining) == 0, nil | |
} | |
// worker processes jobs from jobCh and sends results to resultCh. | |
func worker(jobCh <-chan Job, resultCh chan<- Result, wg *sync.WaitGroup) { | |
defer wg.Done() | |
for job := range jobCh { | |
contains, err := checkFileContainsKeywords(job.filePath, job.keywords) | |
var wfName string | |
// Derive workflow name if filePath starts with "workflows/" | |
if strings.HasPrefix(job.filePath, "workflows/") { | |
parts := strings.Split(job.filePath, "/") | |
if len(parts) > 1 { | |
wfName = parts[1] | |
} | |
} | |
// Build comma-separated keywords string. | |
var sb strings.Builder | |
for k := range job.keywords { | |
if sb.Len() > 0 { | |
sb.WriteString(", ") | |
} | |
sb.WriteString(strings.TrimSuffix(k, "(")) | |
} | |
res := Result{ | |
tableName: job.tableName, | |
operation: job.operation, | |
keywordsList: sb.String(), | |
workflowName: wfName, | |
} | |
if err != nil { | |
res.errMsg = err.Error() | |
} else { | |
res.success = contains | |
} | |
resultCh <- res | |
} | |
} | |
// addJob creates a Job with the given operation and enqueues it. | |
func addJob(jobCh chan<- Job, filePath, tableName, operation string, baseKeywords map[string]bool) { | |
// Copy the base keyword map | |
keywords := make(map[string]bool, len(baseKeywords)) | |
for k, v := range baseKeywords { | |
keywords[k] = v | |
} | |
// Append the operation-specific keyword (with an opening parenthesis) | |
keywords[operation+"("] = true | |
jobCh <- Job{ | |
filePath: filePath, | |
tableName: tableName, | |
operation: operation, | |
keywords: keywords, | |
} | |
} | |
func main() { | |
// Setup reader for interactive input. | |
reader := bufio.NewReader(os.Stdin) | |
fmt.Print("Enter the directory path: ") | |
dirPath, err := reader.ReadString('\n') | |
if err != nil { | |
fmt.Printf("Error reading directory path: %v\n", err) | |
return | |
} | |
dirPath = strings.TrimSpace(dirPath) | |
fmt.Print("Enter table name: ") | |
tableName, err := reader.ReadString('\n') | |
if err != nil { | |
fmt.Printf("Error reading table name: %v\n", err) | |
return | |
} | |
tableName = strings.TrimSpace(tableName) | |
// Define operations based on the table name. | |
insertOperation := "insert_ngl_" + tableName | |
updateOperation := "update_ngl_" + tableName | |
fmt.Print("Enter keywords separated by commas: ") | |
keywordsInput, err := reader.ReadString('\n') | |
if err != nil { | |
fmt.Printf("Error reading keywords: %v\n", err) | |
return | |
} | |
keywordsInput = strings.TrimSpace(keywordsInput) | |
keywordsSlice := strings.Split(keywordsInput, ",") | |
// Build the base keyword map. | |
baseKeywordMap := make(map[string]bool, len(keywordsSlice)) | |
for _, kw := range keywordsSlice { | |
trimmed := strings.TrimSpace(kw) | |
if trimmed != "" { | |
baseKeywordMap[trimmed] = true | |
} | |
} | |
// Open (or create) the CSV output file. | |
outputFile, err := os.OpenFile("results.csv", os.O_WRONLY|os.O_CREATE|os.O_APPEND, 0644) | |
if err != nil { | |
fmt.Printf("Error opening/creating CSV file: %v\n", err) | |
return | |
} | |
defer outputFile.Close() | |
csvWriter := csv.NewWriter(outputFile) | |
defer csvWriter.Flush() | |
// Write header if the file is empty. | |
if stat, err := outputFile.Stat(); err == nil && stat.Size() == 0 { | |
if err := csvWriter.Write([]string{"Table name", "Operation", "Keywords", "Workflow name"}); err != nil { | |
fmt.Printf("Error writing header to CSV: %v\n", err) | |
return | |
} | |
} else if err != nil { | |
fmt.Printf("Error stating CSV file: %v\n", err) | |
return | |
} | |
// Setup channels. | |
jobCh := make(chan Job, 100) | |
resultCh := make(chan Result, 100) | |
// Start worker goroutines. | |
var workersWg sync.WaitGroup | |
maxWorkers := runtime.NumCPU() | |
for i := 0; i < maxWorkers; i++ { | |
workersWg.Add(1) | |
go worker(jobCh, resultCh, &workersWg) | |
} | |
// Counters. | |
var totalFiles int64 | |
var filesContainingKeywords int64 | |
// Context for directory walk (could be extended for cancellation handling) | |
ctx, cancel := context.WithCancel(context.Background()) | |
defer cancel() | |
// Walk the directory and enqueue jobs. | |
go func() { | |
defer close(jobCh) | |
err := filepath.WalkDir(dirPath, func(path string, d os.DirEntry, err error) error { | |
if err != nil { | |
// Report error for this path but continue walking. | |
fmt.Printf("Error accessing path %s: %v\n", path, err) | |
return nil | |
} | |
// Skip directories. | |
if d.IsDir() { | |
return nil | |
} | |
atomic.AddInt64(&totalFiles, 1) | |
// Enqueue jobs for both operations. | |
addJob(jobCh, path, tableName, insertOperation, baseKeywordMap) | |
addJob(jobCh, path, tableName, updateOperation, baseKeywordMap) | |
// Check for cancellation through context. | |
select { | |
case <-ctx.Done(): | |
return ctx.Err() | |
default: | |
} | |
return nil | |
}) | |
if err != nil { | |
fmt.Printf("Error walking the directory: %v\n", err) | |
} | |
}() | |
// Print results concurrently. | |
var printWg sync.WaitGroup | |
printWg.Add(1) | |
go func() { | |
defer printWg.Done() | |
fmt.Println("\n=== Keyword Search Results ===") | |
for r := range resultCh { | |
if r.errMsg != "" { | |
fmt.Printf("❌ Error processing file for '%s': %s\n", r.tableName, r.errMsg) | |
continue | |
} | |
if r.success { | |
atomic.AddInt64(&filesContainingKeywords, 1) | |
if err := csvWriter.Write([]string{r.tableName, strings.TrimSuffix(r.operation, "("), r.keywordsList, r.workflowName}); err != nil { | |
fmt.Printf("Error writing to CSV: %v\n", err) | |
} | |
} | |
} | |
}() | |
// Wait for workers to finish then close the results channel. | |
workersWg.Wait() | |
close(resultCh) | |
printWg.Wait() | |
// Final summary. | |
fmt.Printf("\n=== Summary ===\n") | |
fmt.Printf("Total files checked: %d\n", totalFiles) | |
fmt.Printf("Files containing all keywords: %d\n", filesContainingKeywords) | |
fmt.Println("==============================") | |
fmt.Println("Results have been saved to results.csv") | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment