Skip to content

Instantly share code, notes, and snippets.

@bouroo
Last active April 10, 2025 09:39
Show Gist options
  • Save bouroo/ae9ad5ca25a1eb8f998e57543b163155 to your computer and use it in GitHub Desktop.
Save bouroo/ae9ad5ca25a1eb8f998e57543b163155 to your computer and use it in GitHub Desktop.
Find files that contain all keywords
package main
import (
	"bufio"
	"context"
	"encoding/csv"
	"fmt"
	"os"
	"path/filepath"
	"runtime"
	"sort"
	"strings"
	"sync"
	"sync/atomic"
)
// Job represents a task to check a file with a given keyword set.
type Job struct {
	filePath  string          // path of the file to scan
	tableName string          // table name, echoed into the Result and CSV row
	operation string          // operation keyword (e.g. "insert_ngl_<table>")
	keywords  map[string]bool // set of keywords that must all appear in the file
}
// Result represents the outcome of processing a file.
type Result struct {
	success      bool   // true if all keywords were found
	tableName    string // copied from the Job
	operation    string // copied from the Job
	keywordsList string // comma-separated list of the searched keywords
	workflowName string // second path segment when the file path starts with "workflows/"
	errMsg       string // non-empty when opening/reading the file failed
}
// checkFileContainsKeywords checks if the file contains all the specified keywords.
func checkFileContainsKeywords(filePath string, keywords map[string]bool) (bool, error) {
file, err := os.Open(filePath)
if err != nil {
return false, err
}
defer file.Close()
// Clone the keywords map to track the remaining ones.
remaining := make(map[string]bool, len(keywords))
for k, v := range keywords {
remaining[k] = v
}
scanner := bufio.NewScanner(file)
buf := make([]byte, 64*1024)
scanner.Buffer(buf, 10*1024*1024)
for scanner.Scan() {
line := scanner.Text()
// Remove any found keywords from the remaining set.
for k := range remaining {
if strings.Contains(line, k) {
delete(remaining, k)
}
}
// Early exit if no keywords remain.
if len(remaining) == 0 {
return true, nil
}
}
if err := scanner.Err(); err != nil {
return false, err
}
return len(remaining) == 0, nil
}
// worker consumes jobs from jobCh, checks each file for its keyword set,
// and sends exactly one Result per job to resultCh. It returns when jobCh
// is closed and signals wg on exit.
func worker(jobCh <-chan Job, resultCh chan<- Result, wg *sync.WaitGroup) {
	defer wg.Done()
	for job := range jobCh {
		contains, err := checkFileContainsKeywords(job.filePath, job.keywords)

		// Derive the workflow name from paths shaped like "workflows/<name>/...".
		// NOTE(review): assumes forward-slash paths — confirm on Windows.
		var wfName string
		if strings.HasPrefix(job.filePath, "workflows/") {
			parts := strings.Split(job.filePath, "/")
			if len(parts) > 1 {
				wfName = parts[1]
			}
		}

		// Build the comma-separated keyword list. Sort the keys first:
		// map iteration order is random in Go, so without sorting the
		// CSV column would differ from run to run for the same input.
		keys := make([]string, 0, len(job.keywords))
		for k := range job.keywords {
			// Strip the "(" suffix added to operation keywords by addJob.
			keys = append(keys, strings.TrimSuffix(k, "("))
		}
		sort.Strings(keys)

		res := Result{
			tableName:    job.tableName,
			operation:    job.operation,
			keywordsList: strings.Join(keys, ", "),
			workflowName: wfName,
		}
		if err != nil {
			res.errMsg = err.Error()
		} else {
			res.success = contains
		}
		resultCh <- res
	}
}
// addJob clones baseKeywords, adds the operation-specific keyword
// ("<operation>(") to the clone, and enqueues a Job for filePath on jobCh.
// The caller's base map is left untouched.
func addJob(jobCh chan<- Job, filePath, tableName, operation string, baseKeywords map[string]bool) {
	// Private copy so the shared base map is never mutated.
	kws := make(map[string]bool, len(baseKeywords)+1)
	for kw, v := range baseKeywords {
		kws[kw] = v
	}
	// The operation keyword carries a trailing "(" as part of the search term.
	kws[operation+"("] = true

	job := Job{
		filePath:  filePath,
		tableName: tableName,
		operation: operation,
		keywords:  kws,
	}
	jobCh <- job
}
// main runs the interactive keyword search: it reads a directory path, a
// table name, and a comma-separated keyword list from stdin, fans file
// checks out to a worker pool (one worker per CPU), and appends every
// matching file as a row to results.csv.
func main() {
	// Setup reader for interactive input.
	reader := bufio.NewReader(os.Stdin)
	fmt.Print("Enter the directory path: ")
	dirPath, err := reader.ReadString('\n')
	if err != nil {
		fmt.Printf("Error reading directory path: %v\n", err)
		return
	}
	dirPath = strings.TrimSpace(dirPath)
	fmt.Print("Enter table name: ")
	tableName, err := reader.ReadString('\n')
	if err != nil {
		fmt.Printf("Error reading table name: %v\n", err)
		return
	}
	tableName = strings.TrimSpace(tableName)
	// Define operations based on the table name.
	insertOperation := "insert_ngl_" + tableName
	updateOperation := "update_ngl_" + tableName
	fmt.Print("Enter keywords separated by commas: ")
	keywordsInput, err := reader.ReadString('\n')
	if err != nil {
		fmt.Printf("Error reading keywords: %v\n", err)
		return
	}
	keywordsInput = strings.TrimSpace(keywordsInput)
	keywordsSlice := strings.Split(keywordsInput, ",")
	// Build the base keyword map; blank entries (e.g. from "a,,b") are dropped.
	baseKeywordMap := make(map[string]bool, len(keywordsSlice))
	for _, kw := range keywordsSlice {
		trimmed := strings.TrimSpace(kw)
		if trimmed != "" {
			baseKeywordMap[trimmed] = true
		}
	}
	// Open (or create) the CSV output file. O_APPEND preserves rows from
	// earlier runs.
	outputFile, err := os.OpenFile("results.csv", os.O_WRONLY|os.O_CREATE|os.O_APPEND, 0644)
	if err != nil {
		fmt.Printf("Error opening/creating CSV file: %v\n", err)
		return
	}
	defer outputFile.Close()
	csvWriter := csv.NewWriter(outputFile)
	defer csvWriter.Flush()
	// Write header if the file is empty (i.e. it was just created).
	if stat, err := outputFile.Stat(); err == nil && stat.Size() == 0 {
		if err := csvWriter.Write([]string{"Table name", "Operation", "Keywords", "Workflow name"}); err != nil {
			fmt.Printf("Error writing header to CSV: %v\n", err)
			return
		}
	} else if err != nil {
		fmt.Printf("Error stating CSV file: %v\n", err)
		return
	}
	// Setup channels. Buffered so producer and consumers rarely block.
	jobCh := make(chan Job, 100)
	resultCh := make(chan Result, 100)
	// Start worker goroutines, one per logical CPU.
	var workersWg sync.WaitGroup
	maxWorkers := runtime.NumCPU()
	for i := 0; i < maxWorkers; i++ {
		workersWg.Add(1)
		go worker(jobCh, resultCh, &workersWg)
	}
	// Counters, shared across goroutines and updated atomically.
	var totalFiles int64
	var filesContainingKeywords int64
	// Context for directory walk (could be extended for cancellation
	// handling). As written, cancel only fires when main returns.
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()
	// Walk the directory and enqueue jobs. Closing jobCh when the walk
	// finishes is what lets the workers' range loops terminate.
	go func() {
		defer close(jobCh)
		err := filepath.WalkDir(dirPath, func(path string, d os.DirEntry, err error) error {
			if err != nil {
				// Report error for this path but continue walking.
				fmt.Printf("Error accessing path %s: %v\n", path, err)
				return nil
			}
			// Skip directories.
			if d.IsDir() {
				return nil
			}
			atomic.AddInt64(&totalFiles, 1)
			// Enqueue jobs for both operations (insert and update variants).
			addJob(jobCh, path, tableName, insertOperation, baseKeywordMap)
			addJob(jobCh, path, tableName, updateOperation, baseKeywordMap)
			// Check for cancellation through context (non-blocking probe).
			select {
			case <-ctx.Done():
				return ctx.Err()
			default:
			}
			return nil
		})
		if err != nil {
			fmt.Printf("Error walking the directory: %v\n", err)
		}
	}()
	// Print results concurrently. After the header, only this goroutine
	// writes CSV rows, so csvWriter needs no extra locking.
	var printWg sync.WaitGroup
	printWg.Add(1)
	go func() {
		defer printWg.Done()
		fmt.Println("\n=== Keyword Search Results ===")
		for r := range resultCh {
			if r.errMsg != "" {
				fmt.Printf("❌ Error processing file for '%s': %s\n", r.tableName, r.errMsg)
				continue
			}
			if r.success {
				atomic.AddInt64(&filesContainingKeywords, 1)
				if err := csvWriter.Write([]string{r.tableName, strings.TrimSuffix(r.operation, "("), r.keywordsList, r.workflowName}); err != nil {
					fmt.Printf("Error writing to CSV: %v\n", err)
				}
			}
		}
	}()
	// Wait for workers to finish then close the results channel; only
	// then is it safe for the printer's range loop to end.
	workersWg.Wait()
	close(resultCh)
	printWg.Wait()
	// Final summary.
	fmt.Printf("\n=== Summary ===\n")
	fmt.Printf("Total files checked: %d\n", totalFiles)
	fmt.Printf("Files containing all keywords: %d\n", filesContainingKeywords)
	fmt.Println("==============================")
	fmt.Println("Results have been saved to results.csv")
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment