Created
March 5, 2025 10:31
-
-
Save IAmSurajBobade/6c47e5e648bd4ae70ebde71037c802fd to your computer and use it in GitHub Desktop.
aerospike-data-verify-csv
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import (
	"encoding/csv"
	"encoding/json"
	"fmt"
	"os"
	"sort"
	"strconv"
	"strings"
)
type (
	// Record is a single row of data keyed by field name. Values are left
	// untyped because the two loaders in this file populate them differently:
	// JSON decoding stores whatever encoding/json produces, while the CSV
	// loader stores strings and bools.
	Record map[string]interface{}

	// DumpData pairs a list of records with a list of per-status integer maps.
	// NOTE(review): nothing in the visible source references this type; the
	// dump loader decodes into [][]Record instead. Confirm whether it is still
	// needed.
	DumpData struct {
		Records []Record
		Status  []map[string]int
	}
)
func main() { | |
// File paths (replace with your actual file paths) | |
dumpFilePath := "aerospike_dump.json" | |
csvFilePath := "test_data.csv" | |
// Read Aerospike dump | |
dumpRecords, err := readAerospikeDump(dumpFilePath) | |
if err != nil { | |
fmt.Printf("Error reading dump file: %v\n", err) | |
return | |
} | |
// Read CSV | |
csvRecords, err := readCSV(csvFilePath) | |
if err != nil { | |
fmt.Printf("Error reading CSV file: %v\n", err) | |
return | |
} | |
// Validate records | |
validateRecords(dumpRecords, csvRecords) | |
} | |
// readAerospikeDump reads the Aerospike JSON dump file and returns a slice of Records | |
func readAerospikeDump(filePath string) ([]Record, error) { | |
file, err := os.Open(filePath) | |
if err != nil { | |
return nil, err | |
} | |
defer file.Close() | |
// Read the entire file content | |
data, err := os.ReadFile(filePath) | |
if err != nil { | |
return nil, err | |
} | |
// Clean up the file content to extract just the JSON array | |
jsonStr := extractJSON(string(data)) | |
if jsonStr == "" { | |
return nil, fmt.Errorf("no valid JSON found in dump file") | |
} | |
// Parse JSON | |
var dumpData [][]Record | |
err = json.Unmarshal([]byte(jsonStr), &dumpData) | |
if err != nil { | |
return nil, fmt.Errorf("error parsing JSON: %v", err) | |
} | |
// Check if we have at least the records array | |
if len(dumpData) < 1 { | |
return nil, fmt.Errorf("no records found in JSON") | |
} | |
return dumpData[0], nil | |
} | |
// extractJSON returns the part of content that runs from its first '[' up to
// and including its last ']'. It returns the empty string when either bracket
// is absent or when the last ']' does not come after the first '['.
func extractJSON(content string) string {
	open := strings.Index(content, "[")
	if open < 0 {
		return ""
	}

	// A missing ']' yields -1, which the same comparison rejects.
	closing := strings.LastIndex(content, "]")
	if closing <= open {
		return ""
	}

	return content[open : closing+1]
}
// readCSV reads the CSV file and returns a slice of Records | |
func readCSV(filePath string) ([]Record, error) { | |
file, err := os.Open(filePath) | |
if err != nil { | |
return nil, err | |
} | |
defer file.Close() | |
reader := csv.NewReader(file) | |
headers, err := reader.Read() | |
if err != nil { | |
return nil, err | |
} | |
var records []Record | |
for { | |
row, err := reader.Read() | |
if err != nil { | |
if err.Error() == "EOF" { | |
break | |
} | |
return nil, err | |
} | |
record := make(Record) | |
for i, value := range row { | |
if i < len(headers) { | |
// Attempt to parse boolean values | |
if value == "true" || value == "false" { | |
record[headers[i]] = value == "true" | |
} else { | |
record[headers[i]] = value | |
} | |
} | |
} | |
records = append(records, record) | |
} | |
return records, nil | |
} | |
// validateRecords compares Aerospike dump records with CSV records | |
func validateRecords(dumpRecords, csvRecords []Record) { | |
if len(dumpRecords) != len(csvRecords) { | |
fmt.Printf("Record count mismatch: Dump has %d, CSV has %d\n", len(dumpRecords), len(csvRecords)) | |
return | |
} | |
for i := range dumpRecords { | |
fmt.Printf("Validating record %d:\n", i+1) | |
for key, dumpValue := range dumpRecords[i] { | |
csvValue, exists := csvRecords[i][key] | |
if !exists { | |
fmt.Printf(" Field '%s' missing in CSV\n", key) | |
continue | |
} | |
// Convert to string for comparison if needed | |
if fmt.Sprintf("%v", dumpValue) != fmt.Sprintf("%v", csvValue) { | |
fmt.Printf(" Field '%s' mismatch: Dump='%v' (%T), CSV='%v' (%T)\n", | |
key, dumpValue, dumpValue, csvValue, csvValue) | |
} | |
} | |
for key := range csvRecords[i] { | |
if _, exists := dumpRecords[i][key]; !exists { | |
fmt.Printf(" Field '%s' missing in Dump\n", key) | |
} | |
} | |
} | |
fmt.Println("Validation complete") | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment