Created
October 28, 2024 01:19
-
-
Save Quorafind/192d9658aadffbdc779c1f01226ce500 to your computer and use it in GitHub Desktop.
Markdown To CSV
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"bufio" | |
"encoding/csv" | |
"fmt" | |
"io" | |
"net/http" | |
"os" | |
"path/filepath" | |
"regexp" | |
"sort" | |
"strings" | |
) | |
// Section holds one titled chunk of a Markdown document together with the
// name of the file it was read from.
type Section struct {
	// Title is the section heading.
	Title string
	// Content is the section body.
	Content string
	// File is the name of the source file.
	File string
}
func main() { | |
// 步骤1: 获取当前目录下的所有文件夹 | |
folders := listFolders(".") | |
if len(folders) == 0 { | |
fmt.Println("当前目录下没有文件夹") | |
return | |
} | |
// 步骤2: 让用户选择要处理的文件夹 | |
fmt.Println("请选择要处理的文件夹:") | |
for i, folder := range folders { | |
fmt.Printf("[%d] %s\n", i+1, folder) | |
} | |
reader := bufio.NewReader(os.Stdin) | |
// 循环直到用户输入有效的选择或直接回车 | |
var choice int | |
for { | |
fmt.Print("请输入数字选择文件夹 (直接回车选择第一个): ") | |
input, err := reader.ReadString('\n') | |
if err != nil { | |
fmt.Println("读取输入错误,请重试") | |
continue | |
} | |
input = strings.TrimSpace(input) | |
if input == "" { | |
choice = 1 | |
break | |
} | |
if _, err := fmt.Sscanf(input, "%d", &choice); err != nil { | |
fmt.Println("请输入有效的数字") | |
continue | |
} | |
if choice < 1 || choice > len(folders) { | |
fmt.Println("无效的选择,请输入 1 到", len(folders), "之间的数字") | |
continue | |
} | |
break | |
} | |
selectedFolder := folders[choice-1] | |
fmt.Printf("已选择文件夹: %s\n", selectedFolder) | |
// 步骤3: 读取选定文件夹中的所有MD文件 | |
mdFiles, err := filepath.Glob(filepath.Join(selectedFolder, "*.md")) | |
if err != nil { | |
fmt.Printf("读取MD文件错误: %v\n", err) | |
return | |
} | |
if len(mdFiles) == 0 { | |
fmt.Println("所选文件夹中没有MD文件") | |
return | |
} | |
// 对文件名进行排序,确保顺序一致 | |
sort.Strings(mdFiles) | |
// 步骤4: 解析所有MD文件并收集所有的一级标题 | |
var allSections []Section | |
titleOrder := []string{} | |
titleSet := make(map[string]bool) | |
for _, file := range mdFiles { | |
sections := parseMarkdown(file) | |
for i := range sections { | |
sections[i].File = filepath.Base(file) | |
} | |
allSections = append(allSections, sections...) | |
// 只使用第一个文件的标题顺序 | |
if len(titleOrder) == 0 { | |
for _, section := range sections { | |
if !titleSet[section.Title] { | |
titleOrder = append(titleOrder, section.Title) | |
titleSet[section.Title] = true | |
} | |
} | |
} | |
} | |
// 步骤5: 显示所有可用的标题供用户选择 | |
fmt.Println("\n可用的标题:") | |
for i, title := range titleOrder { | |
fmt.Printf("[%d] %s\n", i+1, title) | |
} | |
// 步骤6: 用户多选标题 | |
fmt.Print("\n请输入要包含的标题编号(用空格分隔,如: 1 3 4,输入 -1 选择除1以外的全部,直接回车选择全部): ") | |
var selectedNumbers []int | |
for { | |
input, err := reader.ReadString('\n') | |
if err != nil { | |
fmt.Println("读取输入错误,请重试") | |
continue | |
} | |
input = strings.TrimSpace(input) | |
if input == "" { | |
// 如果用户直接回车,选择全部标题 | |
for i := range titleOrder { | |
selectedNumbers = append(selectedNumbers, i) | |
} | |
break | |
} | |
numberStrs := strings.Fields(input) | |
if len(numberStrs) == 0 { | |
fmt.Println("请至少选择一个标题") | |
continue | |
} | |
// 验证所有输入都是有效的数字 | |
valid := true | |
for _, numStr := range numberStrs { | |
var num int | |
if _, err := fmt.Sscanf(numStr, "%d", &num); err != nil { | |
fmt.Printf("无效的输入 '%s',请输入有效的数字\n", numStr) | |
valid = false | |
break | |
} | |
if num == -1 { | |
// 选择除1以外的全部 | |
for i := 1; i < len(titleOrder); i++ { | |
selectedNumbers = append(selectedNumbers, i) | |
} | |
valid = true | |
break | |
} | |
if num < 1 || num > len(titleOrder) { | |
fmt.Printf("无效的输入 '%d',请输入 1 到 %d 之间的数字\n", num, len(titleOrder)) | |
valid = false | |
break | |
} | |
selectedNumbers = append(selectedNumbers, num-1) | |
} | |
if valid { | |
break | |
} | |
selectedNumbers = []int{} // 重置选择 | |
} | |
// 步骤7: 创建选中标题的列表,保持用户输入的顺序 | |
var selectedTitles []string | |
for _, num := range selectedNumbers { | |
selectedTitles = append(selectedTitles, titleOrder[num]) | |
} | |
fmt.Printf("已选择 %d 个标题\n", len(selectedTitles)) | |
// 步骤8: 询问用户是否下载图片 | |
var downloadImages bool | |
fmt.Print("是否下载图片?(y/n,默认为n): ") | |
answer, _ := reader.ReadString('\n') | |
answer = strings.TrimSpace(strings.ToLower(answer)) | |
downloadImages = answer == "yes" || answer == "y" | |
var imageColumn int | |
if downloadImages { | |
// 步骤9: 如果下载图片,让用户选择图片列 | |
fmt.Println("\n请选择包含图片的列:") | |
for i, title := range selectedTitles { | |
fmt.Printf("[%d] %s\n", i+1, title) | |
} | |
for { | |
fmt.Print("请输入数字选择图片列: ") | |
input, err := reader.ReadString('\n') | |
if err != nil { | |
fmt.Println("读取输入错误,请重试") | |
continue | |
} | |
input = strings.TrimSpace(input) | |
if _, err := fmt.Sscanf(input, "%d", &imageColumn); err != nil { | |
fmt.Println("请输入有效的数字") | |
continue | |
} | |
if imageColumn < 1 || imageColumn > len(selectedTitles) { | |
fmt.Printf("无效的选择,请输入 1 到 %d 之间的数字\n", len(selectedTitles)) | |
continue | |
} | |
imageColumn-- // 转换为索引 | |
break | |
} | |
} | |
// 步骤10: 组织数据:按文件分组 | |
fileGroups := make(map[string][]string) // 键为文件名,值为内容切片 | |
files := make([]string, 0) // 保持文件顺序 | |
seenFiles := make(map[string]bool) | |
// 收集所有唯一的文件名,保持它们第一次出现的顺序 | |
for _, section := range allSections { | |
if !seenFiles[section.File] { | |
files = append(files, section.File) | |
seenFiles[section.File] = true | |
} | |
} | |
imageCounter := 1 | |
// 为每个文件准备一行数据 | |
for _, file := range files { | |
fileGroups[file] = make([]string, len(selectedTitles)) | |
// 查找该文件中每个选中标题的内容 | |
for i, title := range selectedTitles { | |
for _, section := range allSections { | |
if section.File == file && section.Title == title { | |
content := section.Content | |
if downloadImages && i == imageColumn { | |
content = downloadAndReplaceImages(content, selectedFolder, &imageCounter) | |
} | |
// 替换CSV不支持的字符 | |
content = strings.ReplaceAll(content, ",", ",") | |
fileGroups[file][i] = content | |
break | |
} | |
} | |
} | |
} | |
// 步骤11: 生成CSV文件 | |
outputFile := "output.csv" | |
file, err := os.Create(outputFile) | |
if err != nil { | |
fmt.Printf("创建CSV文件错误: %v\n", err) | |
return | |
} | |
defer file.Close() | |
writer := csv.NewWriter(file) | |
defer writer.Flush() | |
// 写入CSV表头(选中的标题) | |
if err := writer.Write(selectedTitles); err != nil { | |
fmt.Printf("写入CSV表头错误: %v\n", err) | |
return | |
} | |
// 按文件顺序写入内容 | |
for _, fileName := range files { | |
if err := writer.Write(fileGroups[fileName]); err != nil { | |
fmt.Printf("写入CSV内容错误: %v\n", err) | |
return | |
} | |
} | |
fmt.Printf("\nCSV文件已生成: %s\n", outputFile) | |
} | |
// listFolders returns the names of the directories found directly under path.
// If path cannot be read, the result is empty.
func listFolders(path string) []string {
	entries, err := os.ReadDir(path)
	if err != nil {
		return nil
	}

	var names []string
	for _, e := range entries {
		if !e.IsDir() {
			continue
		}
		names = append(names, e.Name())
	}
	return names
}
// parseMarkdown 函数解析Markdown文件,返回Section切片 | |
func parseMarkdown(filePath string) []Section { | |
content, err := os.ReadFile(filePath) | |
if err != nil { | |
return nil | |
} | |
var sections []Section | |
var currentSection *Section | |
lines := strings.Split(string(content), "\n") | |
for _, line := range lines { | |
trimmedLine := strings.TrimSpace(line) | |
if strings.HasPrefix(trimmedLine, "# ") { | |
// 如果已经有正在处理的部分,保存它 | |
if currentSection != nil { | |
currentSection.Content = strings.TrimSpace(currentSection.Content) | |
sections = append(sections, *currentSection) | |
} | |
// 开始新的部分 | |
currentSection = &Section{ | |
Title: strings.TrimPrefix(trimmedLine, "# "), | |
Content: "", | |
} | |
} else if currentSection != nil { | |
if currentSection.Content != "" { | |
currentSection.Content += " " | |
} | |
currentSection.Content += strings.TrimSpace(line) | |
} | |
} | |
// 保存最后一个部分 | |
if currentSection != nil { | |
currentSection.Content = strings.TrimSpace(currentSection.Content) | |
sections = append(sections, *currentSection) | |
} | |
return sections | |
} | |
// downloadAndReplaceImages 函数下载图片并替换内容中的图片链接 | |
func downloadAndReplaceImages(content string, folderName string, counter *int) string { | |
imageURLs := extractImageURLs(content) | |
for _, url := range imageURLs { | |
localFileName := fmt.Sprintf("%s-%d.png", folderName, *counter) | |
err := downloadImage(url, localFileName) | |
if err != nil { | |
fmt.Printf("下载图片失败: %v\n", err) | |
continue | |
} | |
content = strings.Replace(content, url, localFileName, 1) | |
*counter++ | |
} | |
return content | |
} | |
// imageURLPattern matches an http(s) URL up to the first whitespace or the
// first character that closes the construct a URL is usually embedded in:
// ")" and "]" for Markdown links, quotes and angle brackets for HTML.
// A URL that itself contains one of these characters is cut short there.
var imageURLPattern = regexp.MustCompile(`https?://[^\s)\]"'<>]+`)

// extractImageURLs returns, in order of appearance, every http(s) URL in
// content that contains ".png", ".jpg", ".jpeg" or ".gif" in any letter case.
// The extension may appear anywhere in the URL, not only at the end.
// The result is nil when no URL matches.
func extractImageURLs(content string) []string {
	var urls []string
	for _, match := range imageURLPattern.FindAllString(content, -1) {
		lower := strings.ToLower(match)
		for _, ext := range []string{".png", ".jpg", ".jpeg", ".gif"} {
			if strings.Contains(lower, ext) {
				urls = append(urls, match)
				break
			}
		}
	}
	return urls
}
// downloadImage fetches url with an HTTP GET and stores the response body in
// the file fileName, creating or truncating it.
//
// It returns an error when the request fails, when the server answers with a
// non-2xx status, or when the file cannot be created, written or closed.
// No file is created for a non-2xx response, and a partially written file is
// removed again.
//
// NOTE(review): http.Get uses the default client, which has no timeout; a
// stalled server blocks the caller indefinitely.
func downloadImage(url string, fileName string) error {
	response, err := http.Get(url)
	if err != nil {
		return err
	}
	defer response.Body.Close()

	// Without this check an error page (e.g. a 404) would be saved as the image.
	if response.StatusCode < http.StatusOK || response.StatusCode >= http.StatusMultipleChoices {
		return fmt.Errorf("downloading %s: unexpected status %s", url, response.Status)
	}

	file, err := os.Create(fileName)
	if err != nil {
		return err
	}
	if _, err := io.Copy(file, response.Body); err != nil {
		// Best-effort cleanup; the copy error is the one worth reporting.
		file.Close()
		os.Remove(fileName)
		return err
	}
	// Close can report a delayed write error, so it must be checked.
	if err := file.Close(); err != nil {
		os.Remove(fileName) // best-effort cleanup
		return err
	}
	return nil
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment