Skip to content

Instantly share code, notes, and snippets.

@Quorafind
Created October 28, 2024 01:19
Show Gist options
  • Save Quorafind/192d9658aadffbdc779c1f01226ce500 to your computer and use it in GitHub Desktop.
Save Quorafind/192d9658aadffbdc779c1f01226ce500 to your computer and use it in GitHub Desktop.
Markdown To CSV
package main
import (
"bufio"
"encoding/csv"
"fmt"
"io"
"net/http"
"os"
"path/filepath"
"regexp"
"sort"
"strings"
)
// Section is one chunk of a Markdown document: a heading plus the text that
// follows it, tagged with the file it was read from.
type Section struct {
	// Title is the heading text of the section.
	Title string
	// Content is the body text that belongs to the section.
	Content string
	// File is the name of the source file the section came from.
	File string
}
// main drives the interactive Markdown-to-CSV conversion:
//
//  1. list the sub-folders of the current directory and let the user pick one;
//  2. parse every *.md file in that folder into sections;
//  3. let the user pick which headings become CSV columns;
//  4. optionally download the images referenced in one column;
//  5. write one CSV row per Markdown file to output.csv.
//
// All prompts are read from standard input. If standard input is closed
// before a required answer is given, the program reports the error and exits
// instead of prompting forever.
func main() {
	// Step 1: collect the sub-folders of the current directory.
	folders := listFolders(".")
	if len(folders) == 0 {
		fmt.Println("当前目录下没有文件夹")
		return
	}

	// Step 2: let the user choose the folder to process.
	fmt.Println("请选择要处理的文件夹:")
	for i, folder := range folders {
		fmt.Printf("[%d] %s\n", i+1, folder)
	}

	reader := bufio.NewReader(os.Stdin)

	// readLine returns the next trimmed input line. It reports false when
	// nothing more can be read (for example EOF on a closed or piped stdin);
	// retrying in that situation would spin forever, so callers must stop.
	// A final line without a trailing newline is still returned as data.
	readLine := func() (string, bool) {
		line, err := reader.ReadString('\n')
		line = strings.TrimSpace(line)
		if err != nil && line == "" {
			fmt.Printf("读取输入错误: %v\n", err)
			return "", false
		}
		return line, true
	}

	// Loop until the user enters a valid choice or just presses Enter.
	var choice int
	for {
		fmt.Print("请输入数字选择文件夹 (直接回车选择第一个): ")
		input, ok := readLine()
		if !ok {
			return
		}
		if input == "" {
			choice = 1
			break
		}
		if _, err := fmt.Sscanf(input, "%d", &choice); err != nil {
			fmt.Println("请输入有效的数字")
			continue
		}
		if choice < 1 || choice > len(folders) {
			fmt.Println("无效的选择,请输入 1 到", len(folders), "之间的数字")
			continue
		}
		break
	}
	selectedFolder := folders[choice-1]
	fmt.Printf("已选择文件夹: %s\n", selectedFolder)

	// Step 3: find all Markdown files in the selected folder.
	mdFiles, err := filepath.Glob(filepath.Join(selectedFolder, "*.md"))
	if err != nil {
		fmt.Printf("读取MD文件错误: %v\n", err)
		return
	}
	if len(mdFiles) == 0 {
		fmt.Println("所选文件夹中没有MD文件")
		return
	}
	// Sort the file names so rows come out in a stable order.
	sort.Strings(mdFiles)

	// Step 4: parse every file and collect the level-1 headings.
	var allSections []Section
	var titleOrder []string
	titleSet := make(map[string]bool)
	for _, mdFile := range mdFiles {
		sections := parseMarkdown(mdFile)
		for i := range sections {
			sections[i].File = filepath.Base(mdFile)
		}
		allSections = append(allSections, sections...)

		// The heading order is taken from the first file that has headings.
		if len(titleOrder) == 0 {
			for _, section := range sections {
				if !titleSet[section.Title] {
					titleOrder = append(titleOrder, section.Title)
					titleSet[section.Title] = true
				}
			}
		}
	}
	if len(titleOrder) == 0 {
		// Without headings there are no columns to offer, and the later
		// prompts could never be answered with a valid number.
		fmt.Println("未在MD文件中找到任何一级标题")
		return
	}

	// Step 5: show the available headings.
	fmt.Println("\n可用的标题:")
	for i, title := range titleOrder {
		fmt.Printf("[%d] %s\n", i+1, title)
	}

	// Step 6: let the user pick several headings (stored as 0-based indexes).
	fmt.Print("\n请输入要包含的标题编号(用空格分隔,如: 1 3 4,输入 -1 选择除1以外的全部,直接回车选择全部): ")
	var selectedNumbers []int
	for {
		input, ok := readLine()
		if !ok {
			return
		}
		selectedNumbers = selectedNumbers[:0] // start every attempt from scratch
		if input == "" {
			// Plain Enter selects every heading.
			for i := range titleOrder {
				selectedNumbers = append(selectedNumbers, i)
			}
			break
		}

		valid := true
		for _, numStr := range strings.Fields(input) {
			var num int
			if _, err := fmt.Sscanf(numStr, "%d", &num); err != nil {
				fmt.Printf("无效的输入 '%s',请输入有效的数字\n", numStr)
				valid = false
				break
			}
			if num == -1 {
				// -1 means "everything except the first heading". It
				// replaces whatever was entered before it, so no column is
				// listed twice, and the rest of the line is ignored.
				selectedNumbers = selectedNumbers[:0]
				for i := 1; i < len(titleOrder); i++ {
					selectedNumbers = append(selectedNumbers, i)
				}
				break
			}
			if num < 1 || num > len(titleOrder) {
				fmt.Printf("无效的输入 '%d',请输入 1 到 %d 之间的数字\n", num, len(titleOrder))
				valid = false
				break
			}
			selectedNumbers = append(selectedNumbers, num-1)
		}
		if !valid {
			continue
		}
		if len(selectedNumbers) == 0 {
			// Reached when -1 is entered and only one heading exists.
			fmt.Println("请至少选择一个标题")
			continue
		}
		break
	}

	// Step 7: build the list of selected headings in the order the user gave.
	selectedTitles := make([]string, 0, len(selectedNumbers))
	for _, num := range selectedNumbers {
		selectedTitles = append(selectedTitles, titleOrder[num])
	}
	fmt.Printf("已选择 %d 个标题\n", len(selectedTitles))

	// Step 8: ask whether images should be downloaded.
	fmt.Print("是否下载图片?(y/n,默认为n): ")
	// A read error leaves the answer empty, which falls back to the default "n".
	answer, _ := reader.ReadString('\n')
	answer = strings.TrimSpace(strings.ToLower(answer))
	downloadImages := answer == "yes" || answer == "y"

	var imageColumn int
	if downloadImages {
		// Step 9: let the user choose which column holds the images.
		fmt.Println("\n请选择包含图片的列:")
		for i, title := range selectedTitles {
			fmt.Printf("[%d] %s\n", i+1, title)
		}
		for {
			fmt.Print("请输入数字选择图片列: ")
			input, ok := readLine()
			if !ok {
				return
			}
			if _, err := fmt.Sscanf(input, "%d", &imageColumn); err != nil {
				fmt.Println("请输入有效的数字")
				continue
			}
			if imageColumn < 1 || imageColumn > len(selectedTitles) {
				fmt.Printf("无效的选择,请输入 1 到 %d 之间的数字\n", len(selectedTitles))
				continue
			}
			imageColumn-- // convert to a 0-based index
			break
		}
	}

	// Step 10: organise the data as one row per file.
	fileGroups := make(map[string][]string) // file name -> row cells
	var files []string                      // file names in first-seen order
	seenFiles := make(map[string]bool)
	for _, section := range allSections {
		if !seenFiles[section.File] {
			files = append(files, section.File)
			seenFiles[section.File] = true
		}
	}

	imageCounter := 1
	for _, fileName := range files {
		row := make([]string, len(selectedTitles))
		for i, title := range selectedTitles {
			// Use the first section of this file carrying the heading.
			for _, section := range allSections {
				if section.File != fileName || section.Title != title {
					continue
				}
				content := section.Content
				if downloadImages && i == imageColumn {
					content = downloadAndReplaceImages(content, selectedFolder, &imageCounter)
				}
				// No comma escaping is needed here: csv.Writer quotes
				// fields that contain commas, quotes or line breaks.
				row[i] = content
				break
			}
		}
		fileGroups[fileName] = row
	}

	// Step 11: write the CSV file.
	outputFile := "output.csv"
	outFile, err := os.Create(outputFile)
	if err != nil {
		fmt.Printf("创建CSV文件错误: %v\n", err)
		return
	}
	defer outFile.Close()

	writer := csv.NewWriter(outFile)
	// Header row: the selected headings.
	if err := writer.Write(selectedTitles); err != nil {
		fmt.Printf("写入CSV表头错误: %v\n", err)
		return
	}
	// One row per file, in file order.
	for _, fileName := range files {
		if err := writer.Write(fileGroups[fileName]); err != nil {
			fmt.Printf("写入CSV内容错误: %v\n", err)
			return
		}
	}
	// csv.Writer buffers its output, so write failures may only surface on
	// Flush. Check it before announcing success.
	writer.Flush()
	if err := writer.Error(); err != nil {
		fmt.Printf("写入CSV内容错误: %v\n", err)
		return
	}
	fmt.Printf("\nCSV文件已生成: %s\n", outputFile)
}
// listFolders returns the names of the directories found directly inside
// path. Regular files are skipped. When the directory cannot be read, or it
// contains no sub-directories, the result is an empty (nil) slice.
func listFolders(path string) []string {
	entries, err := os.ReadDir(path)
	if err != nil {
		return nil
	}

	var names []string
	for i := range entries {
		if !entries[i].IsDir() {
			continue
		}
		names = append(names, entries[i].Name())
	}
	return names
}
// parseMarkdown reads the Markdown file at filePath and splits it into
// sections, one per level-1 heading (a line that, after trimming surrounding
// whitespace, starts with "# ").
//
// Each returned Section has Title set to the heading text with the "# "
// prefix removed, and Content set to the trimmed lines that follow the
// heading, joined with single spaces and trimmed as a whole. Text before the
// first heading is discarded. The File field is left empty for the caller to
// fill in.
//
// A leading UTF-8 byte-order mark is ignored so that a heading on the very
// first line of a BOM-prefixed file is still recognised. If the file cannot
// be read, nil is returned.
func parseMarkdown(filePath string) []Section {
	raw, err := os.ReadFile(filePath)
	if err != nil {
		return nil
	}
	// strings.TrimSpace does not treat U+FEFF as whitespace, so without this
	// the first line of a BOM-prefixed file would never match "# ".
	text := strings.TrimPrefix(string(raw), "\uFEFF")

	var (
		sections []Section
		title    string
		body     []string // trimmed lines of the section being collected
		open     bool     // true once the first heading has been seen
	)
	// flush stores the section collected so far, if there is one.
	flush := func() {
		if !open {
			return
		}
		sections = append(sections, Section{
			Title: title,
			// Join once instead of growing a string line by line.
			Content: strings.TrimSpace(strings.Join(body, " ")),
		})
	}

	for _, line := range strings.Split(text, "\n") {
		trimmed := strings.TrimSpace(line)
		if strings.HasPrefix(trimmed, "# ") {
			// A new heading closes the previous section and opens the next.
			flush()
			title = strings.TrimPrefix(trimmed, "# ")
			body = body[:0]
			open = true
			continue
		}
		if open {
			body = append(body, trimmed)
		}
	}
	// Store the last section.
	flush()
	return sections
}
// downloadAndReplaceImages downloads every image URL found in content and
// returns content with each successfully downloaded URL replaced by the name
// of the local file it was saved to.
//
// Local files are named "<folderName>-<n>.png", where n is the value counter
// points at. The counter is incremented after each successful download, so
// numbering continues across calls. A URL whose download fails is reported on
// standard output and left unchanged in the returned text.
func downloadAndReplaceImages(content string, folderName string, counter *int) string {
	result := content
	for _, remote := range extractImageURLs(content) {
		local := fmt.Sprintf("%s-%d.png", folderName, *counter)
		if err := downloadImage(remote, local); err != nil {
			fmt.Printf("下载图片失败: %v\n", err)
			continue
		}
		// Only the first remaining occurrence is rewritten per download.
		result = strings.Replace(result, remote, local, 1)
		(*counter)++
	}
	return result
}
// imageURLPattern matches an http(s) URL up to the first character that
// cannot belong to it in Markdown or HTML text: whitespace, angle brackets,
// quotes, ")" or "]". Stopping at ")" keeps the closing parenthesis of
// "![alt](url)" out of the match. The trade-off is that a URL which itself
// contains ")" is cut short at that character.
var imageURLPattern = regexp.MustCompile(`https?://[^\s<>"')\]]+`)

// extractImageURLs returns, in order of appearance, every http(s) URL in
// content that looks like an image link. A URL qualifies when it contains
// ".png", ".jpg", ".jpeg" or ".gif" anywhere in it (not only as a suffix),
// compared case-insensitively. The result is empty when nothing matches.
func extractImageURLs(content string) []string {
	var urls []string
	for _, candidate := range imageURLPattern.FindAllString(content, -1) {
		lower := strings.ToLower(candidate)
		for _, ext := range []string{".png", ".jpg", ".jpeg", ".gif"} {
			if strings.Contains(lower, ext) {
				urls = append(urls, candidate)
				break
			}
		}
	}
	return urls
}
// downloadImage fetches url with an HTTP GET and stores the response body in
// a new file named fileName, overwriting any existing file of that name.
//
// It returns an error when the request fails, when the server answers with
// anything other than 200 OK, or when the file cannot be created, written or
// closed. In the non-200 case no file is created, so an error page is never
// saved as an image. If writing fails part-way, the incomplete file is
// removed.
func downloadImage(url string, fileName string) (err error) {
	response, err := http.Get(url)
	if err != nil {
		return err
	}
	defer response.Body.Close()

	if response.StatusCode != http.StatusOK {
		return fmt.Errorf("downloading %s: unexpected status %s", url, response.Status)
	}

	file, err := os.Create(fileName)
	if err != nil {
		return err
	}
	defer func() {
		// A failed Close can mean buffered data never reached the disk, so
		// surface it unless an earlier error is already being returned.
		if closeErr := file.Close(); err == nil {
			err = closeErr
		}
		if err != nil {
			// Best-effort cleanup of the partial file; the original error
			// is what the caller needs to see.
			_ = os.Remove(fileName)
		}
	}()

	_, err = io.Copy(file, response.Body)
	return err
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment