Created
April 23, 2023 15:30
-
-
Save wingedpig/c1cf3de88cb1c251b92fff399b8d2a8c to your computer and use it in GitHub Desktop.
A Go program to parse ChatGPT export files
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"encoding/json" | |
"flag" | |
"fmt" | |
"io/ioutil" | |
"log" | |
"os" | |
"sort" | |
"strconv" | |
"strings" | |
"time" | |
) | |
// Time is a timestamp that is encoded as epoch seconds in JSON.
type Time time.Time

// MarshalJSON encodes the timestamp as an integer number of seconds since the
// Unix epoch.
func (t Time) MarshalJSON() ([]byte, error) {
	return []byte(strconv.FormatInt(time.Time(t).Unix(), 10)), nil
}

// UnmarshalJSON decodes a JSON number holding epoch seconds. Any fractional
// part (e.g. "1682263800.123456") is discarded, so the result has whole-second
// resolution. A JSON null leaves the receiver unchanged, following the
// encoding/json convention for Unmarshaler implementations. Any other input
// that does not start with a base-10 integer yields the strconv parse error.
func (t *Time) UnmarshalJSON(s []byte) error {
	r := string(s)
	if r == "null" {
		// encoding/json calls UnmarshalJSON for null values too; treat it as
		// "no timestamp" rather than failing the whole document.
		return nil
	}
	// Keep only the integer part of the number.
	if i := strings.IndexByte(r, '.'); i >= 0 {
		r = r[:i]
	}
	q, err := strconv.ParseInt(r, 10, 64)
	if err != nil {
		return err
	}
	*t = Time(time.Unix(q, 0))
	return nil
}

// Unix returns the timestamp as seconds since the Unix epoch.
func (t Time) Unix() int64 {
	return time.Time(t).Unix()
}

// Time returns the JSON time as a time.Time instance in UTC.
func (t Time) Time() time.Time {
	return time.Time(t).UTC()
}

// String formats the timestamp, in UTC, using time.Time's default layout.
func (t Time) String() string {
	return t.Time().String()
}
type Chunk struct { | |
ID string `json:"id"` | |
Message struct { | |
ID string `json:"id"` | |
Author struct { | |
Role string `json:"role"` | |
} `json:"author"` | |
Created Time `json:"create_time"` | |
Content struct { | |
Type string `json:"content_type"` | |
Parts []string `json:"parts"` | |
} `json:"content"` | |
} `json:"message"` | |
} | |
type Convo struct { | |
Title string `json:"title"` | |
Created Time `json:"create_time"` | |
Updated Time `json:"update_time"` | |
Chunks map[string]Chunk `json:"mapping"` | |
} | |
func main() { | |
flag.Parse() | |
configPtr := flag.Args() | |
l := len(configPtr) | |
if l < 2 { | |
fmt.Printf("Usage: chatgptparseexport conversations.json outdir\n") | |
os.Exit(1) | |
} | |
in := configPtr[0] | |
outdir := strings.TrimSuffix(configPtr[1], "/") | |
content, err := ioutil.ReadFile(in) | |
if err != nil { | |
log.Fatal("Error when opening file: ", err) | |
} | |
var convos []Convo | |
err = json.Unmarshal(content, &convos) | |
if err != nil { | |
log.Fatal("Error during Unmarshal(): ", err) | |
} | |
fmt.Printf("Read %d conversations\n", len(convos)) | |
for _, convo := range convos { | |
// sort the chunks by time | |
var chunks []Chunk | |
for _, chunk := range convo.Chunks { | |
chunks = append(chunks, chunk) | |
} | |
sort.Slice(chunks, func(i, j int) bool { | |
return chunks[i].Message.Created.Unix() < chunks[j].Message.Created.Unix() | |
}) | |
// now write the chunks to a file | |
title := strings.TrimSuffix(convo.Title, ".") | |
filename := fmt.Sprintf("%s/%s.md", outdir, strings.ReplaceAll(title, "/", "-")) | |
fmt.Printf("Generating %s\n", filename) | |
f, err := os.Create(fmt.Sprintf("%s/%s.md", outdir, title)) | |
if err != nil { | |
log.Fatal("Error when writing file: ", err) | |
} | |
f.WriteString(fmt.Sprintf("Created: %s\n", convo.Created.Time().Format("01-02-2006"))) | |
f.WriteString(fmt.Sprintf("Updated: %s\n", convo.Updated.Time().Format("01-02-2006"))) | |
f.WriteString("\n") | |
for _, chunk := range chunks { | |
for _, part := range chunk.Message.Content.Parts { | |
if part == "" { | |
continue | |
} | |
if chunk.Message.Author.Role == "user" { | |
f.WriteString("**User:** ") | |
} else { | |
f.WriteString("**ChatGPT:** ") | |
} | |
// ensure all code blocks are closed, because if you interrupt ChatGPT when it's writing out code, | |
// the code block will not be closed | |
num := strings.Count(part, "```") | |
if num%2 != 0 { | |
part = part + "```" | |
} | |
_, err := f.WriteString(part) | |
if err != nil { | |
log.Fatal("Error when writing file: ", err) | |
} | |
f.WriteString("\n\n") | |
} | |
} | |
f.Close() | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment