Created
April 26, 2023 01:35
-
-
Save Vbitz/fbd79216fa413e77eb29c0731401ff3e to your computer and use it in GitHub Desktop.
GitHub star-and-follow spidering script. Warning: very hackish.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"context" | |
"crypto/sha256" | |
"encoding/csv" | |
"encoding/hex" | |
"encoding/json" | |
"errors" | |
"flag" | |
"fmt" | |
"log" | |
"os" | |
"path" | |
"strings" | |
"github.com/google/go-github/github" | |
"github.com/schollz/progressbar/v3" | |
"golang.org/x/oauth2" | |
) | |
// storePath is the root directory of the on-disk cache (-store flag).
var storePath = flag.String("store", "store/", "The path to store all findings.")

// maxDepth is the depth budget handed to every start user (-depth flag);
// each hop from user to repository or repository to user spends one unit.
var maxDepth = flag.Int("depth", 8, "The maximum recursion depth.")
// ErrBlacklist is returned by GetRepository when the requested repository
// is listed in the repository blacklist.
var ErrBlacklist = fmt.Errorf("Blacklisted")
// PathKind names the cache sub-directory that a given kind of record is
// stored under (see getPath).
type PathKind string

// Cache sub-directories, one per kind of cached GitHub record.
const (
	KIND_USER       = PathKind("users")     // a single user profile
	KIND_USER_REPOS = PathKind("userRepos") // the repository list of a user
	KIND_USER_STARS = PathKind("userStars") // the starred-repository list of a user
	KIND_REPO       = PathKind("repos")     // a single repository
)
func getPath(kind PathKind, name string) string { | |
nameHash := sha256.Sum256([]byte(name)) | |
nameHex := hex.EncodeToString(nameHash[:]) | |
return path.Join(*storePath, string(kind), nameHex) | |
} | |
// FlaggedRepo is one row of the final report: a repository whose owner,
// name or description matched the word list.
type FlaggedRepo struct {
	Owner, Name, Description string

	// Skipped is true when the repository was flagged but not explored
	// further because the depth budget was exhausted.
	Skipped bool
}
type MalwareIntel struct { | |
client *github.Client | |
foundUsers map[string]bool | |
foundRepos map[string]bool | |
wordList []string | |
startUsers []string | |
blacklistUsers map[string]bool | |
blacklistRepos map[string]bool | |
jobs []Job | |
flaggedRepos []FlaggedRepo | |
} | |
func (a *MalwareIntel) pathExists(path string) (bool, error) { | |
_, err := os.Stat(path) | |
if errors.Is(err, os.ErrNotExist) { | |
return false, nil | |
} else if err != nil { | |
return false, err | |
} else { | |
return true, nil | |
} | |
} | |
func (a *MalwareIntel) ensurePath(path string) error { | |
err := os.MkdirAll(path, os.ModePerm) | |
if err != nil { | |
return err | |
} | |
return nil | |
} | |
func (a *MalwareIntel) readCache(p string, ret any) (bool, error) { | |
// infoPath := path.Join(p, "info.json") | |
contentPath := path.Join(p, "content.json") | |
exists, err := a.pathExists(p) | |
if err != nil { | |
return false, err | |
} | |
if exists { | |
f, err := os.Open(contentPath) | |
if errors.Is(err, os.ErrNotExist) { | |
// The file not existing just suggests that the directory needs to be created. | |
return false, nil | |
} else if err != nil { | |
return false, err | |
} | |
defer f.Close() | |
dec := json.NewDecoder(f) | |
err = dec.Decode(ret) | |
if err != nil { | |
return false, err | |
} | |
return true, nil | |
} else { | |
err := a.ensurePath(p) | |
if err != nil { | |
return false, err | |
} | |
return false, nil | |
} | |
} | |
func (a *MalwareIntel) writeCache(p string, content any) error { | |
// infoPath := path.Join(p, "info.json") | |
contentPath := path.Join(p, "content.json") | |
f, err := os.Create(contentPath) | |
if err != nil { | |
return err | |
} | |
defer f.Close() | |
enc := json.NewEncoder(f) | |
err = enc.Encode(content) | |
if err != nil { | |
return err | |
} | |
return nil | |
} | |
func (a *MalwareIntel) GetUserList() []string { | |
return a.startUsers | |
} | |
func (a *MalwareIntel) GetUser(name string) (*github.User, error) { | |
userPath := getPath(KIND_USER, name) | |
var user *github.User | |
ok, err := a.readCache(userPath, &user) | |
if err != nil { | |
return nil, err | |
} | |
if !ok { | |
log.Printf("cache miss for user: %s", name) | |
user, _, err = a.client.Users.Get(context.Background(), name) | |
if err != nil { | |
return nil, err | |
} | |
err = a.writeCache(userPath, user) | |
if err != nil { | |
return nil, err | |
} | |
} | |
return user, nil | |
} | |
func (a *MalwareIntel) GetUserRepositories(u *github.User) ([]*github.Repository, error) { | |
name := u.GetLogin() | |
userPath := getPath(KIND_USER_REPOS, name) | |
var repos []*github.Repository | |
ok, err := a.readCache(userPath, &repos) | |
if err != nil { | |
return nil, err | |
} | |
if !ok { | |
log.Printf("cache miss for user repo list: %s", name) | |
repos, _, err = a.client.Repositories.List(context.Background(), name, &github.RepositoryListOptions{}) | |
if err != nil { | |
return nil, err | |
} | |
err = a.writeCache(userPath, repos) | |
if err != nil { | |
return nil, err | |
} | |
} | |
return repos, nil | |
} | |
func (a *MalwareIntel) GetUserStarredRepositories(u *github.User) ([]*github.StarredRepository, error) { | |
name := u.GetLogin() | |
userPath := getPath(KIND_USER_STARS, name) | |
var repos []*github.StarredRepository | |
ok, err := a.readCache(userPath, &repos) | |
if err != nil { | |
return nil, err | |
} | |
if !ok { | |
log.Printf("cache miss for user starred list: %s", name) | |
repos, _, err = a.client.Activity.ListStarred(context.Background(), name, &github.ActivityListStarredOptions{}) | |
if err != nil { | |
return nil, err | |
} | |
err = a.writeCache(userPath, repos) | |
if err != nil { | |
return nil, err | |
} | |
} | |
return repos, nil | |
} | |
func (a *MalwareIntel) GetRepository(login string, name string) (*github.Repository, error) { | |
id := login + "/" + name | |
if _, ok := a.blacklistRepos[id]; ok { | |
return nil, ErrBlacklist | |
} | |
userPath := getPath(KIND_REPO, name) | |
var repo *github.Repository | |
ok, err := a.readCache(userPath, &repo) | |
if err != nil { | |
return nil, err | |
} | |
if !ok { | |
log.Printf("cache miss for repository: %s/%s", login, name) | |
repo, _, err = a.client.Repositories.Get(context.Background(), login, name) | |
if err != nil { | |
return nil, err | |
} | |
err = a.writeCache(userPath, repo) | |
if err != nil { | |
return nil, err | |
} | |
} | |
return repo, nil | |
} | |
func (a *MalwareIntel) IsFlagged(s string) bool { | |
lower := strings.ToLower(s) | |
for _, word := range a.wordList { | |
if strings.Contains(lower, word) { | |
return true | |
} | |
} | |
return false | |
} | |
// Job is one unit of queued crawl work. Run may submit further jobs; a
// non-nil error aborts the whole crawl in main.
type Job interface{ Run() error }
func (a *MalwareIntel) SubmitJob(job Job) { | |
a.jobs = append(a.jobs, job) | |
} | |
func (a *MalwareIntel) AnalyzeUser(user *github.User, depth int) error { | |
// Make sure we haven't already looked at this user. | |
if _, ok := a.foundUsers[user.GetLogin()]; ok { | |
return nil | |
} | |
a.SubmitJob(&AnalyzeUserJob{ | |
app: a, | |
user: user, | |
depth: depth, | |
}) | |
return nil | |
} | |
type AnalyzeUserJob struct { | |
app *MalwareIntel | |
user *github.User | |
depth int | |
} | |
func (j *AnalyzeUserJob) Run() error { | |
// Make sure we haven't already looked at this user. | |
if _, ok := j.app.foundUsers[j.user.GetLogin()]; ok { | |
return nil | |
} | |
if j.depth <= 0 { | |
// log.Printf("Skipping %s due to recursion depth", j.user.GetLogin()) | |
return nil | |
} | |
j.app.foundUsers[j.user.GetLogin()] = true | |
// log.Printf("[USER] %s : %s", j.user.GetLogin(), j.user.GetBio()) | |
// Search though repositories. | |
repos, err := j.app.GetUserRepositories(j.user) | |
if err != nil { | |
return err | |
} | |
for _, repo := range repos { | |
err := j.app.AnalyzeRepo(repo, j.depth-1) | |
if err != nil { | |
return err | |
} | |
} | |
// Search though followers. | |
// Search though following. | |
// Search though starred. | |
starred, err := j.app.GetUserStarredRepositories(j.user) | |
if err != nil { | |
return err | |
} | |
for _, star := range starred { | |
err := j.app.AnalyzeRepo(star.GetRepository(), j.depth-1) | |
if err != nil { | |
return err | |
} | |
} | |
return nil | |
} | |
func (a *MalwareIntel) AnalyzeRepo(repo *github.Repository, depth int) error { | |
login := repo.GetOwner().GetLogin() | |
name := repo.GetName() | |
// Make sure we haven't already looked at this repository. | |
if _, ok := a.foundRepos[login+"/"+name]; ok { | |
return nil | |
} | |
a.SubmitJob(&AnalyzeRepoJob{ | |
app: a, | |
repo: repo, | |
depth: depth, | |
}) | |
return nil | |
} | |
type AnalyzeRepoJob struct { | |
app *MalwareIntel | |
repo *github.Repository | |
depth int | |
} | |
func (j *AnalyzeRepoJob) Run() error { | |
login := j.repo.GetOwner().GetLogin() | |
name := j.repo.GetName() | |
// Make sure we haven't already looked at this repository. | |
if _, ok := j.app.foundRepos[login+"/"+name]; ok { | |
return nil | |
} | |
description := j.repo.GetDescription() | |
// TODO(joshua): Get repo README.md to scan it. | |
isFork := j.repo.GetFork() | |
details := fmt.Sprintf("%s/%s: %s", login, name, description) | |
var flagged = false | |
// Check if the repo is flagged. | |
if j.app.IsFlagged(login) || j.app.IsFlagged(name) || j.app.IsFlagged(description) { | |
details = "[FLAGGED] " + details | |
flagged = true | |
} | |
if j.depth <= 0 { | |
if flagged { | |
log.Printf("Skipping %s due to recursion depth", j.repo.GetFullName()) | |
j.app.flaggedRepos = append(j.app.flaggedRepos, FlaggedRepo{ | |
Owner: login, | |
Name: name, | |
Description: description, | |
Skipped: true, | |
}) | |
} | |
return nil | |
} | |
j.app.foundRepos[login+"/"+name] = true | |
// Check if this repo is a fork. | |
if isFork && flagged { | |
fullDetails, err := j.app.GetRepository(j.repo.GetOwner().GetLogin(), j.repo.GetName()) | |
if err == ErrBlacklist { | |
// This repo is blacklisted. | |
} else if err != nil { | |
return err | |
} else { | |
parent := fullDetails.GetParent() | |
// If it is then analyze the parent. | |
err = j.app.AnalyzeRepo(parent, j.depth-1) | |
if err != nil { | |
log.Fatal(err) | |
} | |
details = fmt.Sprintf("[FORK %s/%s] ", parent.GetOwner().GetLogin(), parent.GetName()) + details | |
} | |
} | |
if flagged { | |
log.Println("[REPO]", details) | |
j.app.flaggedRepos = append(j.app.flaggedRepos, FlaggedRepo{ | |
Owner: login, | |
Name: name, | |
Description: description, | |
Skipped: false, | |
}) | |
} | |
if flagged { | |
// Search through owner. | |
err := j.app.AnalyzeUser(j.repo.Owner, j.depth-1) | |
if err != nil { | |
log.Fatal(err) | |
} | |
// Search through forks. | |
// Search through starred. | |
// Search through follows. | |
} | |
return nil | |
} | |
func main() { | |
flag.Parse() | |
out, err := os.Create("output.log") | |
if err != nil { | |
fmt.Printf("fatal: %v", err) | |
return | |
} | |
defer out.Close() | |
log.SetOutput(out) | |
token, err := os.ReadFile("token.txt") | |
if err != nil { | |
fmt.Printf("fatal: %v", err) | |
return | |
} | |
ctx := context.Background() | |
ts := oauth2.StaticTokenSource( | |
&oauth2.Token{AccessToken: string(token)}, | |
) | |
tc := oauth2.NewClient(ctx, ts) | |
client := github.NewClient(tc) | |
app := &MalwareIntel{ | |
client: client, | |
foundUsers: make(map[string]bool), | |
foundRepos: make(map[string]bool), | |
blacklistUsers: make(map[string]bool), | |
blacklistRepos: make(map[string]bool), | |
} | |
blacklist, err := os.ReadFile("blacklist.txt") | |
if err != nil { | |
fmt.Printf("fatal: %v", err) | |
return | |
} | |
startUsers, err := os.ReadFile("users.txt") | |
if err != nil { | |
fmt.Printf("fatal: %v", err) | |
return | |
} | |
app.startUsers = strings.Split(string(startUsers), "\n") | |
flaggedWords, err := os.ReadFile("flagged.txt") | |
if err != nil { | |
fmt.Printf("fatal: %v", err) | |
return | |
} | |
app.wordList = strings.Split(string(flaggedWords), "\n") | |
for _, item := range strings.Split(string(blacklist), "\n") { | |
tokens := strings.Split(item, " ") | |
if tokens[0] == "user" { | |
app.blacklistUsers[tokens[1]] = true | |
} else if tokens[0] == "repo" { | |
app.blacklistRepos[tokens[1]] = true | |
} | |
} | |
// Enumerate though each user in the store. | |
for _, login := range app.GetUserList() { | |
// Force the user to be enumerated. | |
delete(app.foundUsers, login) | |
// Get all user details. | |
user, err := app.GetUser(login) | |
if err != nil { | |
fmt.Printf("fatal: %v", err) | |
return | |
} | |
err = app.AnalyzeUser(user, *maxDepth) | |
if err != nil { | |
fmt.Printf("fatal: %v", err) | |
return | |
} | |
} | |
prog := progressbar.Default(-1, "Executing Jobs") | |
for { | |
if len(app.jobs) == 0 { | |
break | |
} | |
i := 0 | |
job := app.jobs[i] | |
app.jobs = append(app.jobs[:i], app.jobs[i+1:]...) | |
err := job.Run() | |
if err != nil { | |
fmt.Printf("fatal: %v", err) | |
return | |
} | |
prog.Add(1) | |
prog.Describe(fmt.Sprintf("Executing Jobs: currently waiting: %d", len(app.jobs))) | |
} | |
outputCsv, err := os.Create("output.csv") | |
if err != nil { | |
fmt.Printf("fatal: %v", err) | |
return | |
} | |
defer outputCsv.Close() | |
csvWriter := csv.NewWriter(outputCsv) | |
for _, repo := range app.flaggedRepos { | |
csvWriter.Write([]string{ | |
repo.Owner, repo.Name, repo.Description, func() string { | |
if repo.Skipped { | |
return "true" | |
} else { | |
return "false" | |
} | |
}(), | |
}) | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Copyright 2023 Joshua D. Scarsbrook | |
Licensed under the Apache License, Version 2.0 (the "License"); | |
you may not use this file except in compliance with the License. | |
You may obtain a copy of the License at | |
http://www.apache.org/licenses/LICENSE-2.0 | |
Unless required by applicable law or agreed to in writing, software | |
distributed under the License is distributed on an "AS IS" BASIS, | |
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
See the License for the specific language governing permissions and | |
limitations under the License. |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment