Skip to content

Instantly share code, notes, and snippets.

@sats17
Last active December 23, 2024 06:18
Show Gist options
  • Save sats17/01845e6d6b8ec212bd68eb39e51ecea7 to your computer and use it in GitHub Desktop.
Save sats17/01845e6d6b8ec212bd68eb39e51ecea7 to your computer and use it in GitHub Desktop.

############################################################################################################ ################## ############################# ################## #############################

#             This gist collection contains assorted Python utility scripts.

################## ############################# ################## ############################# ############################################################################################################

import pandas as pd

def fetchFieldFromCsvFile(file):
    """Collect and print every value of the ``csv_header`` column.

    Prints the number of values, then the list of values.

    Args:
        file: pandas DataFrame that has a ``csv_header`` column.

    Returns:
        list: the ``csv_header`` values in row order.
    """
    # Collected in a local list so the function does not depend on a
    # module-level ``Arr`` existing before it is called.
    values = [row.csv_header for row in file.itertuples()]
    print(len(values))
    print(values)
    return values

def bifurcateDuplicateAndUniqueValuesFromCSV(file):
    """Split the ``csv_header`` column into first occurrences and repeats.

    Example input column::

        csv_header
        "duplicate"
        "duplicate"
        "unique"

    Output files (written to the current working directory, without header
    or index):
        duplicate.csv -- every occurrence of a value that was already seen
                         (``duplicate`` once, for the example above)
        unique.csv    -- each distinct value once, in first-seen order
                         (``duplicate`` and ``unique`` for the example above)

    Args:
        file: pandas DataFrame that has a ``csv_header`` column.

    Returns:
        list: the distinct values in first-seen order (same content as
        unique.csv).
    """
    unique = []
    duplicate = []
    # Set used only for membership tests; ``unique`` keeps the order.
    # Testing against the list itself would make the loop quadratic.
    seen = set()
    for count, row in enumerate(file.itertuples(), start=1):
        value = row.csv_header
        if value in seen:
            duplicate.append(value)
        else:
            seen.add(value)
            unique.append(value)
        # Progress output: running row number.
        print(count)
    print("Number of duplicate records ", len(duplicate))
    print("Number of unique records ", len(unique))
    pd.DataFrame(duplicate).to_csv("duplicate.csv", header=False, index=False)
    pd.DataFrame(unique).to_csv("unique.csv", header=False, index=False)
    return unique

# Script entry for the CSV helpers: load the input once, then list the column
# values and write the duplicate/unique split.
file = pd.read_csv("csvFile.csv")
fetchFieldFromCsvFile(file)
bifurcateDuplicateAndUniqueValuesFromCSV(file)

import requests import urllib3 from concurrent.futures import ThreadPoolExecutor, as_completed

# Silence urllib3's InsecureRequestWarning, which would otherwise be emitted
# because the HTTP requests in this script pass verify=False.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

def fetch_gists(username, token, *, per_page=100, timeout=30):
    """Fetch every gist of a GitHub user, following API pagination.

    Args:
        username: GitHub login whose gists are listed.
        token: personal access token sent in the ``Authorization`` header.
        per_page: page size requested from the API (GitHub caps it at 100).
        timeout: seconds to wait for each HTTP request before giving up.

    Returns:
        list of gist objects (decoded JSON) on success, or ``None`` if any
        page request returns a status other than 200 (the status code is
        printed).

    Raises:
        requests.exceptions.RequestException: on connection errors or
            timeouts.
    """
    headers = {"Authorization": f"token {token}"}
    url = f"https://api.github.com/users/{username}/gists"

    gists = []
    page = 1
    while True:
        # NOTE(review): verify=False turns off TLS certificate checks while
        # the access token is being sent. Kept because the script disables
        # the matching warning on purpose; enable verification if the
        # environment allows it.
        response = requests.get(
            url,
            headers=headers,
            params={"per_page": per_page, "page": page},
            timeout=timeout,
            verify=False,
        )

        if response.status_code != 200:
            print("Error:", response.status_code)
            return None

        batch = response.json()
        gists.extend(batch)
        # A short (or empty) page means there is nothing left to fetch.
        if len(batch) < per_page:
            return gists
        page += 1

def search_keyword_in_file(file_url, keyword, timeout=30):
    """Download a raw gist file and test whether it contains ``keyword``.

    The comparison is case-insensitive.

    Args:
        file_url: raw URL of the gist file.
        keyword: text to look for.
        timeout: seconds to wait for the download before giving up, so a
            stalled request cannot block a worker thread indefinitely.

    Returns:
        bool: ``True`` if the keyword occurs in the file content; ``False``
        if it does not or if the download returned an error status.

    Raises:
        requests.exceptions.RequestException: on connection errors or
            timeouts.
    """
    # NOTE(review): verify=False turns off TLS certificate checks; kept to
    # match the rest of the script.
    response = requests.get(file_url, timeout=timeout, verify=False)
    if not response.ok:
        # Do not search the body of an error page as if it were the file.
        print("Error:", response.status_code)
        return False
    return keyword.lower() in response.text.lower()

def search_keyword_in_gist(gist, keyword):
    """Return one match record per file of ``gist`` that contains ``keyword``.

    Args:
        gist: gist object with an ``"id"`` and a ``"files"`` mapping whose
            values carry ``"raw_url"`` and ``"filename"``.
        keyword: text to look for in each file.

    Returns:
        list of ``{"gist_id": ..., "file_name": ...}`` dicts, one per
        matching file; empty when nothing matches.
    """
    gist_id = gist["id"]
    return [
        {"gist_id": gist_id, "file_name": entry["filename"]}
        for entry in gist["files"].values()
        if search_keyword_in_file(entry["raw_url"], keyword)
    ]

def search_keyword_in_gists(username, token, keyword):
    """Search all gists of ``username`` for ``keyword`` using a thread pool.

    Each gist is searched in its own task. Every match record of a gist is
    tagged with a ``"folder_name"`` equal to the file name of that gist's
    first match.

    Args:
        username: GitHub login whose gists are searched.
        token: personal access token used to list the gists.
        keyword: text to look for.

    Returns:
        list of match dicts (``gist_id``, ``file_name``, ``folder_name``) in
        task-completion order, or ``None`` when no gists could be fetched.
    """
    gists = fetch_gists(username, token)
    if not gists:
        print(f"No gists found for user {username}")
        return None

    matches = []
    with ThreadPoolExecutor() as pool:
        pending = [
            pool.submit(search_keyword_in_gist, gist, keyword) for gist in gists
        ]
        for finished in as_completed(pending):
            hits = finished.result()
            if not hits:
                continue
            # The first matching file's name labels the whole gist.
            label = hits[0]["file_name"]
            for hit in hits:
                hit["folder_name"] = label
            matches.extend(hits)

    return matches

# Placeholder account, token and search term for the gist search below.
username = "github_username"
token = "github_pat_token"
keyword = "gateway"

results = search_keyword_in_gists(username, token, keyword)

# Report every match, or say that nothing was found.
if not results:
    print(f"No gists found containing the keyword '{keyword}'.")
else:
    print(f"Keyword '{keyword}' found in the following gists:")
    for result in results:
        print(f"Folder name: {result['folder_name']}, Gist ID: {result['gist_id']}, File name: {result['file_name']}")

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment