@theSoberSobber
Created May 12, 2024 20:04
Uploads a folder named test to Drive with tag support and creates an index.json in the same directory. Tags are supplied in the filename as meow,,cat,,animal,,pet.txt: here meow.txt will be the name of the file and cat, animal, and pet are its tags.
import os
import hashlib
import json

from Google import Create_Service
from googleapiclient.http import MediaFileUpload

API_NAME = 'drive'
API_VERSION = 'v3'
SCOPES = ['https://www.googleapis.com/auth/drive']

service = Create_Service("client_secret_akshat.json", API_NAME, API_VERSION, SCOPES)
print(service)

def upload_internal(name, file_path):
    # Upload a single file to the Drive root and return its id and view link.
    file_metadata = {
        'name': name,
        'parents': ["root"]
    }
    media = MediaFileUpload(file_path)
    uploaded_file = service.files().create(
        body=file_metadata,
        media_body=media,
        fields='id, webViewLink'
    ).execute()
    return uploaded_file['id'], uploaded_file['webViewLink']

def calculate_file_hash(file_path):
    """Calculate the SHA-256 hash of a file."""
    sha256_hash = hashlib.sha256()
    with open(file_path, "rb") as file:
        # Read the file in chunks to avoid loading the entire file into memory
        while chunk := file.read(4096):
            sha256_hash.update(chunk)
    return sha256_hash.hexdigest()

def extract_tags(filename):
    # Split "meow,,cat,,pet.txt" into the real name ("meow.txt") and its tags.
    name, ext = os.path.splitext(os.path.basename(filename))
    parts = name.split(',,')
    return parts[0] + ext, parts[1:]

# print(extract_tags("test/meow,,kitten,,cat,,animal,,pet.txt"))

def upload(file_path):
    name, tags = extract_tags(file_path)
    _, link = upload_internal(name, file_path)
    return name, link, tags

index = {}
directory = "test"
for root, dirs, files in os.walk(directory):
    for file in files:
        file_path = os.path.join(root, file)
        if os.path.isfile(file_path):
            name, link, tags = upload(file_path)
            file_hash = calculate_file_hash(file_path)
            index[file_hash] = {"name": name, "link": link, "tags": tags}

with open("index.json", 'w') as f:
    json.dump(index, f, indent=4)
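
After a run, index.json maps each file's SHA-256 hash to its metadata. A sample entry might look like this (the hash and link values here are illustrative, not real):

{
    "9f86d081884c7d659a2feaa0c55ad015a3bf4f1b2b0b822cd15d6c15b0f00a08": {
        "name": "meow.txt",
        "link": "https://drive.google.com/file/d/<file-id>/view",
        "tags": ["kitten", "cat", "animal", "pet"]
    }
}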
@theSoberSobber commented May 12, 2024

remove_duplicates.py, to run on the folder prior to uploading:

import os
import hashlib

def hash_file(file_path):
    BUF_SIZE = 65536
    sha256 = hashlib.sha256()
    with open(file_path, 'rb') as f: 
        while True:
            data = f.read(BUF_SIZE)
            if not data:
                break
            sha256.update(data)
    return sha256.hexdigest()

def find_duplicates(directory):
    # Map content hash -> first path seen with that content
    hash_dict = {}
    # List of (newer file, original file) conflicts
    duplicates = []
    for root, _, files in os.walk(directory):
        for file in files:
            file_path = os.path.join(root, file)
            file_hash = hash_file(file_path)
            if file_hash in hash_dict:
                duplicates.append((file_path, hash_dict[file_hash]))
            else:
                hash_dict[file_hash] = file_path
    # Also flag filename conflicts: the uploader keeps only base names,
    # so two different files sharing a name would collide after upload.
    filename_dict = {}
    for root, _, files in os.walk(directory):
        for file in files:
            file_path = os.path.join(root, file)
            if file in filename_dict:
                duplicates.append((file_path, filename_dict[file]))
            else:
                filename_dict[file] = file_path
    return duplicates

def resolve_duplicates(duplicates):
    for dup in duplicates:
        # A file can appear in more than one conflict; skip paths already deleted.
        if not (os.path.exists(dup[0]) and os.path.exists(dup[1])):
            continue
        print("Conflict Found:")
        print(f"1. {dup[0]}")
        print(f"2. {dup[1]}")
        choice = input("Enter the number of the file to keep (1/2): ")
        if choice == '1':
            os.remove(dup[1])
            print(f"Kept {dup[0]} and deleted {dup[1]}")
        elif choice == '2':
            os.remove(dup[0])
            print(f"Kept {dup[1]} and deleted {dup[0]}")
        else:
            print("Invalid choice. Skipping this conflict.")
        print()

if __name__ == "__main__":
    directory = 'test'  # Specify the directory to check for duplicates
    duplicates = find_duplicates(directory)
    if duplicates:
        print("Duplicates found. Resolving conflicts:")
        resolve_duplicates(duplicates)
    else:
        print("No duplicates found.")

@theSoberSobber commented May 12, 2024

To create the secret file: go to the Google Cloud console, create a new project, enable the Drive API from the library, create new credentials for a desktop app, and add your own email ID as a test user. Then authenticate locally via localhost (the link is printed when the script launches) and just run the script; it should say the service was created and log the service object.
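
For reference, Create_Service comes from a helper module Google.py that these Drive tutorials usually ship with. A minimal sketch of what such a helper typically does, using google-auth-oauthlib (the actual Google.py used here may differ):

import os
import pickle

from google.auth.transport.requests import Request
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build

def Create_Service(client_secret_file, api_name, api_version, scopes):
    creds = None
    token_file = f'token_{api_name}_{api_version}.pickle'  # cached credentials
    if os.path.exists(token_file):
        with open(token_file, 'rb') as token:
            creds = pickle.load(token)
    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            creds.refresh(Request())
        else:
            # Opens the browser / prints the localhost auth link mentioned above
            flow = InstalledAppFlow.from_client_secrets_file(client_secret_file, scopes)
            creds = flow.run_local_server(port=0)
        with open(token_file, 'wb') as token:
            pickle.dump(creds, token)
    return build(api_name, api_version, credentials=creds)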

@theSoberSobber commented May 12, 2024

monolith.py, to convert a repo into the test folder for upload with tags (written for the all-sem notes repo; there is also a fix script below to add a Notes folder to that repo):

import os
import shutil
import sys

def copy_files(input_folder, output_folder, folder_names=None):
    if folder_names is None:
        folder_names = []
    
    for item in os.listdir(input_folder):
        item_path = os.path.join(input_folder, item)
        if os.path.isdir(item_path):
            copy_files(item_path, output_folder, folder_names + [item])
        else:
            filename, extension = os.path.splitext(item)
            # Encode the directory chain as ,,-separated tags in the filename;
            # files sitting directly in the input folder get no tags.
            if folder_names:
                new_filename = filename + ',,' + ',,'.join(folder_names) + extension
            else:
                new_filename = filename + extension
            output_path = os.path.join(output_folder, new_filename)
            shutil.copy(item_path, output_path)

if __name__ == "__main__":
    # Check if two arguments are provided
    if len(sys.argv) != 3:
        print("Usage: python script.py <input_folder> <output_folder>")
        sys.exit(1)
    
    input_folder = sys.argv[1]
    output_folder = sys.argv[2]

    # Call the function to copy files
    copy_files(input_folder, output_folder)
    print("Files copied successfully!")

@theSoberSobber commented May 12, 2024

fix.py, to create the Notes dirs in the old repo:

import os
import shutil
import sys

def organize_notes(input_folder):
    for sem in os.listdir(input_folder):
        sem_path = os.path.join(input_folder, sem)
        if not os.path.isdir(sem_path):
            continue
        print(sem)
        for subject in os.listdir(sem_path):
            subject_path = os.path.join(sem_path, subject)
            if not os.path.isdir(subject_path):
                continue
            print(subject)
            notes_folder = os.path.join(subject_path, "Notes")
            os.makedirs(notes_folder, exist_ok=True)
            for pdf in os.listdir(subject_path):
                pdf_path = os.path.join(subject_path, pdf)
                # Move only files; the freshly created Notes folder is skipped.
                if os.path.isfile(pdf_path):
                    print(pdf)
                    shutil.move(pdf_path, os.path.join(notes_folder, pdf))

if __name__ == "__main__":
    # Check if one argument is provided
    if len(sys.argv) != 2:
        print("Usage: python script.py <input_folder>")
        sys.exit(1)
    
    input_folder = sys.argv[1]

    # Call the function to organize notes
    organize_notes(input_folder)
    print("Notes organized successfully!")

@theSoberSobber commented May 12, 2024

The uploader again, now with hashing and stop/resume pickup: files whose hash is already in index.json are skipped, so an interrupted run picks up where it stopped.

import os
from Google import Create_Service
from googleapiclient.http import MediaFileUpload
import hashlib
import json

API_NAME = 'drive'
API_VERSION = 'v3'
SCOPES = ['https://www.googleapis.com/auth/drive']

service = Create_Service("client_secret_akshat.json", API_NAME, API_VERSION, SCOPES)

print(service)

# Start with an empty index; load the existing one so uploads can resume.
data = {}

if os.path.exists('index.json'):
    with open('index.json', 'r') as file:
        data = json.load(file)


def upload_internal(name, file_path):
    file_metadata = {
        'name': name,
        'parents': ["root"]
    }
    media = MediaFileUpload(file_path)
    uploaded_file = service.files().create(
        body=file_metadata,
        media_body=media,
        fields='id, webViewLink'
    ).execute()
    # print("File uploaded successfully.")
    return uploaded_file['id'], uploaded_file['webViewLink']

def calculate_file_hash(file_path):
    """Calculate the SHA-256 hash of a file."""
    sha256_hash = hashlib.sha256()
    with open(file_path, "rb") as file:
        # Read the file in chunks to avoid loading the entire file into memory
        while chunk := file.read(4096):
            sha256_hash.update(chunk)
    return sha256_hash.hexdigest()

def extract_tags(filename):
    # Split "meow,,cat,,pet.txt" into the real name ("meow.txt") and its tags.
    name, ext = os.path.splitext(os.path.basename(filename))
    parts = name.split(',,')
    return parts[0] + ext, parts[1:]

# print(extract_tags("test/meow,,kitten,,cat,,animal,,pet.txt"))

def upload(file_path):
    name, tags = extract_tags(file_path)
    file_hash = calculate_file_hash(file_path)
    # Skip the network call entirely if this content was already uploaded.
    if file_hash in data:
        entry = data[file_hash]
        return file_hash, entry["name"], entry["link"], entry["tags"]
    _, link = upload_internal(name, file_path)
    return file_hash, name, link, tags

directory = "test"
for root, dirs, files in os.walk(directory):
    for file in files:
        file_path = os.path.join(root, file)
        if os.path.isfile(file_path):
            file_hash, name, link, tags = upload(file_path)
            if file_hash in data:
                print(f"File {name} already exists in index.json!")
                continue
            data[file_hash] = {"name": name, "link": link, "tags": tags}
            print(f"Uploaded {name} Successfully with tags: {tags}!")

with open("index.json", 'w') as f:
    json.dump(data, f, indent=4)
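
Because the index is keyed by content hash, rerunning the script after an interruption skips everything already recorded. The index also makes tag lookups easy; a small hypothetical helper:

import json

def find_by_tag(tag, index_path="index.json"):
    # Return (name, link) for every indexed file carrying the given tag.
    with open(index_path) as f:
        index = json.load(f)
    return [(e["name"], e["link"]) for e in index.values() if tag in e["tags"]]

print(find_by_tag("cat"))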
