Created
February 14, 2024 16:04
-
-
Save CoffeeVampir3/1c5b183a9fb8653b5b5ccf3ab8e32f35 to your computer and use it in GitHub Desktop.
Anti-JPEG data collator
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
def _iter_scored_entries(jsonl_file_path):
    """Yield ``(score, entry)`` for each JSONL record that has a numeric score.

    Blank lines are skipped, as are records that are not JSON objects or
    whose 'score' is missing or not an int/float. Without this filtering a
    record lacking a score would make the numeric comparisons in the caller
    raise TypeError. Malformed (non-blank) JSON still raises
    ``json.JSONDecodeError`` so corrupt input is not silently ignored.
    """
    with open(jsonl_file_path, 'r', encoding='utf-8') as file:
        for line in file:
            if not line.strip():
                continue
            entry = json.loads(line)
            score = entry.get('score') if isinstance(entry, dict) else None
            # bool is a subclass of int; a True/False "score" is not a real score.
            if isinstance(score, bool) or not isinstance(score, (int, float)):
                continue
            yield score, entry


def filter_images_by_score_interval(jsonl_file_path, target_interval, output_file_path):
    """Write the entries whose score lies just above the file's minimum score.

    The input file is scanned twice: once to find the minimum score, and once
    to collect every entry with ``min_score <= score < min_score + target_interval``.
    The selected entries are written to ``output_file_path``, one JSON object
    per line, in their original order.

    Args:
        jsonl_file_path: Path of the JSONL file to read; each usable record
            is a JSON object with a numeric 'score' key.
        target_interval: Width of the half-open score window that starts at
            the minimum score.
        output_file_path: Path of the JSONL file to create or overwrite.

    Returns:
        None. If the input contains no record with a numeric score, nothing
        is written and nothing is printed.

    Raises:
        OSError: If either file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # First pass: find the minimum score in the file.
    min_score = min(
        (score for score, _ in _iter_scored_entries(jsonl_file_path)),
        default=None,
    )

    # If no minimum score was found, do nothing.
    if min_score is None:
        return

    # Second pass: keep entries inside [min_score, min_score + target_interval).
    upper_bound = min_score + target_interval
    matching_entries = [
        entry
        for score, entry in _iter_scored_entries(jsonl_file_path)
        if min_score <= score < upper_bound
    ]

    # Write matching entries to a new JSONL file.
    with open(output_file_path, 'w', encoding='utf-8') as outfile:
        for entry in matching_entries:
            json.dump(entry, outfile)
            outfile.write('\n')

    print(f"Matching entries have been written to {output_file_path}")
if __name__ == "__main__":
    # Script entry point: keep the entries whose score is within 0.02 of the
    # lowest score in dataset.jsonl and write them to garbage.jsonl.
    filter_images_by_score_interval(
        jsonl_file_path='/home/blackroot/Desktop/anti-jpgger/dataset.jsonl',  # Replace with your JSONL file path
        target_interval=0.02,  # Width of the score window above the minimum
        output_file_path='garbage.jsonl',  # Path for the new JSONL file
    )
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import sys | |
from PIL import Image | |
def convert_png_to_jpeg(input_folder):
    """Re-encode every PNG in ``input_folder`` as a series of low-quality JPEGs.

    For each file whose name ends in ".png" (case-insensitive), one JPEG per
    quality setting (40, 30, 20, 10 and 1) is written to the "outputs"
    directory, relative to the current working directory. Output files are
    named ``<original stem>_quality<N>.jpg``.

    Args:
        input_folder: Directory to scan. Only its direct children are
            examined; sub-directories are not traversed.

    Returns:
        None. If ``input_folder`` is not an existing directory, a message is
        printed and nothing else happens.
    """
    # Check that the input folder exists.
    if not os.path.isdir(input_folder):
        print(f"The folder {input_folder} does not exist.")
        return

    # Ensure the output directory exists.
    output_dir = "outputs"
    os.makedirs(output_dir, exist_ok=True)

    # Modes saved as-is. Anything else (alpha, palette, ...) is converted to
    # RGB first, because the JPEG writer rejects modes such as RGBA, LA or P.
    jpeg_ready_modes = ("1", "L", "RGB")
    # Decreasing quality, ending at 1, the lowest setting used here.
    qualities = (40, 30, 20, 10, 1)

    for filename in os.listdir(input_folder):
        # Accept ".PNG" as well as ".png".
        if not filename.lower().endswith(".png"):
            continue

        file_path = os.path.join(input_folder, filename)
        stem = os.path.splitext(filename)[0]

        with Image.open(file_path) as img:
            if img.mode not in jpeg_ready_modes:
                img = img.convert("RGB")

            for quality in qualities:
                output_filename = f"{stem}_quality{quality}.jpg"
                output_path = os.path.join(output_dir, output_filename)
                img.save(output_path, "JPEG", quality=quality)
                print(f"Saved {output_path} with quality {quality}")
if __name__ == "__main__":
    # Exactly one command-line argument is expected: the folder with the PNGs.
    if len(sys.argv) == 2:
        convert_png_to_jpeg(sys.argv[1])
    else:
        print("Usage: python script.py <input_folder_path>")
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
import os | |
import requests | |
def download_images(jsonl_file_path, output_folder, timeout=30):
    """Download the image referenced by each record of a JSONL file.

    Every non-blank line must be a JSON object with a 'url' and a 'score'
    key. The response body for 'url' is saved as ``<score>.png`` inside
    ``output_folder``. A failure on one record (bad JSON, missing key,
    network error, non-200 status) is reported on stdout and the remaining
    records are still processed.

    Args:
        jsonl_file_path: Path of the JSONL file listing the images.
        output_folder: Directory that receives the files; created if missing.
        timeout: Seconds passed to ``requests.get`` as its timeout, so a
            stalled server cannot block the run indefinitely.

    Returns:
        None.
    """
    # Create the output folder if it doesn't exist.
    os.makedirs(output_folder, exist_ok=True)

    with open(jsonl_file_path, 'r', encoding='utf-8') as file:
        for line in file:
            # Blank lines carry no record; skip them rather than report an error.
            if not line.strip():
                continue
            try:
                json_object = json.loads(line)
                url = json_object['url']
                score = json_object['score']

                # The filename is the score alone, so two records with the
                # same score map to the same file and the later download
                # overwrites the earlier one.
                filename = f"{score}.png"
                output_path = os.path.join(output_folder, filename)

                response = requests.get(url, timeout=timeout)
                if response.status_code == 200:
                    with open(output_path, 'wb') as img_file:
                        img_file.write(response.content)
                    print(f"Downloaded and saved image with score {score} to {output_path}")
                else:
                    print(f"Failed to download image with score {score} from {url}")
            except Exception as e:
                # Deliberately broad: one bad record must not stop the batch.
                print(f"An error occurred: {e}")
if __name__ == "__main__":
    # Script entry point: download every image listed in garbage.jsonl.
    download_images(
        jsonl_file_path='/home/blackroot/Desktop/anti-jpgger/garbage.jsonl',  # Replace this with your JSONL file path
        output_folder='images',  # The folder where images will be saved
    )
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment