Last active
April 17, 2025 03:32
-
-
Save flips22/897fcd809111f41a7dbe435c9d883f76 to your computer and use it in GitHub Desktop.
CBL to Mylar Script - Imports a CBL file, finds each unique series on comicvine, checks if you already have it in mylar and if not, adds the series to mylar via its api.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# -*- coding: utf-8 -*- | |
''' | |
Installation: | |
1) Add this package as a python wrapper to search the comicvine api: | |
https://github.com/jessebraham/comicvine-search
I wasn't able to get this module to install so I copied it to the same folder as the .py file (or add to you env of course) | |
2) Replace [mylar api key] with your api key | |
3) Replace [mylar server address] with your server in the format: http://servername:port/ (make sure to include the slash at the end) | |
4) Replace [comicvine api key] with your api key | |
Usage: | |
python3 cbltomylar.py cblfiletoimport.cbl | |
You can also use wildcards: | |
python3 cbltomylar.py *.cbl | |
results are output to std out if you want to save to a file add > logfile.txt | |
Notes: | |
Marvel or DC Comics are the only acceptable publishers that are allowed. This is hardcoded in, but you can edit the code to add more. | |
Without this, I was getting non-english publications. | |
All matches of series and volume year will be added. So if there are multiple matches, multiple series will be added. | |
Reference: Mylar general structure for API: | |
http://localhost:8090 + HTTP_ROOT + /api?apikey=$apikey&cmd=$command | |
''' | |
import requests | |
import time | |
import comicvine_search | |
from comicvine_search import ComicVineClient | |
import xml.etree.ElementTree as ET | |
from glob import glob | |
from sys import argv | |
# -- Configuration ------------------------------------------------------------
mylarapikey = '[mylar api key]'
mylarURL = '[mylar server address]'  # format: http://servername:port/ (trailing slash required)
cv = ComicVineClient('[comicvine api key]')  # your comicvine api key

# Mylar API general structure: baseURL + api?apikey=$apikey&cmd=$command
mylarURLfull = mylarURL + 'api?apikey=' + mylarapikey + '&cmd=addComic&id='
mylarURLfullcheck = mylarURL + 'api?apikey=' + mylarapikey + '&cmd=getComic&id='

# Only these publishers are accepted; this filters out non-english editions
# of the same series. Extend this set to allow more publishers.
ACCEPTED_PUBLISHERS = {
    "DC Comics",
    "Marvel",
    "Marvel Digital Comics Unlimited",
    "Dynamite Entertainment",
}

for filename in glob(argv[1]):
    tree = ET.parse(filename)
    root = tree.getroot()

    cblhave = 0  # series Mylar already tracks
    cbladd = 0   # series newly added to Mylar

    # Collect one (series, volume-year) pair per <Book>, then de-duplicate:
    # a reading list references the same series once per issue.
    series_list = [
        (book.attrib['Series'], book.attrib['Volume'])
        for book in root.findall("./Books/Book")
    ]
    input_list = list(set(series_list))
    print("Found ", len(input_list), " unique series in CBL file. Starting search.")

    for inputname, inputyear in input_list:
        print("Searching for " + inputname + " (" + inputyear + ")")
        # ComicVine recommends at least 1 second between requests; more than
        # 450 requests in 15 minutes (900 seconds) gets rate limited, so raise
        # this to 2 for imports that will run longer than 15 minutes.
        time.sleep(1)
        response = cv.search(inputname, resources=['volume'])
        # Iterate the result list directly (the original indexed with
        # range(1000) and broke out of the loop on IndexError).
        for result in response.results:
            # Require an exact series-name and start-year match.
            if result['name'] != inputname or result['start_year'] != inputyear:
                continue
            if result['publisher']['name'] not in ACCEPTED_PUBLISHERS:
                continue
            print(" Found on comicvine: " + result['publisher']['name'] + ": " + inputname + " (" + inputyear + ") ", "comicid:", result['id'])
            checkURL = mylarURLfullcheck + str(result['id'])
            addURL = mylarURLfull + str(result['id'])
            responsecheck = requests.get(checkURL)
            # Length heuristic: a short response body means Mylar returned an
            # empty record, i.e. it does not track this comic yet.
            if len(responsecheck.text) > 75:
                print(" Mylar is already tracking this series")
                cblhave += 1
            else:
                requests.get(addURL)
                print(" Adding series to mylar")
                cbladd += 1

    # Anything neither matched nor added was not found on comicvine.
    notfound = len(input_list) - cblhave - cbladd
    print("CBL File: ", filename, " Series in CBL:", len(input_list), " Series already in mylar:", cblhave, " Series added to mylar:", cbladd, " Series not found on comicvine:", notfound)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# -*- coding: utf-8 -*- | |
''' | |
This is an updated script from TheMadman which adds a ton of functionality to my script and also cleans up the code. Here's a description from him: | |
It will import all the CBL files in a subfolder called 'ReadingLists' and store the data in a csv file to keep track of changes. It will keep a register | |
of series name and year (as found in your readinglists), and lookup the CV id and publisher which match (allowing for preferred and blacklisted publishers). | |
It will then check/add to Mylar based on user preference. | |
If you add new files to your readinglist folder and re-run the script, it will merge the new series it finds with the existing data from previous runs rather | |
than potentially double handling the same series every time you read a cbl file. | |
Only issue I've found is that sometimes there are 2 matches for a series with the same year found in CV (both with Marvel as publisher) so it's impossible to | |
know which comicID is correct without more info. | |
Installation: | |
1) Download & install this package (required for searching the comicvine api): | |
https://github.com/jessebraham/comicvine-search
2) Create a folder called 'ReadingLists' in the same directory as the script and add any CBL files you want to process into this folder | |
3) Replace [MYLAR API KEY] with your Mylar3 api key | |
4) Replace [MYLAR SERVER ADDRESS] with your server in the format: http://servername:port/ (make sure to include the slash at the end) | |
5) Replace [CV API KEY] with your comicvine api key | |
6) Optional - Modify the following options: | |
- PUBLISHER_BLACKLIST : List of publishers to ignore during CV searching | |
- PUBLISHER_PREFERRED : List of publishers to prioritise when multiple CV matches are found | |
- ADD_NEW_SERIES_TO_MYLAR : Automatically add CV search results to Mylar as new series | |
- CV_SEARCH_LIMIT : Set a limit on the number of CV API calls made during this processing. | |
This is useful for large collections if you want to break the process into smaller chunks. | |
Usage: | |
python3 cbl-mylar-import.py | |
Results are output to "output.csv" in the same directory as the script | |
Notes: | |
- Series are found based on series name and year match. | |
- If multiple results are found, any matches of the preferred publisher will be prioritised. | |
- For multiple matches, this script will output the last result found. | |
- CV api calls are limited to once every 2 seconds, so this script can take a while for large collections. | |
It is not recommended to reduce this, however you can modify the rate using the CV_API_RATE var. | |
- If you mess anything up, you can simply delete the output.csv or force a re-run using the Mylar & CV FORCE_RECHECK vars. | |
''' | |
import requests | |
import json | |
import time | |
import os | |
from enum import IntEnum | |
import comicvine_search | |
from comicvine_search import ComicVineClient | |
import xml.etree.ElementTree as ET | |
from glob import glob | |
from sys import argv | |
### DEV OPTIONS
#Enable verbose output
VERBOSE = False
#Prevent overwriting of main CSV data file (writes output_new.csv instead)
TEST_MODE = False
#File prefs -- CBL files are read from ./ReadingLists, results go to ./output.csv
SCRIPT_DIR = os.getcwd()
READINGLIST_DIR = os.path.join(SCRIPT_DIR, "ReadingLists")
DATA_FILE = os.path.join(SCRIPT_DIR, "output.csv")
if TEST_MODE:
    #Create new file instead of overwriting data file
    OUTPUT_FILE = os.path.join(SCRIPT_DIR, "output_new.csv")
else:
    #Normal mode: read and write the same CSV so data merges across runs
    OUTPUT_FILE = DATA_FILE
#CSV header row; order must match the Column enum below
CSV_HEADERS = ["Series","Year","Publisher", "ComicID","InMylar"]
class Column(IntEnum):
    """Column indices into one row of the CSV data handled by this script.

    Order must match CSV_HEADERS.
    """
    SERIES = 0     # series name (commas stripped on import)
    YEAR = 1       # volume start year
    PUBLISHER = 2  # publisher name, or "Unknown"
    COMICID = 3    # ComicVine volume id, or "Unknown"
    INMYLAR = 4    # whether Mylar tracks the series
#CV (ComicVine) prefs
CV_SEARCH_LIMIT = 10000 #Maximum allowed number of CV API calls per run
CV_API_KEY = '[COMICVINE API KEY]'
CV_API_RATE = 2 #Seconds between CV API calls (CV rate-limits heavy use)
FORCE_RECHECK_CV = False #Re-query CV even for rows that already have a ComicID
PUBLISHER_BLACKLIST = ["Panini Comics","Editorial Televisa","Planeta DeAgostini","Unknown"] #Publishers to ignore during CV searching
PUBLISHER_PREFERRED = ["Marvel","DC Comics"] #If multiple matches found, prefer this result
CV = None #ComicVineClient instance; initialised in main()
#Mylar prefs
mylarAPI = '[MYLAR API KEY]'
mylarBaseURL = '[MYLAR URL]' #format= http://servername:port/
FORCE_RECHECK_MYLAR_MATCHES = False #Re-check Mylar even for rows already marked InMylar
ADD_NEW_SERIES_TO_MYLAR = True #Automatically add CV matches to Mylar as new series
#Mylar API endpoints (general form: baseURL + api?apikey=$apikey&cmd=$command)
mylarAddURL = mylarBaseURL + 'api?apikey=' + mylarAPI + '&cmd=addComic&id='
mylarCheckURL = mylarBaseURL + 'api?apikey=' + mylarAPI + '&cmd=getComic&id='
#Counters for the end-of-run summary, updated as globals by the functions below
numNewSeries = 0
numExistingSeries = 0
numCBLSeries = 0
#Initialise counters
mylarExisting = 0 #series Mylar already tracks
mylarMissing = 0 #series not found in Mylar
CVFound = 0 #series matched on CV this run
CVNotFound = 0 #series with no acceptable CV match
searchCount = 0 #number of CV API calls made this run
def parseCBLfiles():
    """Scan READINGLIST_DIR recursively for .cbl files and extract series data.

    Returns a list of [series_name, volume_year] pairs, one per <Book>
    element, duplicates included (de-duplication happens in mergeDataLists).
    Commas are stripped from series names so values stay CSV-safe.
    """
    series_list = []
    print("Checking CBL files in %s" % (READINGLIST_DIR))
    for root, dirs, files in os.walk(READINGLIST_DIR):
        for file in files:
            if not file.endswith(".cbl"):
                continue
            filename = os.path.join(root, file)
            try:
                tree = ET.parse(filename)
                fileroot = tree.getroot()
                for series in fileroot.findall("./Books/Book"):
                    # Strip commas so the value round-trips through the naive CSV.
                    line = series.attrib['Series'].replace(",", ""), series.attrib['Volume']
                    series_list.append(list(line))
            except (ET.ParseError, KeyError, OSError):
                # Narrowed from a bare except: malformed XML, a <Book> missing
                # its Series/Volume attributes, or an unreadable file.
                print("Unable to process file at %s" % (filename))
    return series_list
def isSeriesInMylar(comicID):
    """Check whether Mylar already tracks the series with the given ComicVine id.

    comicID : string; non-numeric ids (the "Unknown" placeholder) are never
    looked up. Updates the global mylarExisting/mylarMissing counters and
    returns True when Mylar reports a non-empty comic record.
    """
    global mylarExisting
    global mylarMissing
    found = False
    if comicID.isnumeric():
        comicCheckURL = "%s%s" % (mylarCheckURL, str(comicID))
        jsonData = json.loads(requests.get(comicCheckURL).text)
        # Mylar returns an empty 'comic' payload for ids it does not track.
        found = len(jsonData['data']['comic']) != 0
    elif comicID != "Unknown":
        print(" Mylar series status unknown - invalid ComicID:%s" % (comicID))
    if found:
        if VERBOSE: print(" Match found for %s in Mylar" % (comicID))
        mylarExisting += 1
        return True
    # Not found (or id was not numeric): count as missing.
    # The original also had an unreachable trailing "return False;".
    if VERBOSE: print(" No match found for %s in Mylar" % (comicID))
    mylarMissing += 1
    return False
def addSeriesToMylar(comicID):
    """Ask Mylar to start tracking the series with the given ComicVine id.

    Returns True when Mylar reports success, False otherwise (including for
    non-numeric ids, which are never submitted).
    """
    if not comicID.isnumeric():
        return False
    if VERBOSE: print(" Adding %s to Mylar" % (comicID))
    comicAddURL = "%s%s" % (mylarAddURL, str(comicID))
    mylarData = requests.get(comicAddURL).text
    ## Check result of API call
    jsonData = json.loads(mylarData)
    # json.loads decodes a JSON `true` to the Python boolean True, so the
    # original `== "true"` comparison could never succeed for boolean
    # responses; accept both forms for compatibility.
    return jsonData['success'] in (True, "true")
def findVolumeDetails(series,year):
    """Search ComicVine for a volume exactly matching series name and start year.

    Returns [publisher, comicID]; both are the string "Unknown" when no
    acceptable match is found. Blacklisted publishers are rejected. When
    several non-blacklisted publishers match, the match with the most issues
    among preferred publishers wins (or among all matches if no preferred
    publisher matched). Updates the global search/found/not-found counters.
    """
    found = False
    comicID = "Unknown"
    publisher = "Unknown"
    global searchCount
    global CVNotFound
    global CVFound
    global CV
    if isinstance(series,str):
        searchCount += 1
        result_matches = 0            # non-blacklisted exact matches
        preferred_matches = 0         # matches from PUBLISHER_PREFERRED
        result_publishers = []        # publisher of every exact match
        result_matches_blacklist = 0  # exact matches rejected by blacklist
        issueCounter = 0              # highest issue count seen so far
        series_matches = []           # all exact matches (blacklisted included)
        publisher_blacklist_results = set()
        try:
            if VERBOSE: print(" Searching for %s (%s) on CV" % (series,year))
            response = CV.search(series , resources=['volume'])
            if response.results is None:
                print(" No results found for %s (%s)" % (series,year))
            else: #Results were found
                for result in response.results: #Iterate through CV results
                    #Only exact series name and start-year matches count
                    if result['name'] == series and result['start_year'] == year:
                        publisher_temp = result['publisher']['name']
                        result_publishers.append(publisher_temp)
                        # NOTE(review): blacklisted matches are also appended
                        # here, so they can still win the multi-match
                        # tie-break below -- confirm this is intended.
                        series_matches.append(result)
                        if publisher_temp in PUBLISHER_BLACKLIST:
                            result_matches_blacklist += 1
                            publisher_blacklist_results.add(publisher_temp)
                        else:
                            found = True
                            result_matches += 1
                            publisher = publisher_temp
                            if publisher in PUBLISHER_PREFERRED: preferred_matches += 1
                            comicID = result['id']
                            numIssues = result['count_of_issues']
                            print(" Found on comicvine: %s - %s (%s) : %s (%s issues)" % (publisher, series, year, comicID, numIssues))
                #Handle multiple publisher matches: keep the preferred-publisher
                #result with the most issues (any publisher if none preferred)
                if result_matches > 1:
                    print(" Warning: Multiple valid matches found! Publishers: %s" % (", ".join(result_publishers)))
                    for item in series_matches:
                        if item['publisher']['name'] in PUBLISHER_PREFERRED or preferred_matches == 0:
                            numIssues = item['count_of_issues']
                            if numIssues > issueCounter:
                                #Current series has more issues than any other preferred results!
                                publisher = item['publisher']['name']
                                comicID = item['id']
                                issueCounter = numIssues
                                ## TODO: Remove "preferred text labels"
                                print(" Selected series from multiple results: %s - %s (%s issues)" % (publisher,comicID,numIssues))
                            else:
                                #Another series has more issues
                                print(" Skipped Series : %s - %s (%s issues) - another preferred series has more issues!" % (item['publisher']['name'],item['id'],numIssues))
                if len(response.results) == 0:
                    print(" No results found for %s (%s)" % (series,year))
                if result_matches_blacklist > 0 and result_matches == 0:
                    #Only blacklisted results found
                    print(" No valid results found for %s (%s). %s blacklisted results found with the following publishers: %s" % (series,year,result_matches_blacklist, ",".join(publisher_blacklist_results)))
        except Exception as e:
            #Broad catch keeps one bad series from aborting the whole run
            print(" There was an error processing %s (%s)" % (series,year))
            print(repr(e))
    #Update counters
    if not found:
        CVNotFound += 1
    else:
        CVFound += 1
    return [publisher,comicID]
def readExistingData():
    """Load previously-exported rows from DATA_FILE.

    Returns a list of stripped field lists, header row excluded; returns an
    empty list when the file does not exist yet (first run).
    """
    print("Reading data from %s" % (DATA_FILE))
    dataList = []
    if os.path.exists(DATA_FILE):
        with open(DATA_FILE, mode='r') as csv_file:
            data = csv_file.readlines()
        # Skip the header row; naive comma split is safe because the fields
        # never contain commas (parseCBLfiles strips them on import).
        for line in data[1:]:
            dataList.append([x.strip() for x in line.split(",")])
    return dataList
def outputData(data):
    """Write merged series rows to OUTPUT_FILE as naive CSV, header first.

    Rows may be pre-formatted single-element entries (written as-is) or field
    lists (comma-joined). An empty data list writes just the header row.
    """
    print("Exporting data to %s" % (OUTPUT_FILE))
    with open(OUTPUT_FILE, mode='w') as output_file:
        output_file.write("%s\n" % (",".join(CSV_HEADERS)))
        if not data:
            # Nothing to export; the original raised IndexError on data[0].
            return
        #Check if list contains multiple columns
        if len(data[0]) == 1:
            output_file.writelines(data)
        else:
            for row in data:
                output_file.write("%s\n" % (",".join(map(str, row))))
def index_2d(myList, v):
    """Return the index of the first row whose first two fields equal v's, else None."""
    for idx, row in enumerate(myList):
        if row[0] == v[0] and row[1] == v[1]:
            return idx
    return None
def mergeDataLists(list1, list2):
    """Merge existing CSV rows with freshly-parsed CBL (series, year) pairs.

    list1 : main list -- rows of [Series, Year, Publisher, ComicID, InMylar]
            read back from the CSV. Mutated: matched rows are popped.
    list2 : import list -- rows of [Series, Year] from the CBL files.
    Returns one row per unique (Series, Year) key: existing CSV rows keep
    their looked-up data, CBL-only series get "Unknown"/False placeholders.
    Updates the global series counters as a side effect.
    """
    print("Merging data lists")
    mainDataList = list1
    dataToMerge = list2
    global numExistingSeries
    global numCBLSeries
    global numNewSeries
    mainDataTitles = []
    mergedTitleSet = ()
    finalMergedList = []
    #Extract the (Series, Year) key of every existing CSV row
    for row in mainDataList:
        mainDataTitles.append([row[Column.SERIES], row[Column.YEAR]])
    mergedTitleList = mainDataTitles + dataToMerge
    mergedTitleList.sort()
    numExistingSeries = len(mainDataList)
    # NOTE(review): counted before de-duplication, so this includes duplicates
    numCBLSeries = len(mergedTitleList)
    #De-duplicate keys (tuples are hashable; set iteration order is arbitrary)
    mergedTitleSet = set(tuple(map(tuple,mergedTitleList)))
    for row in mergedTitleSet:
        if list(row) in mainDataTitles:
            #Key exists in the CSV: reuse its full row
            match_row = index_2d(mainDataList,row)
            finalMergedList.append(mainDataList[match_row])
            #Pop so duplicate CSV keys cannot be matched twice
            mainDataList.pop(match_row)
        else:
            #New series from a CBL: publisher/ComicID/InMylar not yet known
            newData = [row[Column.SERIES],row[Column.YEAR],"Unknown","Unknown",False]
            finalMergedList.append(newData)
    numNewSeries = len(finalMergedList) - numExistingSeries
    return finalMergedList
def main():
    """Read CSV + CBL data, resolve series on ComicVine, sync with Mylar, export CSV."""
    #Initialise CV API tool
    global CV
    CV = ComicVineClient(CV_API_KEY)
    global numExistingSeries
    global numCBLSeries
    global numNewSeries
    #Extract list from existing csv
    importData = readExistingData()
    #Process CBL files
    cblSeriesList = parseCBLfiles()
    #Merge csv data with cbl data
    mergedData = mergeDataLists(importData, cblSeriesList)
    mergedData.sort()
    print("Found %s series in CSV, %s new series in CBL" % (numExistingSeries,numNewSeries))
    #Run all data checks in CV & Mylar
    for rowIndex in range(len(mergedData)):
        series = mergedData[rowIndex][Column.SERIES]
        year = mergedData[rowIndex][Column.YEAR]
        # NOTE(review): publisher is read here but only reassigned below,
        # never used -- confirm it can be dropped.
        publisher = mergedData[rowIndex][Column.PUBLISHER]
        comicID = mergedData[rowIndex][Column.COMICID]
        inMylar = mergedData[rowIndex][Column.INMYLAR]
        checkMylar = False
        #IDs read back from the CSV are strings; "Unknown" is the placeholder
        comicIDExists = comicID.isnumeric()
        #Check for new comicIDs
        if not comicIDExists or FORCE_RECHECK_CV:
            #Self-imposed search limit to avoid hitting CV API rate limits
            if searchCount < CV_SEARCH_LIMIT:
                #CV recommends at least 1 second between requests; more than
                #450 requests in 15 minutes (900 seconds) gets rate limited,
                #hence the CV_API_RATE delay between calls.
                if searchCount > 0: time.sleep(CV_API_RATE)
                #Update fields in data list
                cv_data = findVolumeDetails(series,year)
                mergedData[rowIndex][Column.PUBLISHER] = cv_data[0]
                mergedData[rowIndex][Column.COMICID] = cv_data[1]
                #update vars for use elsewhere
                publisher = str(cv_data[0])
                comicID = str(cv_data[1])
        #Check if series exists in mylar (string "True" when read from CSV)
        if inMylar == "True":
            #Match exists in mylar
            if FORCE_RECHECK_MYLAR_MATCHES:
                #Force recheck anyway
                checkMylar = True
            else:
                checkMylar = False
        else:
            #No mylar match found
            checkMylar = True
        if checkMylar:
            #Update field in data list (stored as a real boolean from here on)
            inMylar = isSeriesInMylar(comicID)
            mergedData[rowIndex][Column.INMYLAR] = inMylar
        #Add new series to Mylar
        if not inMylar and ADD_NEW_SERIES_TO_MYLAR:
            mergedData[rowIndex][Column.INMYLAR] = addSeriesToMylar(comicID)
    #Write modified data to file
    outputData(mergedData)
    #Print summary to terminal
    print("Total Number of Series: %s, New Series Added From CBL: %s, Existing Series (Mylar): %s, Missing Series (Mylar): %s, New Matches (CV): %s, Unfound Series (CV): %s" % (numExistingSeries,numNewSeries,mylarExisting,mylarMissing,CVFound,CVNotFound))
    ## TODO: Summarise list of publishers in results

main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
👏