Created
September 5, 2021 07:37
-
-
Save f0k/35ed2b6467764845217a31c3fe722b15 to your computer and use it in GitHub Desktop.
Merge and deduplicate .ics/.ical files
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Merges and deduplicates one or more .ics / ical files.

For usage information, call with --help.

Author: Jan Schlüter
"""
import io
import sys
from argparse import ArgumentParser
def opts_parser():
    """
    Builds and returns the command line argument parser.

    The parser accepts one or more positional input files (`infile`, a
    list), followed by a single positional output file (`outfile`), and
    an optional `--verbose` / `-v` flag.
    """
    usage = (
        "Merges and deduplicates one or more .ics / ical files.\n"
        "In case of duplicated entries (by UID or DTSTART+SUMMARY),\n"
        "files listed earlier take precedence.\n"
    )
    parser = ArgumentParser(description=usage)
    # nargs='+' is greedy, but argparse still reserves the last positional
    # argument on the command line for `outfile`.
    parser.add_argument('infile', nargs='+',
                        type=str,
                        help='The .ics or ical files to read.')
    parser.add_argument('outfile',
                        type=str,
                        help='The .ics or ical file to write.')
    parser.add_argument('--verbose', '-v',
                        action='store_true',
                        help='If given, report all duplicated entries.')
    return parser
def read_objects(fn):
    """
    Reads calendar objects from the given .ics / ical file and yields
    them as strings. Global calendar properties (e.g., PRODID) are also
    treated as objects.

    `fn` is the path of the file to read. Its first line must be
    BEGIN:VCALENDAR, otherwise a ValueError is raised. Reading stops at
    the first END:VCALENDAR line or at the end of the file.

    Every yielded string consists of complete lines terminated by CRLF,
    whatever line ending the input file used. A folded global property
    (a line followed by continuation lines starting with a space or tab)
    is yielded as a single string. A component (BEGIN:X ... END:X) is
    yielded with all its lines, including nested sub-components. A
    component that is not closed before END:VCALENDAR or the end of the
    file is dropped.
    """
    component = []          # lines of the component being collected
    component_type = None   # its type (e.g. "VEVENT"), None on global scope
    pending = []            # lines of a global property not yet yielded
    with io.open(fn, newline='') as f:
        # tolerate a leading byte order mark and any line ending style
        first = f.readline().lstrip("\ufeff").rstrip("\r\n")
        if first != "BEGIN:VCALENDAR":
            raise ValueError("%s does not start with BEGIN:VCALENDAR" % fn)
        for raw in f:
            # normalize the line ending; this also terminates a final
            # line that lacks a line break
            line = raw.rstrip("\r\n") + "\r\n"
            if pending:
                # `pending` is only ever filled on global scope
                if line.startswith((" ", "\t")):
                    # continuation of a folded global property
                    pending.append(line)
                    continue
                yield "".join(pending)
                pending = []
            if line == "END:VCALENDAR\r\n":
                return
            if component_type is None:
                # we're on global scope, check if we have a new object
                if line.startswith("BEGIN:"):
                    component_type = line[6:].rstrip("\r\n")
                    component = [line]
                elif line != "\r\n":
                    # hold the property back, it may be folded
                    pending = [line]
            else:
                # we're in an object, check if it ended
                component.append(line)
                if line == "END:%s\r\n" % component_type:
                    yield "".join(component)
                    component = []
                    component_type = None
    if pending:
        # file ended without END:VCALENDAR, flush the last property
        yield "".join(pending)
def get_identifiers(vobject):
    """
    Return a set of identifiers for a given calendar object string or
    global calendar property. For the latter, uses the property name
    (without parameters or value). For objects, uses the object type and
    UID/TZID if present, and a combination of object type, DTSTART and
    SUMMARY if present.

    `vobject` is a string of CRLF-terminated lines: either a single
    (possibly folded) global property, or a component starting with a
    BEGIN:<type> line. Properties are identified by their complete
    unfolded content line (name, parameters and value), so two objects
    match only if these lines are identical.

    Returns a set containing a property name string, or tuples of the
    form (type, line) and (type, dtstart_line, summary_line). The set is
    empty for a component that has none of UID, DTSTART and SUMMARY.
    """
    def read_property(name):
        """
        Returns the unfolded content line of the first property called
        `name` that belongs to the component itself, or "" if not present.
        """
        depth = 0       # nesting level of sub-components (e.g., VALARM)
        found = None
        # skip the component's own BEGIN line
        for line in vobject.split("\r\n")[1:]:
            if found is not None:
                # unfold folded lines (linebreak + single whitespace)
                if line.startswith((" ", "\t")):
                    found += line[1:]
                    continue
                return found
            if line.startswith("BEGIN:"):
                depth += 1
            elif line.startswith("END:"):
                depth -= 1
            elif (depth == 0 and line.startswith(name)
                    and line[len(name):len(name) + 1] in (":", ";")):
                # only match the name at the start of a line and followed
                # by a delimiter, so text inside other properties (or
                # names like X-FOO-UID) cannot be mistaken for it
                found = line
        return found or ""

    if not vobject.startswith("BEGIN:"):
        # it's a global property, use its name (the part before the
        # first parameter or value delimiter)
        first_line = vobject.split("\r\n", 1)[0]
        property_name = first_line.split(":", 1)[0].split(";", 1)[0]
        return {property_name}

    vobject_type = vobject[6:].split("\r\n", 1)[0]
    if vobject_type == "VTIMEZONE":
        # for timezones, use their identifier
        return {(vobject_type, read_property("TZID"))}

    # for events, todos, ... use their uid or dtstart+summary
    identifiers = set()
    uid = read_property("UID")
    if uid:
        identifiers.add((vobject_type, uid))
    dtstart = read_property("DTSTART")
    summary = read_property("SUMMARY")
    if dtstart or summary:
        identifiers.add((vobject_type, dtstart, summary))
    return identifiers
def main():
    """
    Command line entry point: merges all input files into the output
    file, skipping every object that shares an identifier with an object
    seen before. With --verbose, prints the clashing identifiers of each
    skipped object.

    All inputs are read before the output file is opened. This way the
    output path may also be one of the inputs, and an error while reading
    does not leave a partially written output file behind.
    """
    # parse command line
    options = opts_parser().parse_args()
    # remember identifiers of calendar objects already seen
    ids_written = set()
    # collect the output in memory
    chunks = ["BEGIN:VCALENDAR\r\n"]
    # iterate over calendar objects in input files
    for fn in options.infile:
        for vobject in read_objects(fn):
            # get the set of identifiers for the current object
            identifiers = get_identifiers(vobject)
            duplicates = identifiers & ids_written
            # only keep it if none of the identifiers occurred before
            if not duplicates:
                chunks.append(vobject)
            elif options.verbose:
                print("already written:", duplicates)
            # remember the identifiers (also those of skipped objects)
            ids_written.update(identifiers)
    chunks.append("END:VCALENDAR\r\n")
    # newline='' keeps the CRLF line endings untranslated
    with io.open(options.outfile, 'w', newline='') as f:
        f.writelines(chunks)
# run the merge only when executed as a script, not when imported
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment