Last active
November 18, 2025 18:31
-
-
Save TimSC/0b68605ff7872eaf5e198f3ff755e875 to your computer and use it in GitHub Desktop.
Python script to extract a single person and close relations from a GEDCOM file
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| """ | |
| Extract a single person and their close relations from a GEDCOM file using the python-gedcom library. | |
| """ | |
| from gedcom.element.individual import IndividualElement | |
| from gedcom.parser import Parser | |
| import sys | |
| import argparse | |
def _display_name(individual):
    """Return 'given surname' for an individual, or 'Unknown' if no name is recorded."""
    name_tuple = individual.get_name()
    if name_tuple and len(name_tuple) >= 2:
        # An individual with neither part recorded would otherwise print as ''.
        return f"{name_tuple[0]} {name_tuple[1]}".strip() or 'Unknown'
    return 'Unknown'


def extract_person(gedcom_file, person_id):
    """
    Extract information about a specific person from a GEDCOM file.

    Args:
        gedcom_file: Path to the GEDCOM file
        person_id: The GEDCOM ID of the person (e.g., 'I1', '@I1@')

    Returns:
        Dictionary containing person information or None if not found.
        Keys: 'id', 'name', 'given_name', 'surname', 'gender',
        'birth_date', 'birth_place', 'death_date', 'death_place',
        'is_deceased', 'parents' (list of (relation, name) tuples),
        'spouses' and 'children' (lists of names).
    """
    # Initialize the parser
    gedcom_parser = Parser()
    gedcom_parser.parse_file(gedcom_file)

    # Normalize the person ID: accept 'I1', '@I1@', '@I1' or 'I1@' alike.
    person_id = f"@{person_id.strip().strip('@')}@"

    # Get the individual (isinstance also rejects a missing entry, i.e. None)
    person = gedcom_parser.get_element_dictionary().get(person_id)
    if not isinstance(person, IndividualElement):
        return None

    # get_name() returns a tuple (given_name, surname)
    name_tuple = person.get_name()
    if name_tuple and len(name_tuple) >= 2:
        given_name = name_tuple[0] or 'Unknown'
        surname = name_tuple[1] or 'Unknown'
        full_name = f"{given_name} {surname}".strip()
    else:
        given_name = 'Unknown'
        surname = 'Unknown'
        full_name = 'Unknown'

    # Query each event once; the first two items are (date, place).
    birth_date, birth_place = person.get_birth_data()[:2]
    death_date, death_place = person.get_death_data()[:2]

    info = {
        'id': person_id,
        'name': full_name,
        'given_name': given_name,
        'surname': surname,
        'gender': person.get_gender(),
        'birth_date': birth_date or 'Unknown',
        'birth_place': birth_place or 'Unknown',
        'death_date': death_date or 'Unknown',
        'death_place': death_place or 'Unknown',
        'is_deceased': person.is_deceased(),
    }

    # Parents: walk each family in which the person is a child (FAMC).
    # get_parents() takes a parent_type, not a family, and already spans every
    # FAMC family, so calling it per family duplicated parents. Asking each
    # family for its own HUSB/WIFE members lists every parent exactly once.
    relation_by_gender = {'M': 'Father', 'F': 'Mother'}
    parents = []
    for family in gedcom_parser.get_families(person, family_type='FAMC'):
        for parent in gedcom_parser.get_family_members(family, 'PARENTS'):
            relation = relation_by_gender.get(parent.get_gender(), 'Parent')
            parents.append((relation, _display_name(parent)))
    info['parents'] = parents

    # Spouses and children: walk each family in which the person is a spouse
    # (FAMS) and classify members by their role in the family record itself,
    # so a child whose own record lacks the FAMC back-link is not mistaken
    # for a spouse.
    spouses = []
    children = []
    for family in gedcom_parser.get_families(person, family_type='FAMS'):
        for partner in gedcom_parser.get_family_members(family, 'PARENTS'):
            # Skip the person themselves
            if partner.get_pointer() != person_id:
                spouses.append(_display_name(partner))
        for child in gedcom_parser.get_family_members(family, 'CHIL'):
            if child.get_pointer() != person_id:
                children.append(_display_name(child))
    info['spouses'] = spouses
    info['children'] = children

    return info
def print_person_info(info):
    """
    Print a person-information dictionary in a readable format.

    Args:
        info: Dictionary with the keys 'id', 'name', 'given_name', 'surname',
            'gender', 'birth_date', 'birth_place', 'death_date',
            'death_place', 'is_deceased', 'parents', 'spouses' and
            'children'; a falsy value prints a not-found notice instead.
    """
    if not info:
        print("Person not found!")
        return

    rule = '=' * 60
    print(f"\n{rule}")
    print("PERSON INFORMATION")
    print(rule)

    # (label, dictionary key) pairs, in display order.
    scalar_fields = (
        ("ID", 'id'),
        ("Name", 'name'),
        ("Given Name", 'given_name'),
        ("Surname", 'surname'),
        ("Gender", 'gender'),
        ("Birth Date", 'birth_date'),
        ("Birth Place", 'birth_place'),
        ("Death Date", 'death_date'),
        ("Death Place", 'death_place'),
        ("Deceased", 'is_deceased'),
    )
    for label, key in scalar_fields:
        print(f"{label}: {info[key]}")

    # Relationship sections are printed only when they have entries.
    parents = info['parents']
    if parents:
        print("\nParents:")
        for relation, name in parents:
            print(f" {relation}: {name}")

    for heading, key in (("Spouses", 'spouses'), ("Children", 'children')):
        names = info[key]
        if names:
            print(f"\n{heading}:")
            for name in names:
                print(f" - {name}")

    print(f"{rule}\n")
def save_person_to_gedcom(gedcom_file, person_id, output_file):
    """
    Save a single person and their immediate family to a new GEDCOM file.

    The output contains the source file's HEAD record (if any), the person,
    their parents, spouses and children, the family records linking them,
    and a TRLR line.

    NOTE: records are copied unchanged, so a copied record may still contain
    pointers (e.g. to siblings or to other families) whose target records are
    not part of the output.

    Args:
        gedcom_file: Path to the source GEDCOM file
        person_id: The GEDCOM ID of the person to extract (e.g., 'I1', '@I1@')
        output_file: Path to the output GEDCOM file

    Raises:
        ValueError: If no individual with the given ID exists in the file.
    """
    # Initialize the parser
    gedcom_parser = Parser()
    gedcom_parser.parse_file(gedcom_file)

    # Normalize the person ID: accept 'I1', '@I1@', '@I1' or 'I1@' alike.
    person_id = f"@{person_id.strip().strip('@')}@"

    # Get the individual (isinstance also rejects a missing entry, i.e. None)
    person = gedcom_parser.get_element_dictionary().get(person_id)
    if not isinstance(person, IndividualElement):
        raise ValueError(f"Person with ID {person_id} not found")

    # Pointers of every record to copy.
    elements_to_include = {person_id}

    # Families where the person is a child, plus that family's parents.
    # get_family_members(..., 'PARENTS') is used rather than get_parents():
    # the latter takes a parent_type (not a family) as its second argument.
    for family in gedcom_parser.get_families(person, family_type='FAMC'):
        elements_to_include.add(family.get_pointer())
        for parent in gedcom_parser.get_family_members(family, 'PARENTS'):
            elements_to_include.add(parent.get_pointer())

    # Families where the person is a spouse, plus all members
    # (spouse and children).
    for family in gedcom_parser.get_families(person, family_type='FAMS'):
        elements_to_include.add(family.get_pointer())
        for member in gedcom_parser.get_family_members(family):
            elements_to_include.add(member.get_pointer())

    root_elements = gedcom_parser.get_root_child_elements()

    # Write the filtered GEDCOM file manually
    with open(output_file, 'w', encoding='utf-8') as f:
        # Header from the original file (first HEAD record only)
        header = next(
            (element for element in root_elements if element.get_tag() == 'HEAD'),
            None,
        )
        if header is not None:
            write_element(f, header, 0)

        # Selected individuals and families, in their original file order
        for element in root_elements:
            pointer = element.get_pointer()
            if pointer and pointer in elements_to_include:
                write_element(f, element, 0)

        # Trailer
        f.write("0 TRLR\n")
def write_element(file, element, level):
    """
    Recursively write a GEDCOM element and its children to file.

    Each element becomes one line of the form
    '<level> [<pointer>] <tag> [<value>]'; children follow at level + 1.

    Args:
        file: Writable text file object
        element: Element providing get_pointer(), get_tag(), get_value()
            and get_child_elements()
        level: Level number to write for this element
    """
    fields = [str(level)]

    # Pointer and value are optional; empty ones are left out of the line.
    xref = element.get_pointer()
    if xref:
        fields.append(str(xref))
    fields.append(str(element.get_tag()))
    payload = element.get_value()
    if payload:
        fields.append(str(payload))

    file.write(" ".join(fields) + "\n")

    # Descend depth-first so each child's subtree directly follows it.
    next_level = level + 1
    for sub_element in element.get_child_elements():
        write_element(file, sub_element, next_level)
def _last_token(date_text):
    """Return the last whitespace-separated token of a date, or 'Unknown' if there is none."""
    tokens = date_text.split() if date_text else []
    return tokens[-1] if tokens else 'Unknown'


def _year_sort_key(year):
    """Order numeric years chronologically, ahead of any non-numeric text."""
    if year.isdecimal():
        return (0, int(year), '')
    return (1, 0, year)


def _pointer_sort_key(pointer):
    """Natural-order key for a GEDCOM pointer, so '@I2@' sorts before '@I10@'."""
    digits = ''.join(ch for ch in pointer if ch.isdecimal())
    prefix = ''.join(ch for ch in pointer if not ch.isdecimal())
    return (prefix, int(digits) if digits else -1, pointer)


def list_all_people(gedcom_file, sort_by='id'):
    """
    List all people in the GEDCOM file with their IDs.

    Prints a table with one row per individual: ID, name, birth year and
    death year ('Living' when no death is recorded, 'Unknown' when a date is
    missing). The "year" is the last token of the recorded date.

    Args:
        gedcom_file: Path to the GEDCOM file
        sort_by: Sort order ('id', 'firstname', 'lastname', 'birth', 'death');
            any other value falls back to 'id'
    """
    # Initialize the parser
    gedcom_parser = Parser()
    gedcom_parser.parse_file(gedcom_file)

    # Get all individuals
    individuals = []
    for element in gedcom_parser.get_root_child_elements():
        if not isinstance(element, IndividualElement):
            continue

        # get_name() returns a tuple (given_name, surname)
        name_tuple = element.get_name()
        if name_tuple and len(name_tuple) >= 2:
            given_name = name_tuple[0] or 'Unknown'
            surname = name_tuple[1] or 'Unknown'
            name = f"{given_name} {surname}".strip()
        else:
            given_name = 'Unknown'
            surname = 'Unknown'
            name = 'Unknown'

        birth_year = _last_token(element.get_birth_data()[0])
        if element.is_deceased():
            death_year = _last_token(element.get_death_data()[0])
        else:
            death_year = 'Living'

        individuals.append({
            'id': element.get_pointer(),
            'name': name,
            'given_name': given_name,
            'surname': surname,
            'birth_year': birth_year,
            'death_year': death_year,
        })

    # Sort based on the selected option
    if sort_by == 'firstname':
        individuals.sort(key=lambda x: (x['given_name'].lower(), x['surname'].lower()))
    elif sort_by == 'lastname':
        individuals.sort(key=lambda x: (x['surname'].lower(), x['given_name'].lower()))
    elif sort_by == 'birth':
        # Put 'Unknown' at the end; compare years as numbers so that e.g.
        # 900 sorts before 1850 (a plain string comparison gets this wrong).
        individuals.sort(
            key=lambda x: (x['birth_year'] == 'Unknown', _year_sort_key(x['birth_year']))
        )
    elif sort_by == 'death':
        # Put 'Living' first, 'Unknown' at the end, sort the rest by year
        def death_sort_key(x):
            year = x['death_year']
            if year == 'Living':
                return (0, (0, 0, ''))
            if year == 'Unknown':
                return (2, (0, 0, ''))
            return (1, _year_sort_key(year))

        individuals.sort(key=death_sort_key)
    else:  # default to 'id'
        # Natural order: '@I2@' before '@I10@'
        individuals.sort(key=lambda x: _pointer_sort_key(x['id']))

    # Print the list
    print(f"\n{'='*80}")
    print(f"ALL PEOPLE IN GEDCOM FILE: {gedcom_file}")
    print(f"Sorted by: {sort_by}")
    print(f"{'='*80}")
    print(f"{'ID':<12} {'Name':<40} {'Born':<12} {'Died':<12}")
    print(f"{'-'*80}")
    for person in individuals:
        print(f"{person['id']:<12} {person['name']:<40} {person['birth_year']:<12} {person['death_year']:<12}")
    print(f"{'-'*80}")
    print(f"Total: {len(individuals)} people\n")
def main():
    """
    Command-line entry point.

    With --list, prints every individual in the file (ordered by --sort).
    Otherwise prints the details of the individual given by person_id and,
    with --output, also writes that person and their immediate family to a
    new GEDCOM file.

    Exits with status 1 when a file cannot be found or the requested person
    does not exist; argument errors are reported by argparse.
    """
    parser = argparse.ArgumentParser(
        description='Extract a single person from a GEDCOM file'
    )
    parser.add_argument('gedcom_file', help='Path to the GEDCOM file')
    parser.add_argument('person_id', nargs='?', help='Person ID (e.g., I1 or @I1@)')
    parser.add_argument('-o', '--output', help='Output GEDCOM file (optional)')
    parser.add_argument('-l', '--list', action='store_true', help='List all people in the file')
    parser.add_argument('-s', '--sort',
                        choices=['id', 'firstname', 'lastname', 'birth', 'death'],
                        default='id',
                        help='Sort order for list (default: id)')
    args = parser.parse_args()

    # Extract mode requires person_id; check before touching any file.
    if not args.list and not args.person_id:
        parser.error("person_id is required unless using --list")

    try:
        # List mode
        if args.list:
            list_all_people(args.gedcom_file, args.sort)
            return

        person_info = extract_person(args.gedcom_file, args.person_id)
        print_person_info(person_info)
        if person_info is None:
            # Nothing to save; saving would only raise ValueError for the
            # same missing ID.
            sys.exit(1)

        # Save to output file if specified
        if args.output:
            save_person_to_gedcom(args.gedcom_file, args.person_id, args.output)
            print(f"Person and immediate family saved to: {args.output}")
    except FileNotFoundError as err:
        # Report the path that is actually missing: it may be the output
        # location rather than the input file.
        missing = err.filename or args.gedcom_file
        print(f"Error: File '{missing}' not found!", file=sys.stderr)
        sys.exit(1)
    except ValueError as err:
        print(f"Error: {err}", file=sys.stderr)
        sys.exit(1)


# Run the command-line interface only when executed as a script.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment