Skip to content

Instantly share code, notes, and snippets.

@amalgjose
Created February 25, 2025 14:28
Show Gist options
  • Save amalgjose/db393fa8ae72c121514700bc6db7bb8f to your computer and use it in GitHub Desktop.
Save amalgjose/db393fa8ae72c121514700bc6db7bb8f to your computer and use it in GitHub Desktop.
from lxml import etree
from sqlalchemy import create_engine, MetaData, Table, Column, Integer, String, DateTime
from sqlalchemy.orm import sessionmaker
from sqlalchemy.ext.declarative import declarative_base
from datetime import datetime
# Database connection details
DATABASE_URI = 'postgresql+psycopg2://username:password@localhost:5432/dbname'
# Initialize SQLAlchemy engine and session
engine = create_engine(DATABASE_URI)
Session = sessionmaker(bind=engine)
session = Session()
# Base for declarative class
Base = declarative_base()
# Configuration: Map XPath expressions to database columns
MAPPING_CONFIG = {
"table_name": "example_table", # Name of the database table
"mappings": {
"id": "//record/id/text()", # XPath for the 'id' field
"name": "//record/name/text()", # XPath for the 'name' field
"date": "//record/date/text()", # XPath for the 'date' field
# Add more mappings as needed
}
}
# Function to create a table class dynamically
def create_table_class(table_name, mappings):
class DynamicTable(Base):
__tablename__ = table_name
id = Column(Integer, primary_key=True, autoincrement=True)
for column_name in mappings.keys():
if column_name != "id": # Skip 'id' since it's already defined
setattr(DynamicTable, column_name, Column(String))
return DynamicTable
# Function to parse XML using XPath and extract data
def parse_xml(xml_file, mappings):
tree = etree.parse(xml_file)
root = tree.getroot()
data = []
for record in root.xpath("//record"): # Adjust XPath to match your XML structure
record_data = {}
for column_name, xpath_expr in mappings.items():
value = record.xpath(xpath_expr)
record_data[column_name] = value[0] if value else None
data.append(record_data)
return data
# Function to load data into the database
def load_data_to_db(table_name, mappings, data):
DynamicTable = create_table_class(table_name, mappings)
# Create the table if it doesn't exist
Base.metadata.create_all(engine)
# Insert data into the table
for record in data:
row = DynamicTable(**record)
session.add(row)
session.commit()
# Main function
def main(xml_file, config):
table_name = config["table_name"]
mappings = config["mappings"]
# Parse XML and extract data
data = parse_xml(xml_file, mappings)
# Load data into the database
load_data_to_db(table_name, mappings, data)
print(f"Data from {xml_file} has been loaded into the {table_name} table.")
# Example usage
if __name__ == "__main__":
xml_file = 'example.xml' # Path to your XML file
main(xml_file, MAPPING_CONFIG)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment