Skip to content

Instantly share code, notes, and snippets.

@n0nuser
Last active November 28, 2022 13:51
Show Gist options
  • Save n0nuser/e68fd08aef22aba38d851abe08e94e59 to your computer and use it in GitHub Desktop.
Save n0nuser/e68fd08aef22aba38d851abe08e94e59 to your computer and use it in GitHub Desktop.
Execute query in files from a folder into a MS SQL Server continuosly
#!/usr/bin/python3
###############################################################################################
# Author: n0nuser - Pablo Jesús González Rubio - https://nonuser.es - [email protected]
###############################################################################################
#
# Traverses files in a folder, executing the query in each file to a MS SQL Server
#
# If it succeeds, the file could be deleted
#
# If there's any error with a file (execution of query or not able to read file),
# the file could be deleted within a given max retries
#
# The script is tested against:
# - DB Connection error
# - Read file error
# - DB execute query error
#
# In a Windows environment the file can be run at startup by:
# - Changing the filename to .pyw
# - Changing the program that runs the script to C:\Users\myUser\AppData\Local\Programs\Python\Python3XX\pythonw.exe
# - Putting a link to the file in the Startup folder: %appdata%\Microsoft\Windows\Start Menu\Programs\Startup
#
# In a Linux environment can be run at startup with cron or systemd
###############################################################################################
import logging
import os
import pyodbc
import time
# server = 'localhost\sqlexpress' # for a named instance
# server = 'myserver,port' # to specify an alternate port
server = "localhost,1433"
database = "MyDB"
username = "myUser"
password = "myPass"
table = "myTable"
folder = "C:\\Location\\Of\\Folder"
# folder = "/home/location/of/folder"
MAX_NUMBER_OF_ERRORS = 5
DB_CONNECTION_TIMEOUT = 300 # 5 min.
DELETE_FILES = True
LOG_FILENAME = "myLog.log"
# Interesting to catch errors: https://stackoverflow.com/questions/11392709/how-to-catch-specific-pyodbc-error-message
def manage_exception(file_errors, filename, file, e):
logging.error(f"File {filename} error: {e}")
file_errors[filename] += 1
if file_errors[filename] > MAX_NUMBER_OF_ERRORS:
logging.error(f"File {filename} deleted due to many errors")
if DELETE_FILES:
os.remove(file)
del file_errors[filename]
def main():
file_errors = {}
while True:
try:
# CHANGE DRIVER TO YOURS
conn = pyodbc.connect(
"DRIVER={ODBC Driver 17 for SQL Server};SERVER="
+ server
+ ";DATABASE="
+ database
+ ";UID="
+ username
+ ";PWD="
+ password
)
cursor = conn.cursor()
# Generator that yields a 3-tuple (dirpath, dirnames, filenames).
# This allows to have millions of files, and don't clutter RAM
for _, _, files in os.walk(folder):
for filename in files:
file = os.path.join(folder, filename)
sql_query = ""
# Read file
try:
with open(file, "r") as f:
sql_query = f.read()
except Exception as e:
manage_exception(file_errors, filename, file, e)
continue
# Query
try:
cursor.execute(sql_query)
conn.commit()
if DELETE_FILES:
os.remove(file)
logging.info(f"File {filename} processed")
except Exception as e:
manage_exception(file_errors, filename, file, e)
continue
except Exception as e:
logging.error(f"DB Connect error: {e}")
# Sleep for DB_CONNECTION_TIMEOUT set minutes
time.sleep(DB_CONNECTION_TIMEOUT)
if __name__ == "__main__":
logging.basicConfig(
filename=LOG_FILENAME,
level=logging.INFO,
format="%(asctime)s:%(levelname)s:%(message)s",
)
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment