nnnolan · March 7, 2023 14:00
diff --git a/xml_editor.py b/xml_editor.py
 import os
 import shutil
 import xml.etree.ElementTree as ET
 import subprocess
 import pandas as pd

 def xml_editor(beanname: str, dataframe: pd.DataFrame, id_column_name: str):
     """Point a bean's extraction query at a set of parent IDs, then run its script.

     Rewrites the ``sfdc.extractionSOQL`` entry of the bean ``beanname`` in
     ``process-conf.xml`` (looked up in the current working directory) so that
     its WHERE clause becomes ``WHERE PARENTID IN ( <ids>)``, prepends the
     Spring DOCTYPE line to the saved file, and finally launches the batch
     script mapped to the bean, if there is one.

     Args:
         beanname: ``id`` attribute of the ``<bean>`` element to edit.
         dataframe: Table that holds the parent IDs.
         id_column_name: Name of the column in ``dataframe`` holding the IDs.

     Raises:
         ValueError: If the bean has no ``sfdc.extractionSOQL`` entry.
         SystemExit: If the rewritten query is longer than 100,000 characters.
     """
     import re  # local import: only the AND split below needs it

     config_path = "process-conf.xml"
     backup_path = "old-processconf.xml"

     # Keep a one-time copy of the configuration before it is edited.
     if not os.path.exists(backup_path):
         shutil.copy2(config_path, backup_path)

     tree = ET.parse(config_path)

     # Look the entry up before building the filter: the bean-specific
     # handling below needs the text of the current query.
     soql_xpath = "./bean[@id='" + beanname + "']//entry[@key='sfdc.extractionSOQL']"
     soql_entry = tree.getroot().find(soql_xpath)
     if soql_entry is None:
         raise ValueError(
             "no sfdc.extractionSOQL entry found for bean '" + beanname + "'"
         )
     current_query = soql_entry.attrib["value"]
     select_part, _, where_part = current_query.partition("WHERE")

     # str() lets numeric ID columns through; bare concatenation would raise.
     id_list = dataframe[id_column_name].tolist()
     quoted_ids = ", ".join("'" + str(item) + "'" for item in id_list)
     query_line = "WHERE PARENTID IN ( " + quoted_ids + ")"

     # future maintainers: this is where you'll have to do some work to make
     # this work for other beans; current scope only gives special treatment
     # to the ExtractCommunications bean.
     if beanname == "ExtractCommunications":
         # Keep the conditions that follow the first AND keyword of the old
         # WHERE clause. The word boundaries stop an ID that happens to
         # contain the letters "AND" from being mistaken for the keyword.
         pieces = re.split(r"\bAND\b", where_part, maxsplit=1)
         if len(pieces) == 2:
             query_line = query_line + " AND" + pieces[1]

     changed = select_part + query_line

     if len(changed) > 100_000:
         print("the query is too long, please reduce the amount of parent id's")
         input("press enter to exit")
         # Same effect as quit(), without depending on the site module.
         raise SystemExit

     soql_entry.attrib["value"] = changed
     tree.write(config_path)

     # tree.write() does not emit the DOCTYPE declaration, so put it back as
     # the first line of the saved file.
     doctype_line = (
         '<!DOCTYPE beans PUBLIC "-//SPRING//DTD BEAN//EN" "http://www.springframework.org/dtd/spring-beans.dtd">'
         + "\n"
     )
     with open(config_path, "r+") as fp:
         lines = fp.readlines()
         lines.insert(0, doctype_line)
         fp.seek(0)  # rewrite the whole file from the start
         fp.writelines(lines)

     # also future maintainers: change the paths to your respective .bat files
     bean_scripts = {
         "ExtractCommunicationAttachments": "scripts\\comms_attach_extract.bat",
         "ExtractCommunications": "scripts\\comms_extract.bat",
     }
     script = bean_scripts.get(beanname)
     if script is not None:
         # The script path must be joined inside os.path.join(); passed as a
         # second argument to subprocess.call() it would be taken as bufsize.
         subprocess.call(os.path.join(os.getcwd(), script))
        
        
 # Example call: read the parent IDs from the "ID" column of a CSV export and
 # run the attachments extraction for them.
 # NOTE: as written this runs whenever the file is executed or imported.
 # The backslash is written as "\\" because "\c" is not a valid escape sequence.
 communication_df = pd.read_csv("Data\\communications.csv")
 xml_editor("ExtractCommunicationAttachments", communication_df, "ID")
	import os
	import shutil
	import xml.etree.ElementTree as ET
	import subprocess
	import pandas as pd

	def xml_editor(beanname: str, dataframe: pd.DataFrame, id_column_name: str):
	    """Point a bean's extraction query at a set of parent IDs, then run its script.

	    Rewrites the ``sfdc.extractionSOQL`` entry of the bean ``beanname`` in
	    ``process-conf.xml`` (looked up in the current working directory) so that
	    its WHERE clause becomes ``WHERE PARENTID IN ( <ids>)``, prepends the
	    Spring DOCTYPE line to the saved file, and finally launches the batch
	    script mapped to the bean, if there is one.

	    Args:
	        beanname: ``id`` attribute of the ``<bean>`` element to edit.
	        dataframe: Table that holds the parent IDs.
	        id_column_name: Name of the column in ``dataframe`` holding the IDs.

	    Raises:
	        ValueError: If the bean has no ``sfdc.extractionSOQL`` entry.
	        SystemExit: If the rewritten query is longer than 100,000 characters.
	    """
	    import re  # local import: only the AND split below needs it

	    config_path = "process-conf.xml"
	    backup_path = "old-processconf.xml"

	    # Keep a one-time copy of the configuration before it is edited.
	    if not os.path.exists(backup_path):
	        shutil.copy2(config_path, backup_path)

	    tree = ET.parse(config_path)

	    # Look the entry up before building the filter: the bean-specific
	    # handling below needs the text of the current query.
	    soql_xpath = "./bean[@id='" + beanname + "']//entry[@key='sfdc.extractionSOQL']"
	    soql_entry = tree.getroot().find(soql_xpath)
	    if soql_entry is None:
	        raise ValueError(
	            "no sfdc.extractionSOQL entry found for bean '" + beanname + "'"
	        )
	    current_query = soql_entry.attrib["value"]
	    select_part, _, where_part = current_query.partition("WHERE")

	    # str() lets numeric ID columns through; bare concatenation would raise.
	    id_list = dataframe[id_column_name].tolist()
	    quoted_ids = ", ".join("'" + str(item) + "'" for item in id_list)
	    query_line = "WHERE PARENTID IN ( " + quoted_ids + ")"

	    # future maintainers: this is where you'll have to do some work to make
	    # this work for other beans; current scope only gives special treatment
	    # to the ExtractCommunications bean.
	    if beanname == "ExtractCommunications":
	        # Keep the conditions that follow the first AND keyword of the old
	        # WHERE clause. The word boundaries stop an ID that happens to
	        # contain the letters "AND" from being mistaken for the keyword.
	        pieces = re.split(r"\bAND\b", where_part, maxsplit=1)
	        if len(pieces) == 2:
	            query_line = query_line + " AND" + pieces[1]

	    changed = select_part + query_line

	    if len(changed) > 100_000:
	        print("the query is too long, please reduce the amount of parent id's")
	        input("press enter to exit")
	        # Same effect as quit(), without depending on the site module.
	        raise SystemExit

	    soql_entry.attrib["value"] = changed
	    tree.write(config_path)

	    # tree.write() does not emit the DOCTYPE declaration, so put it back as
	    # the first line of the saved file.
	    doctype_line = (
	        '<!DOCTYPE beans PUBLIC "-//SPRING//DTD BEAN//EN" "http://www.springframework.org/dtd/spring-beans.dtd">'
	        + "\n"
	    )
	    with open(config_path, "r+") as fp:
	        lines = fp.readlines()
	        lines.insert(0, doctype_line)
	        fp.seek(0)  # rewrite the whole file from the start
	        fp.writelines(lines)

	    # also future maintainers: change the paths to your respective .bat files
	    bean_scripts = {
	        "ExtractCommunicationAttachments": "scripts\\comms_attach_extract.bat",
	        "ExtractCommunications": "scripts\\comms_extract.bat",
	    }
	    script = bean_scripts.get(beanname)
	    if script is not None:
	        # The script path must be joined inside os.path.join(); passed as a
	        # second argument to subprocess.call() it would be taken as bufsize.
	        subprocess.call(os.path.join(os.getcwd(), script))


	# Example call: read the parent IDs from the "ID" column of a CSV export and
	# run the attachments extraction for them.
	# NOTE: as written this runs whenever the file is executed or imported.
	# The backslash is written as "\\" because "\c" is not a valid escape sequence.
	communication_df = pd.read_csv("Data\\communications.csv")
	xml_editor("ExtractCommunicationAttachments", communication_df, "ID")