Skip to content

Instantly share code, notes, and snippets.

@bubnenkoff
Created March 12, 2019 12:14
Show Gist options
  • Save bubnenkoff/06f5b155737111d9dea4da05105aa564 to your computer and use it in GitHub Desktop.
Save bubnenkoff/06f5b155737111d9dea4da05105aa564 to your computer and use it in GitHub Desktop.
parser
import os
# модуль для работы с ftp
import ftplib
from pathlib import Path
# модуль для работы с zip архивами
from zipfile import *
# Библиотека для работы с Oracle
import cx_Oracle
import datetime as dt
import time
# !!!Важно. Установленная по умолчанию в Canopy версия библиотеки lxml не умеет читать кодировку windows-1251
# Поэтому необходимо скачать с сайта https://pypi.python.org/pypi/lxml последнюю версию (3.7.3) библиотеки
# Подключаем библиотеку для парсинга XML
import lxml.etree as et
# библиотека для работы с INI подобными файлами
import configparser
import sys
import elementpath
from multiprocessing import Pool
os.environ['NLS_LANG'] = 'AMERICAN_AMERICA.AL32UTF8'
conn = cx_Oracle.connect('zakupki/dD9qHxQD3t5w@FST_RAC')
cursor = conn.cursor()
# Возвращает имя родителя как ключ списка словарей --> lots: [{},{}]
# children_names_list - список тегов которые нужно вытащить из ребенка
# унифицировать не получится. Сюда передаем ТОЛЬКО родителя множественных с указанием списка детей которые нужно извлечь
def xpath_two_level(element, children_names_list):
# размерность родителя не завимимо от количества детей единица! Типа он существует. Если у него 5 детей все равно единица будет
children_names_list = [name.replace("/","_").lower() for name in children_names_list] # передаем сюда со слешами и заменяем все на подчеркивания
# print("\nchildren_names_list: ", children_names_list)
result = dict()
parent_node_name = et.QName(element[0].tag).localname # purchaseObjects
#print("parent_node_name: ", parent_node_name)
item_list = []
for el in element[0]: # если родитель purchaseObjects то тут будут: purchaseObject purchaseObject purchaseObject
child_node_name = et.QName(el.tag).localname # purchaseObject
if len(el)>0: # потомок может не иметь вложенности и быть просто purchaseObject/sum
item_dict = dict()
for e in el:
tag_name = et.QName(e.tag).localname # OKVED а внутри уже искомый OKVED/CODE ИЛИ одиночный тег
tag_name = tag_name.replace("KTRU", "OKPD2") # в некоторых документах встречаются KTRU вместо OKPD2 заменяем т.к. нужен именно он
if len(e) > 0: # К примеру OKPD2 у которого внутри есть OKPD2/aaa OKPD2/bbb OKPD2/ccc
for a in e: # смотрим что в искомом теге
sub_tag_name = et.QName(a.tag).localname
sub_tag_value = a.text
concat_tag_name = tag_name + "_" + sub_tag_name # вид: fcs_id
if concat_tag_name.lower() in children_names_list: # иы же сюда педелаем уже как foo/bar
item_dict[concat_tag_name] = sub_tag_value
else: #
sub_tag_name = et.QName(e.tag).localname
sub_tag_value = e.text
concat_tag_name = child_node_name + "_" + sub_tag_name # вид: purchaseObject_price
if sub_tag_name.lower() in children_names_list:
item_dict[concat_tag_name] = sub_tag_value
item_list.append(item_dict)
else: # бывают случаи когда у потомков нет вложенности, но его тоже надо забрать
pass
result[parent_node_name] = item_list # нам надо вернуть с именем корня: "purchaseObjects": [{},{}]
return result
# print(item_list)
def xpath_three_level(element, children_names_list):
# размерность родителя не завимимо от количества детей единица! Типа он существует. Если у него 5 детей все равно единица будет
children_names_list = [name.replace("/","_").lower() for name in children_names_list] # передаем сюда со слешами и заменяем все на подчеркивания
print("\nchildren_names_list: ", children_names_list)
result = dict()
parent_node_name = et.QName(element[0].tag).localname # purchaseObjects
print("parent_node_name: ", parent_node_name)
item_list = []
purchaseObjects = []
lots = []
obj_list = []
for el in element[0]: # если родитель purchaseObjects то тут будут: purchaseObject purchaseObject purchaseObject
print("\n\n\n--------------------11111111----------------------\n\n\n")
single_lot = []
child_node_name = et.QName(el.tag).localname # purchaseObject
print("child_node_name: ", child_node_name)
if len(el)>0: # потомок может не иметь вложенности и быть просто purchaseObject/sum
print("elelelelelelelelel: ", el)
item_dict = dict()
for e in el:
if len(e) == 0: # если никаких вложенных элементов нет
tag_name = et.QName(e.tag).localname # OKVED а внутри уже искомый OKVED/CODE ИЛИ одиночный тег
tag_name = tag_name.replace("KTRU", "OKPD2") # в некоторых документах встречаются KTRU вместо OKPD2 заменяем т.к. нужен именно он
if len(e) > 0: # К примеру OKPD2 у которого внутри есть OKPD2/aaa OKPD2/bbb OKPD2/ccc
for a in e: # смотрим что в искомом теге
sub_tag_name = et.QName(a.tag).localname
sub_tag_value = a.text
concat_tag_name = tag_name + "_" + sub_tag_name # вид: fcs_id
if concat_tag_name.lower() in children_names_list: # иы же сюда педелаем уже как foo/bar
item_dict[concat_tag_name] = sub_tag_value
else: #
sub_tag_name = et.QName(e.tag).localname
sub_tag_value = e.text
concat_tag_name = child_node_name + "_" + sub_tag_name # вид: purchaseObject_price
if sub_tag_name.lower() in children_names_list:
item_dict[concat_tag_name] = sub_tag_value
else: # если вложенные элементы есть
tag_name = et.QName(e.tag).localname # OKVED а внутри уже искомый OKVED/CODE ИЛИ одиночный тег
tag_name = tag_name.replace("KTRU", "OKPD2") # в некоторых документах встречаются KTRU вместо OKPD2 заменяем т.к. нужен именно он
sub_tag_name = child_node_name + "_" + tag_name # lot_purchaseObjects
if sub_tag_name.lower() in children_names_list: # искомый тег в составе тех которые мы передали в функцию
# print(" ---- ", e, " - ", len(e))
po_dict = dict()
obj_dict = dict()
if (len(e) > 0): # purchaseObjects
for e1 in e:
tag_name_sub = et.QName(e1.tag).localname
if tag_name_sub == "purchaseObject":
for e2 in e1:
sub_tag_name2 = et.QName(e2.tag).localname # OKPD2
print("e2: ", e2)
for e3 in e2:
sub_tag_name3 = et.QName(e3.tag).localname # code
sub4 = sub_tag_name2 + "_" + sub_tag_name3
obj_dict[sub4] = e3.text
# print("e3: ", e3)
# print("e3: ", e3.text)
obj_list.append(obj_dict)
po_dict["purchaseObjects"] = obj_list
# print("\n\nobj_dict: ", obj_dict)
# print("\n\nobj_dict: ", obj_dict)
# single_lot.append(obj_list)
# objs_dict["purchaseObjects"] = objs_dict
else:
print("_________________eee: ", e)
print("obj_list: ", obj_list)
# single_lot.append(po_dict)
# single_lot.append(item_dict)
# item_list.append(single_lot)
# item_list.append(objs_dict)
# single_lot.append(item_dict)
else: # бывают случаи когда у потомков нет вложенности, но его тоже надо забрать
print("eeeeeeee ---. ", e)
purchaseObjects.append(po_dict)
# print("purchaseObjects -----------> ", purchaseObjects)
# result[parent_node_name] = purchaseObjects # нам надо вернуть с именем корня: "purchaseObjects": [{},{}]
# result["objects"] = purchaseObjects # нам надо вернуть с именем корня: "purchaseObjects": [{},{}]
# print("______result: ", purchaseObjects)
# sys.exit()
return po_dict
# Принимаем список секций которые должны выойти в одну таблицу БД.
# Важно! Сеции передаются как список т.е. секции могут часть верхнего уровня, а часть вообще в самом конце
# Список children_names_list ПОКА общий т.е. БЕЗ учета уровня секции если /code и там и там есть, то будет вытащено оба
# т.е. если передать к примеру:
# //ns2:fcsNotificationEF/ns:purchaseResponsible/ns:responsibleOrg
# //ns2:fcsNotificationEF/ns:lot
# то оно вернется в виде единого словаря см. вызывающий код.
# Возвращаем их в формате { "root": { parent_child: "value", parent_another_child: "value2"}}
# Внимание имя если parent_node больше 16, то урезаем его до первых 8 символов!
def xpath_one_level(elements, children_names_list):
# print("xpath_one_level call")
children_names_list = [name.lower() for name in children_names_list]
result = dict()
root = dict()
for element in elements:
# print("element: ", element)
parent_node = element[0]
# print("parent_node: ", parent_node)
parent_node_name = et.QName(parent_node.tag).localname
if len(parent_node_name) >= 16:
parent_node_name = et.QName(parent_node.tag).localname[:8] # сокращаем имя если оно слишком длинное! Требование Оракл!
for el in parent_node:
tag_name = et.QName(el.tag).localname
if tag_name.lower() in children_names_list:
if "date" in tag_name.lower():
root[parent_node_name + "_" + tag_name] = el.text.split("T")[0]
else:
root[parent_node_name + "_" + tag_name] = el.text
result["root"] = root # чтобы у элементов без вложенности были вложены в тег root
print("result: ", result)
return result
def parse_xml(afile, section_name, region_name):
# afile = r"D:\code\2019\python_downloader\files\contract_Moskva_2018010100_2018020100_001\contract_0373100084614000196_38900889.xml"
etxml = None
try:
etxml = et.parse(afile)
except OSError as e:
file_name = Path(afile).name
raise OSError("Cant read: " + file_name)
root = etxml.getroot() # получаем корневой элемент дерева
result_list = []
lnamespaces = {'ns2': 'http://zakupki.gov.ru/oos/export/1',
'ns': 'http://zakupki.gov.ru/oos/types/1',
'ns7':'http://zakupki.gov.ru/oos/EPtypes/1',
'ns6':'http://zakupki.gov.ru/oos/pprf615types/1',
'ns7':'http://zakupki.gov.ru/oos/EPtypes/1'
}
root_element_name = root.xpath("local-name(//ns2:export/*[1])", namespaces=lnamespaces)
if "noti" in section_name.lower() and not "Cancel" in afile: # fcsNotificationCancel в данной секции не обрабатываем! у них другас ятруктура. Узнать этот подтип можно только по имени файла
# print("\n\nnotice")
# судя по документации все остальные секции мы пропустим
# if (not "epNotificationEOK" in afile) or ("epNotificationEOKOU" not in afile) or ("epNotificationEOKD" not in afile) or ("epNotificationEZK" not in afile) or ("epNotificationEZP" not in afile):
# print("ORA: Судя по документации эту секцию пропустим")
# #sys.exit(1) # код именно единица, аначе ответ не будет спаршен!!!
# x = root.xpath("//ns2:fcsNotification*")
# x = root.xpath("//*[starts-with(local-name(), 'ns2:fcsNotification')]")
# x = root.xpath("//*[contains(local-name(), 'fcsNotification')]")
# print("==> ", x)
# sys.exit()
# Секции различаются радикально!!! Тут Иного пути кроме как пытаться догадаться по части имени как обрабатывать секцию нет!!!
if "fcsNotification" in root_element_name: # При этом у нас может быть fcsNotificationFoo или любые другие буквы
# В Нотайсах множественность у LOTS или OBJECTS. На основани наличия множественного LOTS ветвимся. И ТОГО и Другого я не находил
isLotsSectionExists = root.xpath("boolean(//*[contains(local-name(), 'lots')])", namespaces=lnamespaces)
if not isLotsSectionExists: # множественные purchaseObjects а не Лоты!
#print("fcsNotification Multiple purchaseObjects")
top_level_xpath = [
root.xpath("//*[contains(local-name(), 'fcsNotification')]", namespaces=lnamespaces),
root.xpath("//*[contains(local-name(), 'fcsNotification')]/ns:purchaseResponsible/ns:responsibleOrg", namespaces=lnamespaces),
root.xpath("//*[contains(local-name(), 'fcsNotification')]/ns:lot", namespaces=lnamespaces), # у нас тут один ЛОТ, а не ЛОТЫ!
root.xpath("//*[contains(local-name(), 'fcsNotification')]/ns:lot/ns:currency", namespaces=lnamespaces),
]
top_level_include = ["id", "purchaseNumber", "docPublishDate", "INN", "KPP", "fullName", "factAddress", "maxPrice", "code"]
second_level_xpath = root.xpath("//*[contains(local-name(), 'fcsNotification')]/ns:lot/ns:purchaseObjects", namespaces=lnamespaces)
second_level_include = ["OKPD2/code", "OKPD2/name", "quantity/value", "price", "KTRU/code", "KTRU/name"] # там где нет OKPD2 есть KTRU, но выше будет хак замены имени на OKPD2 т.к. при вставке нам OKDP нужен
result = [
xpath_one_level(top_level_xpath, top_level_include),
xpath_two_level(second_level_xpath, second_level_include),
]
prepare_to_insert(result, section_name, region_name, afile)
else: # множественные лоты
print("Multiple lots")
top_level_xpath = [
root.xpath("//*[contains(local-name(), 'fcsNotification')]", namespaces=lnamespaces),
root.xpath("//*[contains(local-name(), 'fcsNotification')]/ns:purchaseResponsible/ns:responsibleOrg", namespaces=lnamespaces),
]
top_level_include = ["id", "purchaseNumber", "docPublishDate", "INN", "KPP", "fullName", "factAddress", "maxPrice", "code"]
second_level_xpath = root.xpath("//*[contains(local-name(), 'fcsNotification')]/ns:lots", namespaces=lnamespaces)
second_level_include = ["OKPD2/code", "currency/code", "quantity/value", "maxPrice", "KTRU/code", "lotnumber"] # там где нет OKPD2 есть KTRU, но выше будет хак замены имени на OKPD2 т.к. при вставке нам OKDP нужен
third_level_xpath = root.xpath("//*[contains(local-name(), 'fcsNotification')]/ns:lots", namespaces=lnamespaces)
third_level_include = ["lot/purchaseObjects", "OKPD2/code", "currency/code", "quantity/value", "maxPrice", "KTRU/code"]
result = [
xpath_one_level(top_level_xpath, top_level_include),
xpath_two_level(second_level_xpath, second_level_include),
xpath_three_level(third_level_xpath, third_level_include),
]
print("result: ", result)
prepare_to_insert(result, section_name, region_name, afile)
elif "epNotification" in root_element_name:
#print("epNotification")
top_level_xpath = [
root.xpath("//*[contains(local-name(), 'epNotification')]", namespaces=lnamespaces),
root.xpath("//*[contains(local-name(), 'epNotification')]/ns7:purchaseResponsibleInfo/ns7:responsibleOrgInfo", namespaces=lnamespaces),
root.xpath("//*[contains(local-name(), 'epNotification')]/ns7:commonInfo", namespaces=lnamespaces),
]
top_level_include = ["id", "purchaseNumber", "docPublishDate", "INN", "KPP", "fullName", "factAddress", "maxPrice", "code"]
# тут проблема у нас множественность может начинаться с секции с именем notDrugpurchaseObjectsInfo а может с любым другим
# поэтому в начале узнаем первого потомка purchaseObjectsInfo, а потом подсунем его в xpath
purchase_root_name = root.xpath("local-name(//ns7:purchaseObjectsInfo/*[1])", namespaces=lnamespaces) # notDrugPurchaseObjectsInfo или подобное в котором есть множественность
second_level_xpath = root.xpath("//ns7:purchaseObjectsInfo/ns7:" + purchase_root_name, namespaces=lnamespaces)
#print("second_level_xpath: ", second_level_xpath)
second_level_include = ["OKPD2/code", "quantity/value", "price"] # там где нет OKPD2 есть KTRU, но выше будет хак замены имени на OKPD2 т.к. при вставке нам OKDP нужен
result = [
xpath_one_level(top_level_xpath, top_level_include),
xpath_two_level(second_level_xpath, second_level_include),
]
prepare_to_insert(result, section_name, region_name, afile)
elif "fcsContractSign" in root_element_name: # это информация о заключении контракта, пока не ясно бьется ли она с основными контрактами. Пропускаем
pass
#print("ORA: Подтип fcsContractSign пока не обрабатываем")
#sys.exit(1)
elif "Clarification" in root_element_name:
pass
#print("ORA: Подтип Clarification (разъяснение документации) не обрабатываем")
#sys.exit(1)
elif "fcsPlacementResult" in root_element_name:
top_level_xpath = [
root.xpath("//ns2:fcsPlacementResult", namespaces=lnamespaces),
# root.xpath("//ns2:contract/ns:foundation/ns:fcsOrder/ns:order", namespaces=lnamespaces),
]
top_level_include = ["purchaseNumber", "lotNumber", "foundationProtocolNumber", "procedurelFailed" ] # procedurelFailed - не опечатка именно так криво он называется
second_level_xpath = root.xpath("//ns2:fcsPlacementResult/ns:applications", namespaces=lnamespaces)
second_level_include = ["journalNumber", "price", "appRating"]
result = [
xpath_one_level(top_level_xpath, top_level_include),
xpath_two_level(second_level_xpath, second_level_include),
]
prepare_to_insert(result, section_name, region_name, afile)
#print("ORA: Подтип fcsPlacementResult (Результат проведения процедуры определения поставщика) не обрабатываем")
#sys.exit(1)
else:
pass
#print("ORA: Unknown notice type: " + str(root_element_name))
#sys.exit(1)
elif "noti" in section_name.lower() and "Cancel" in afile: # Вот тут как раз и обработаем
top_level_xpath = [ root.xpath("//ns2:fcsNotificationCancel", namespaces=lnamespaces) ]
top_level_include = ["id", "publishDate", "purchaseNumber", "publishDTInETP", "docPublishDate"]
result = [ xpath_one_level(top_level_xpath, top_level_include) ]
prepare_to_insert(result, section_name, region_name, afile)
elif "contractprocedure" in afile.lower(): #
pass
#print("ORA: contractprocedure (платежки) пока не грузим")
#sys.exit(1)
"""
top_level_xpath = [root.xpath("//ns2:contractProcedure", namespaces=lnamespaces)]
top_level_include = ["id", "publishDate", "publishDate"] # по идее можно еще , "currency/code", "paid" но там вложенность
result = [ xpath_one_level(top_level_xpath, top_level_include) ]
prepare_to_insert(result, section_name, region_name, afile)
"""
elif "contracts" in section_name.lower() and not "contractprocedure" in afile.lower():
#print("\n\ncontracts")
top_level_xpath = [
root.xpath("//ns2:contract", namespaces=lnamespaces),
root.xpath("//ns2:contract/ns:foundation/ns:fcsOrder/ns:order", namespaces=lnamespaces),
root.xpath("//ns2:contract/ns:customer", namespaces=lnamespaces),
root.xpath("//ns2:contract/ns:suppliers/ns:supplier/ns:legalEntityRF", namespaces=lnamespaces),
root.xpath("//ns2:contract/ns:priceInfo", namespaces=lnamespaces),
root.xpath("//ns2:contract/ns:priceInfo/ns:currency", namespaces=lnamespaces),
root.xpath("//ns2:contract/ns:products/ns:product/ns:OKPD2", namespaces=lnamespaces),
root.xpath("//ns2:contract/ns:priceInfo", namespaces=lnamespaces),
root.xpath("//ns2:contract/ns:priceInfo/ns:currency", namespaces=lnamespaces),
root.xpath("//ns2:contract/ns:products/ns:product", namespaces=lnamespaces),
]
top_level_include = ["id", "publishDate", "purchaseNumber", "notificationNumber", "lotNumber", "inn", "kpp", "fullName", "address", "price", "code", "price", "price", "quantity", "sum" ]
# top_level_include = ["id"]
second_level_xpath = root.xpath("//ns2:fcsNotificationEF/ns:lot/ns:purchaseObjects", namespaces=lnamespaces)
second_level_include = ["OKPD2/code", "OKPD2/name", "quantity/value", "price"]
result = [
xpath_one_level(top_level_xpath, top_level_include),
]
#print("result: ", result)
prepare_to_insert(result, section_name, region_name, afile)
elif "protocol" in section_name.lower():
#print("\nprotocol")
isappParticipantsSectionExists = root.xpath("boolean(//*[contains(local-name(), 'appParticipants')])", namespaces=lnamespaces)
print("isappParticipantsSectionExists: ", isappParticipantsSectionExists)
# Это немного не те типы. Протоколы рассмотрения заявок и протокол проведения электронного аукциона
# top_level_xpath = []
if not isappParticipantsSectionExists: # одиночный appParticipant
if "EF1_" in afile:
top_level_xpath = [ root.xpath("//ns2:fcsProtocolEF1", namespaces=lnamespaces)
]
return # пропускам эту секцию
elif "EF2_" in afile:
top_level_xpath = [ root.xpath("//ns2:fcsProtocolEF2", namespaces=lnamespaces)
]
return # пропускам эту секцию
elif "EF3_" in afile:
top_level_xpath = [ root.xpath("//ns2:fcsProtocolEF3", namespaces=lnamespaces),
root.xpath("//ns:appParticipant", namespaces=lnamespaces),
]
elif "OK1_" in afile:
top_level_xpath = [ root.xpath("//ns2:fcsProtocolOK1", namespaces=lnamespaces) ]
elif "OK2_" in afile:
top_level_xpath = [ root.xpath("//ns2:fcsProtocolOK2", namespaces=lnamespaces) ]
elif "Evasion_" in afile: # протокол отказа от заключения договора, пропускам его
top_level_xpath = [ root.xpath("//ns2:fcsProtocolEvasion", namespaces=lnamespaces),
]
return # нет смысла продолжать
elif "OKOU2_" in afile:
top_level_xpath = [ root.xpath("//ns2:fcsProtocolOKOU2", namespaces=lnamespaces),
]
elif "EFSinglePart_" in afile:
top_level_xpath = [ root.xpath("//ns2:fcsProtocolEFSinglePart", namespaces=lnamespaces),
]
elif "SingleApp" in afile:
top_level_xpath = [ root.xpath("//ns2:fcsProtocolEFSingleApp", namespaces=lnamespaces),
root.xpath("//ns:appParticipant", namespaces=lnamespaces),
]
elif "PPI_" in afile:
top_level_xpath = [ root.xpath("//ns2:epProtocolEOK3", namespaces=lnamespaces),
root.xpath("//ns2:epProtocolEOK3/ns7:commonInfo", namespaces=lnamespaces),
]
elif "PR615_" in afile:
top_level_xpath = [ root.xpath("//ns2:pprf615ProtocolEF1", namespaces=lnamespaces),
root.xpath("//ns2:pprf615ProtocolEF1/ns6:commonInfo", namespaces=lnamespaces),
]
elif "ProtocolZ" in afile:
if root_element_name == "epProtocolEZP2":
top_level_xpath = [ root.xpath("//ns2:epProtocolEZP2", namespaces=lnamespaces),
root.xpath("//ns2:epProtocolEZP2/ns7:commonInfo", namespaces=lnamespaces),
]
elif root_element_name == "fcsProtocolZP":
top_level_xpath = [ root.xpath("//ns2:fcsProtocolZP", namespaces=lnamespaces),
]
elif root_element_name == "fcsProtocolZK":
top_level_xpath = [ root.xpath("//ns2:fcsProtocolZK", namespaces=lnamespaces),
]
elif root_element_name == "fcsProtocolZPFinal":
top_level_xpath = [ root.xpath("//ns2:fcsProtocolZPFinal", namespaces=lnamespaces),
]
elif root_element_name == "fcsProtocolZPExtract":
print("Skipping this section")
return
else:
raise Exception("Unkown root element name: " + root_element_name)
elif "PRO_" in afile:
top_level_xpath = [ root.xpath("//ns2:epProtocolEZK2", namespaces=lnamespaces),
root.xpath("//ns2:epProtocolEZK2/ns7:commonInfo", namespaces=lnamespaces),
]
elif "VPP_" in afile:
top_level_xpath = [ root.xpath("//ns2:epProtocolEZP1Extract", namespaces=lnamespaces),
root.xpath("//ns2:epProtocolEZP1Extract/ns7:commonInfo", namespaces=lnamespaces),
]
elif "Cancel" in afile:
raise Exception("fcsProtocolCancel not processing currently") # пока не обрабатываем их. У части есть id, у части нет. Это только к проблемам приводит
top_level_xpath = [ root.xpath("//ns2:fcsProtocolCancel", namespaces=lnamespaces) ]
else:
raise Exception("Unknown subsection for protocols!!")
top_level_include = ["id", "publishDate", "purchaseNumber", "publishDTInETP", "docPublishDate", "inn", "kpp", "organizationName", "postAddress"] # если нет publishDate то есть publishDTInETP
# docPublishDate только у PR615 и Cancel
result = [
xpath_one_level(top_level_xpath, top_level_include),
]
print("!!!result: ", result)
prepare_to_insert(result, section_name, region_name, afile)
else: # множественные appParticipants
print("we are here")
if "EF3_" in afile:
top_level_xpath = [ root.xpath("//ns2:fcsProtocolEF3", namespaces=lnamespaces) ]
elif "OK1_" in afile:
top_level_xpath = [ root.xpath("//ns2:fcsProtocolOK1", namespaces=lnamespaces) ]
elif "OK2_" in afile:
top_level_xpath = [ root.xpath("//ns2:fcsProtocolOK2", namespaces=lnamespaces) ]
elif "Evasion_" in afile: # протокол отказа от заключения договора
top_level_xpath = [ root.xpath("//ns2:fcsProtocolEvasion", namespaces=lnamespaces),
]
return # нет смысла продолжать
elif "OKOU2_" in afile:
top_level_xpath = [ root.xpath("//ns2:fcsProtocolOKOU2", namespaces=lnamespaces),
]
elif "EFSinglePart_" in afile:
top_level_xpath = [ root.xpath("//ns2:fcsProtocolEFSinglePart", namespaces=lnamespaces),
]
elif "PPI_" in afile:
top_level_xpath = [ root.xpath("//ns2:epProtocolEOK3", namespaces=lnamespaces),
root.xpath("//ns2:epProtocolEOK3/ns7:commonInfo", namespaces=lnamespaces),
]
elif "PR615_" in afile:
top_level_xpath = [ root.xpath("//ns2:pprf615ProtocolEF1", namespaces=lnamespaces),
root.xpath("//ns2:pprf615ProtocolEF1/ns6:commonInfo", namespaces=lnamespaces),
]
elif "ProtocolZ" in afile:
if root_element_name == "epProtocolEZP2":
top_level_xpath = [ root.xpath("//ns2:epProtocolEZP2", namespaces=lnamespaces),
root.xpath("//ns2:epProtocolEZP2/ns7:commonInfo", namespaces=lnamespaces),
]
elif root_element_name == "fcsProtocolZP":
top_level_xpath = [ root.xpath("//ns2:fcsProtocolZP", namespaces=lnamespaces),
]
elif root_element_name == "fcsProtocolZK":
top_level_xpath = [ root.xpath("//ns2:fcsProtocolZK", namespaces=lnamespaces),
]
elif root_element_name == "fcsProtocolZPFinal":
top_level_xpath = [ root.xpath("//ns2:fcsProtocolZPFinal", namespaces=lnamespaces),
]
elif root_element_name == "fcsProtocolZPExtract":
print("Skipping this section")
return
else:
raise Exception("Unkown root element name: " + root_element_name)
elif "PRO_" in afile:
top_level_xpath = [ root.xpath("//ns2:epProtocolEZK2", namespaces=lnamespaces),
root.xpath("//ns2:epProtocolEZK2/ns7:commonInfo", namespaces=lnamespaces),
]
elif "VPP_" in afile:
top_level_xpath = [ root.xpath("//ns2:epProtocolEZP1Extract", namespaces=lnamespaces),
root.xpath("//ns2:epProtocolEZP1Extract/ns7:commonInfo", namespaces=lnamespaces),
]
elif "Cancel" in afile:
raise Exception("fcsProtocolCancel not processing currently") # пока не обрабатываем их. У части есть id, у части нет. Это только к проблемам приводит
top_level_xpath = [ root.xpath("//ns2:fcsProtocolCancel", namespaces=lnamespaces) ]
else:
raise Exception("Unknown subsection for protocols!!!!")
top_level_include = ["id", "publishDate", "purchaseNumber", "publishDTInETP", "docPublishDate"] # если нет publishDate то есть publishDTInETP
# docPublishDate только у PR615 и Cancel
second_level_xpath = root.xpath("//ns:appParticipants", namespaces=lnamespaces) # проще искать от этого уровня
second_level_include = ["inn", "kpp", "organizationName", "postAddress"]
result = [
xpath_one_level(top_level_xpath, top_level_include),
xpath_two_level(second_level_xpath, second_level_include),
]
print("+++result: ", result)
prepare_to_insert(result, section_name, region_name, afile)
else:
raise Exception("Unkown file type. Expected contract/protocol/...")
# else:
# print("Ошибка! Неизвестная секция")
# return result, section_name, region_name, afile
def prepare_to_insert(result, section_name, region_name, afile):
if "noti" in section_name.lower():
sql_504 = "" # объявим заранее
#print("----------------- >", afile)
if "Cancel" not in afile:
# Даже если у нотайса одиночный лот, то вставляем его в ОТДЕЛЬНУЮ таблицу т.к. есть нотайсы где лоты множестве
sql_test = """
MERGE INTO NOTIFICATION_FZ44 p
USING (SELECT
:fcsNotif_id as doc_id,
:fcsNotif_purchaseNumber as purchaseNumber,
:fcsNotif_docpublishdate as docpublishdate,
:responsibleOrg_INN as PLACER_INN,
:responsibleOrg_KPP as PLACER_KPP,
:responsibleOrg_fullName as PLACER_name,
:responsibleOrg_factAddress as PLACER_ADDRESS,
:region as region
FROM DUAL
) v
ON (p.purchaseNumber=v.purchaseNumber)
WHEN MATCHED THEN
UPDATE SET
p.docpublishdate = TO_DATE(v.docpublishdate,'YYYY-MM-DD'),
p.PLACER_INN = v.PLACER_INN,
p.PLACER_KPP = v.PLACER_KPP,
p.PLACER_name = v.PLACER_name,
p.PLACER_ADDRESS = v.PLACER_ADDRESS,
p.region = v.region
WHEN NOT MATCHED THEN
INSERT (p.doc_id, p.purchaseNumber, p.docpublishdate, p.PLACER_INN, p.PLACER_KPP, p.PLACER_name, p.PLACER_ADDRESS, p.region)
VALUES(v.doc_id, v.purchaseNumber, TO_DATE(v.docpublishdate,'YYYY-MM-DD'), v.PLACER_INN, v.PLACER_KPP, v.PLACER_name, v.PLACER_ADDRESS, v.region )
"""
sql_test_lots = """
MERGE INTO NOTIFICATION_LOTS_FZ44 p
USING (SELECT
:fcsNotif_purchaseNumber as purchaseNumber,
:lot_maxPrice as lot_maxPrice,
:currency_code as lot_currency_code,
:lot_lotNumber as lot_Number,
:MYUNIQ_ID as MYUNIQ_ID,
:lot_type as multylot,
:xml_name as xml_name
FROM DUAL
) v
ON (p.myuniq_id=v.myuniq_id)
WHEN MATCHED THEN
UPDATE SET
p.purchaseNumber = v.purchaseNumber,
p.lot_maxPrice = v.lot_maxPrice,
p.lot_currency_code = v.lot_currency_code,
p.multylot = v.multylot,
p.xml_name = v.xml_name
WHEN NOT MATCHED THEN
INSERT (p.purchaseNumber, p.lot_maxPrice, p.lot_currency_code, p.MYUNIQ_ID, p.lot_Number, p.xml_name, p.multylot)
VALUES(v.purchaseNumber, v.lot_maxPrice, v.lot_currency_code, v.MYUNIQ_ID, v.lot_Number, v.xml_name, v.multylot)
"""
sql_test_2 = """
MERGE INTO NOTIFICATION_OBJS p
USING (SELECT
:myuniq_id as myuniq_id,
:fcsNotif_purchaseNumber as purchaseNumber,
:OKPD2_code as OKPD2_code,
:OKPD2_name as OKPD2_name
FROM DUAL
) v
ON (p.myuniq_id=v.myuniq_id AND p.OKPD2_code=v.OKPD2_code)
WHEN MATCHED THEN
UPDATE SET
p.purchaseNumber = v.purchaseNumber,
-- p.OKPD2_code = v.OKPD2_code,
p.OKPD2_name = v.OKPD2_name
WHEN NOT MATCHED THEN
INSERT (p.myuniq_id, p.purchaseNumber, p.OKPD2_code, p.OKPD2_name)
VALUES(v.myuniq_id, v.purchaseNumber, v.OKPD2_code, v.OKPD2_name)
"""
if "NotificationCancel" in afile: # внимание тут есть DOC_ID
sql_test = """
MERGE INTO Notification_CANCEL_FZ44 p
USING (SELECT
:fcsNotif_id as doc_id,
:fcsNotif_purchaseNumber as PURCHASENUMBER,
:fcsNotif_docPublishDate as PUBLISHDATE,
:region as region
FROM DUAL
) v
ON (p.PURCHASENUMBER=v.PURCHASENUMBER)
WHEN MATCHED THEN
UPDATE SET
p.doc_id = v.doc_id,
p.PUBLISHDATE = TO_DATE(v.PUBLISHDATE,'YYYY-MM-DD'),
p.region = v.region
WHEN NOT MATCHED THEN
INSERT (p.doc_id, p.PURCHASENUMBER, p.PUBLISHDATE, p.region)
VALUES (v.doc_id, v.PURCHASENUMBER, TO_DATE(v.PUBLISHDATE,'YYYY-MM-DD'), v.region)
"""
if "NotificationZK504" in afile or "NotificationZP504" in afile:
sql_504 = """
MERGE INTO NOTIFICATION_FZ44 p
USING (SELECT
:epNotifi_id as doc_id,
:commonInfo_purchaseNumber as purchaseNumber,
:responsi_INN as PLACER_INN,
:responsi_KPP as PLACER_KPP,
:responsi_fullName as PLACER_name,
:responsi_factAddress as PLACER_ADDRESS,
:region as region
FROM DUAL
) v
ON (p.purchaseNumber=v.purchaseNumber)
WHEN MATCHED THEN
UPDATE SET
p.PLACER_INN = v.PLACER_INN,
p.PLACER_KPP = v.PLACER_KPP,
p.PLACER_name = v.PLACER_name,
p.PLACER_ADDRESS = v.PLACER_ADDRESS,
p.region = v.region
WHEN NOT MATCHED THEN
INSERT (p.doc_id, p.purchaseNumber, p.PLACER_INN, p.PLACER_KPP, p.PLACER_name, p.PLACER_ADDRESS, p.region)
VALUES(v.doc_id, v.purchaseNumber, v.PLACER_INN, v.PLACER_KPP, v.PLACER_name, v.PLACER_ADDRESS, v.region )
"""
if "NotificationOK504" in afile:
sql_504 = """
MERGE INTO NOTIFICATION_FZ44 p
USING (SELECT
:epNotifi_id as doc_id,
:commonInfo_purchaseNumber as purchaseNumber,
:responsi_INN as PLACER_INN,
:responsi_KPP as PLACER_KPP,
:responsi_fullName as PLACER_name,
:responsi_factAddress as PLACER_ADDRESS,
:region as region
FROM DUAL
) v
ON (p.purchaseNumber=v.purchaseNumber)
WHEN MATCHED THEN
UPDATE SET
p.PLACER_INN = v.PLACER_INN,
p.PLACER_KPP = v.PLACER_KPP,
p.PLACER_name = v.PLACER_name,
p.PLACER_ADDRESS = v.PLACER_ADDRESS,
p.region = v.region
WHEN NOT MATCHED THEN
INSERT (p.doc_id, p.purchaseNumber, p.PLACER_INN, p.PLACER_KPP, p.PLACER_name, p.PLACER_ADDRESS, p.region)
VALUES(v.doc_id, v.purchaseNumber, v.PLACER_INN, v.PLACER_KPP, v.PLACER_name, v.PLACER_ADDRESS, v.region )
"""
if "PlacementResult" in afile:
sql_504 = """
MERGE INTO NOTIFICATION_RESULT_FZ44 p
USING (SELECT
:fcsPlace_purchaseNumber as PURCHASENUMBER,
:fcsPlace_lotNumber as LOTNUMBER,
:fcsPlace_foundationProtNumber as FOUNDATION_PROTOCOLNUMBER,
:fcsPlace_procedurelFailed as PROCEDURELFAILED,
:application_journalNumber as APPLICATION_JOURNALNUMBER,
:application_appRating as APPLICATION_RATING,
:application_price as APPLICATION_PRICE,
:region as region
FROM DUAL
) v
ON (p.purchaseNumber=v.purchaseNumber)
WHEN MATCHED THEN
UPDATE SET
p.LOTNUMBER = v.LOTNUMBER,
p.FOUNDATION_PROTOCOLNUMBER = v.FOUNDATION_PROTOCOLNUMBER,
p.PROCEDURELFAILED = v.PROCEDURELFAILED,
p.APPLICATION_JOURNALNUMBER = v.APPLICATION_JOURNALNUMBER,
p.APPLICATION_RATING = v.APPLICATION_RATING,
p.APPLICATION_PRICE = v.APPLICATION_PRICE,
p.region = v.region
WHEN NOT MATCHED THEN
INSERT (p.purchaseNumber, p.LOTNUMBER, p.FOUNDATION_PROTOCOLNUMBER, p.PROCEDURELFAILED, p.APPLICATION_JOURNALNUMBER, p.APPLICATION_RATING, p.APPLICATION_PRICE, p.region)
VALUES(v.purchaseNumber, v.LOTNUMBER, v.FOUNDATION_PROTOCOLNUMBER, v.PROCEDURELFAILED, v.APPLICATION_JOURNALNUMBER, v.APPLICATION_RATING, v.APPLICATION_PRICE, v.region)
"""
# Идей лучше чем эту сецию сделать исключительной до тех пор пока не ясно сколько вообще подобных секций будет нет
if "NotificationOK504" in afile:
result[0]["root"]["region"] = region_name
# print("we are here ", type(result[0]["root"]))
save_to_ora(sql_504, result[0]["root"])
if "NotificationZK504" in afile or "NotificationZP504" in afile:
result[0]["root"]["region"] = region_name
# print("we are here ", type(result[0]["root"]))
save_to_ora(sql_504, result[0]["root"])
if "fcsPlacementResult" in afile:
result[0]["root"]["region"] = region_name
fcsPlace_purchaseNumber = result[0]["root"]["fcsPlace_purchaseNumber"]
fcsPlace_lotNumber = result[0]["root"]["fcsPlace_lotNumber"]
fcsPlace_foundationProtocolNumber = result[0]["root"]["fcsPlace_foundationProtocolNumber"]
fcsPlace_procedurelFailed = result[0]["root"]["fcsPlace_procedurelFailed"]
region = result[0]["root"]["region"]
if "applications" in str(result): # т.к. все данные мы вставляем в одну таблицу, то идем от обратного и все данные из root мы подставляем в result
for el in result[1]['applications']: # В теории у нас может быть множественность
el["region"] = region
el["fcsPlace_procedurelFailed"] = fcsPlace_procedurelFailed
el["fcsPlace_foundationProtNumber"] = fcsPlace_foundationProtocolNumber # название сокращено
el["fcsPlace_lotNumber"] = fcsPlace_lotNumber
el["fcsPlace_purchaseNumber"] = fcsPlace_purchaseNumber
el["fcsPlace_procedurelFailed"] = fcsPlace_procedurelFailed
if not "application_appRating" in el:
el["application_appRating"] = ""
save_to_ora(sql_504, el)
# save_to_ora(sql_504, result[0]["root"])
if not "504_" in afile and not "fcsPlacementResult" in afile: # обрабатываем случаи NotificationOK504_ и NotificationK504_
external_key = None # во вложенном словаре нужно будет знать кто его родитель, нужно для foreign_key
# Нужно проверить а может у нас уже есть LOTS!
# тут нужно модифицировать оригинальный словарь, чтобы он выглядел так, как будто бы там есть lots пусть и с одним элементом, иначе это никак не унифицировать sql_test_lots
#print("____________________result: ", result)
if not "lots" in result[1]:
result[0]["lots"] = [] # создаем в корне секцию лотов. Теперь у нас там еще и root и purchaseObjects , в данном случае в ней не будет множественности, но она нужна нам
temp_dict = {}
for k,v in result[0]["root"].items():
if k == "lot_maxPrice":
temp_dict["lot_maxPrice"] = v
lot_maxPrice = dict()
if k == "currency_code":
temp_dict["currency_code"] = v
temp_dict["OKPD2_code"] = "" # чтобы в запросе на вставку было нужное количество арнументов подсунем пустышку
temp_dict["lot_type"] = "single" # нет вложенных элементов и разные ОКВЕДов
temp_dict["lot_lotNumber"] = "1" # там где нет lot_lotNumber всегда ставим единицу т.к. она нужна будет для формирования ключа
result[0]["lots"].append(temp_dict)
# Удаляем из оригинала поля которые мы уже засунули во вложенный массив
del result[0]["root"]["lot_maxPrice"]
del result[0]["root"]["currency_code"]
else: # если у нас множественность, то надо пройтись по всем элементам и поставить там флаг малтипл. Типа в документе куча лотов
for el in result[1]["lots"]:
el["lot_type"] = "multiple"
print("\n\n\n\n\n\nresult: ", result)
print("________________")
myuniqid = ""
for r in result:
print("rrrrrrrrrrrrr: ", r)
for k,v in r.items(): # k - название крупных разделов
if k == "root":
external_key = v["fcsNotif_purchaseNumber"] # сохраним внешний ключ, который будем использовать во внутреннем словаре
v["region"] = region_name
save_to_ora(sql_test, v)
if k == "lots":
for el in v:
el["fcsNotif_purchaseNumber"] = external_key
myuniqid = str(external_key) + "_" + str(el["lot_lotNumber"]) # PURCHASENUMBER плюс LOT_NUMBER в итоге связка будет уникальной
el["myuniq_id"] = myuniqid
if not "currency_code" in el:
el["currency_code"] = ""
if not "lot_maxPrice" in el:
el["lot_maxPrice"] = ""
if "OKPD2_code" in el:
del el["OKPD2_code"] # бывает захватывается. Почему именно нужно разбираться
el["xml_name"] = Path(afile).name
# el["lot_type"] = "multiple" # ПОЧИНИТЬ ЭТУ ЧАСТЬ. Сейчас флаг выставляется везде без разбора! чтобы видеть, что у нас лоты множественные, а не те которые из одиночной секции lot взяты
save_to_ora(sql_test_lots, el)
if k == "purchaseObjects":
for el in v:
el["myuniq_id"] = myuniqid
el["fcsNotif_purchaseNumber"] = external_key
if not "quantity_value" in el:
el["quantity_value"] = ""
if "OKEI_nationalCode" in el: # удаляем случайно захваченные теги
del el["OKEI_nationalCode"]
if "OKEI_code" in el:
del el["OKEI_code"]
if "purchaseObject_price" in el: # тоже захватывается, но не нужно цена у нас в лоте есть
del el["purchaseObject_price"]
if "customerQuantities" in el: # чистим разный попавший сюда мусор
del el["customerQuantities"]
if "customerQuantities_customerQuantity" in el:
del el["customerQuantities_customerQuantity"]
if "quantity_value" in el: # хотя мы и собираем количество, но оно тут будет не корректным т.к. окведы у товаров часто совпадают
del el["quantity_value"] # пример notification_Moskva_2018010100_2018020100_002/fcsNotificationEA44_0873500000817000938_14959861.xml
save_to_ora(sql_test_2, el)
elif "contractprocedure" in afile.lower():
sql_test = """
MERGE INTO CONTRACT_PROCEDURE_FZ44 p
USING (SELECT
:contract_id as doc_id,
:contract_publishDate as PUBLISHDATE,
:region as region
FROM DUAL
) v
ON (p.doc_id=v.doc_id)
WHEN MATCHED THEN
UPDATE SET
p.PUBLISHDATE = TO_DATE(v.PUBLISHDATE,'YYYY-MM-DD'),
p.region = v.region
WHEN NOT MATCHED THEN
INSERT (p.doc_id, p.PUBLISHDATE, p.region)
VALUES(v.doc_id, TO_DATE(v.PUBLISHDATE,'YYYY-MM-DD'), v.region)
"""
for r in result:
for k,v in r.items(): # k - название крупных разделов
if k == "root":
v["region"] = region_name
save_to_ora(sql_test, v)
elif "contracts" in section_name.lower() and not "contractprocedure" in afile.lower():
sql_test = """
MERGE INTO CONTRACT_FZ44 p
USING (SELECT
:contract_id as doc_id,
:contract_publishDate as PUBLISHDATE,
:order_notificationNumber as NOTIFICATIONNUMBER,
:order_lotNumber as lotNumber,
:customer_inn as CUSTOMER_INN,
:customer_kpp as CUSTOMER_KPP,
:customer_fullName as CUSTOMER_NAME,
:legalEntityRF_INN as SUPPLIER_INN,
:legalEntityRF_KPP as SUPPLIER_KPP,
:legalEntityRF_fullName as SUPPLIER_NAME,
:legalEntityRF_address as SUPPLIER_ADDRESS,
:priceInfo_price as PRICE,
:CURRENCY_CODE as CURRENCY_CODE,
:OKPD2_code as PRODUCT_OKPD2_CODE,
:product_price as PRODUCT_PRICE,
:product_quantity as PRODUCT_QUANTITY,
:product_sum as PRODUCT_SUM,
:region as region,
:file_name as file_name
FROM DUAL
) v
ON (p.NOTIFICATIONNUMBER=v.NOTIFICATIONNUMBER)
WHEN MATCHED THEN
UPDATE SET
p.PUBLISHDATE = TO_DATE(v.PUBLISHDATE,'YYYY-MM-DD'),
p.lotNumber = v.lotNumber,
p.CUSTOMER_INN = v.CUSTOMER_INN,
p.CUSTOMER_KPP = v.CUSTOMER_KPP,
p.CUSTOMER_NAME = v.CUSTOMER_NAME,
p.SUPPLIER_INN = v.SUPPLIER_INN,
p.SUPPLIER_KPP = v.SUPPLIER_KPP,
p.SUPPLIER_NAME = v.SUPPLIER_NAME,
p.SUPPLIER_ADDRESS = v.SUPPLIER_ADDRESS,
p.PRICE = v.PRICE,
p.CURRENCY_CODE = v.CURRENCY_CODE,
p.PRODUCT_OKPD2_CODE = v.PRODUCT_OKPD2_CODE,
p.PRODUCT_PRICE = v.PRODUCT_PRICE,
p.PRODUCT_QUANTITY = v.PRODUCT_QUANTITY,
p.PRODUCT_SUM = v.PRODUCT_SUM,
p.region = v.region,
p.file_name = v.file_name
WHEN NOT MATCHED THEN
INSERT (p.doc_id, p.PUBLISHDATE, p.NOTIFICATIONNUMBER, p.lotNumber, p.CUSTOMER_INN, p.CUSTOMER_KPP, p.CUSTOMER_NAME, p.SUPPLIER_INN, p.SUPPLIER_KPP, p.SUPPLIER_NAME, p.SUPPLIER_ADDRESS, p.PRICE, p.CURRENCY_CODE, p.PRODUCT_OKPD2_CODE, p.PRODUCT_PRICE, p.PRODUCT_QUANTITY, p.PRODUCT_SUM, p.region, p.file_name)
VALUES(v.doc_id, TO_DATE(v.PUBLISHDATE,'YYYY-MM-DD'), v.NOTIFICATIONNUMBER, v.lotNumber, v.CUSTOMER_INN, v.CUSTOMER_KPP, v.CUSTOMER_NAME, v.SUPPLIER_INN, v.SUPPLIER_KPP, v.SUPPLIER_NAME, v.SUPPLIER_ADDRESS, v.PRICE, v.CURRENCY_CODE, v.PRODUCT_OKPD2_CODE, v.PRODUCT_PRICE, v.PRODUCT_QUANTITY, v.PRODUCT_SUM, v.region, v.file_name)
"""
for r in result:
for k,v in r.items(): # k - название крупных разделов
if k == "root":
v["region"] = region_name
v["file_name"] = Path(afile).name
save_to_ora(sql_test, v)
elif "protocols" in section_name.lower():
sql_test = ""
sql_suppliers = ""
# В перспективе можно не привязываться к имени файла, но тогда нужно выше вводить понятие подтип.
if "EF1_" in afile:
sql_test = """
MERGE INTO PROTOCOLS_FZ44 p
USING (SELECT
:fcsProtocolEF1_id as doc_id,
:fcsProtocolEF1_purchaseNumber as PURCHASENUMBER,
:fcsProtocolEF1_publishDate as PUBLISHDATE,
:region as region,
:file_name as file_name
FROM DUAL
) v
ON (p.PURCHASENUMBER=v.PURCHASENUMBER)
WHEN MATCHED THEN
UPDATE SET
p.PUBLISHDATE = TO_DATE(v.PUBLISHDATE,'YYYY-MM-DD'),
p.region = v.region,
p.file_name = v.file_name
WHEN NOT MATCHED THEN
INSERT (p.doc_id, p.PURCHASENUMBER, p.PUBLISHDATE, p.region, p.file_name)
VALUES(v.doc_id, v.PURCHASENUMBER, TO_DATE(v.PUBLISHDATE,'YYYY-MM-DD'), v.region, v.file_name)
"""
elif "EF2_" in afile:
sql_test = """
MERGE INTO PROTOCOLS_FZ44 p
USING (SELECT
:fcsProtocolEF2_id as doc_id,
:fcsProtocolEF2_purchaseNumber as PURCHASENUMBER,
:fcsProtocolEF2_publishDate as PUBLISHDATE,
:region as region,
:file_name as file_name
FROM DUAL
) v
ON (p.PURCHASENUMBER=v.PURCHASENUMBER)
WHEN MATCHED THEN
UPDATE SET
p.PUBLISHDATE = TO_DATE(v.PUBLISHDATE,'YYYY-MM-DD'),
p.region = v.region,
p.file_name = v.file_name
WHEN NOT MATCHED THEN
INSERT (p.doc_id, p.PURCHASENUMBER, p.PUBLISHDATE, p.region, p.file_name)
VALUES(v.doc_id, v.PURCHASENUMBER, TO_DATE(v.PUBLISHDATE,'YYYY-MM-DD'), v.region, v.file_name)
"""
elif "EF3_" in afile:
sql_test = """
MERGE INTO PROTOCOLS_FZ44 p
USING (SELECT
:fcsProtocolEF3_id as doc_id,
:fcsProtocolEF3_purchaseNumber as PURCHASENUMBER,
:fcsProtocolEF3_publishDate as PUBLISHDATE,
:region as region,
:file_name as file_name
FROM DUAL
) v
ON (p.PURCHASENUMBER=v.PURCHASENUMBER)
WHEN MATCHED THEN
UPDATE SET
p.PUBLISHDATE = TO_DATE(v.PUBLISHDATE,'YYYY-MM-DD'),
p.region = v.region,
p.file_name = v.file_name
WHEN NOT MATCHED THEN
INSERT (p.doc_id, p.PURCHASENUMBER, p.PUBLISHDATE, p.region, p.file_name)
VALUES(v.doc_id, v.PURCHASENUMBER, TO_DATE(v.PUBLISHDATE,'YYYY-MM-DD'), v.region, v.file_name)
"""
elif "OK1_" in afile:
sql_test = """
MERGE INTO PROTOCOLS_FZ44 p
USING (SELECT
:fcsProtocolOK1_id as doc_id,
:fcsProtocolOK1_purchaseNumber as PURCHASENUMBER,
:fcsProtocolOK1_publishDate as PUBLISHDATE,
:region as region,
:file_name as file_name
FROM DUAL
) v
ON (p.PURCHASENUMBER=v.PURCHASENUMBER)
WHEN MATCHED THEN
UPDATE SET
p.PUBLISHDATE = TO_DATE(v.PUBLISHDATE,'YYYY-MM-DD'),
p.region = v.region,
p.file_name = v.file_name
WHEN NOT MATCHED THEN
INSERT (p.doc_id, p.PURCHASENUMBER, p.PUBLISHDATE, p.region, p.file_name)
VALUES(v.doc_id, v.PURCHASENUMBER, TO_DATE(v.PUBLISHDATE,'YYYY-MM-DD'), v.region, v.file_name)
"""
elif "OK2_" in afile:
sql_test = """
MERGE INTO PROTOCOLS_FZ44 p
USING (SELECT
:fcsProtocolOK2_id as doc_id,
:fcsProtocolOK2_purchaseNumber as PURCHASENUMBER,
:fcsProtocolOK2_publishDate as PUBLISHDATE,
:region as region,
:file_name as file_name
FROM DUAL
) v
ON (p.PURCHASENUMBER=v.PURCHASENUMBER)
WHEN MATCHED THEN
UPDATE SET
p.PUBLISHDATE = TO_DATE(v.PUBLISHDATE,'YYYY-MM-DD'),
p.region = v.region,
p.file_name = v.file_name
WHEN NOT MATCHED THEN
INSERT (p.doc_id, p.PURCHASENUMBER, p.PUBLISHDATE, p.region, p.file_name)
VALUES(v.doc_id, v.PURCHASENUMBER, TO_DATE(v.PUBLISHDATE,'YYYY-MM-DD'), v.region, v.file_name)
"""
elif "PZP_" in afile:
sql_test = """
MERGE INTO PROTOCOLS_FZ44 p
USING (SELECT
:epProtocolEZP2_id as doc_id,
:commonInfo_purchaseNumber as PURCHASENUMBER,
SUBSTR(:commonInfo_publishDTInETP, 0, 10) as PUBLISHDATE,
:region as region,
:file_name as file_name
FROM DUAL
) v
ON (p.PURCHASENUMBER=v.PURCHASENUMBER)
WHEN MATCHED THEN
UPDATE SET
p.PUBLISHDATE = TO_DATE(v.PUBLISHDATE,'YYYY-MM-DD'),
p.region = v.region,
p.file_name = v.file_name
WHEN NOT MATCHED THEN
INSERT (p.doc_id, p.PURCHASENUMBER, p.PUBLISHDATE, p.region, p.file_name)
VALUES(v.doc_id, v.PURCHASENUMBER, TO_DATE(v.PUBLISHDATE,'YYYY-MM-DD'), v.region, v.file_name)
"""
elif "ProtocolCancel" in afile:
sql_test = """
MERGE INTO PROTOCOLS_CANCEL_FZ44 p
USING (SELECT
:fcsProto_purchaseNumber as PURCHASENUMBER,
:fcsProto_docPublishDate as PUBLISHDATE,
:region as region,
:file_name as file_name
FROM DUAL
) v
ON (p.PURCHASENUMBER=v.PURCHASENUMBER)
WHEN MATCHED THEN
UPDATE SET
p.PUBLISHDATE = TO_DATE(v.PUBLISHDATE,'YYYY-MM-DD'),
p.region = v.region,
p.file_name = v.file_name
WHEN NOT MATCHED THEN
INSERT (p.PURCHASENUMBER, p.PUBLISHDATE, p.region, p.file_name)
VALUES (v.PURCHASENUMBER, TO_DATE(v.PUBLISHDATE,'YYYY-MM-DD'), v.region, v.file_name)
"""
# print(sql_test)
elif "Evasion_" in afile:
sql_test = """
MERGE INTO PROTOCOLS_FZ44 p
USING (SELECT
:fcsProto_id as doc_id,
:fcsProto_purchaseNumber as PURCHASENUMBER,
:fcsProto_publishDate as PUBLISHDATE,
:region as region,
:file_name as file_name
FROM DUAL
) v
ON (p.PURCHASENUMBER=v.PURCHASENUMBER)
WHEN MATCHED THEN
UPDATE SET
p.PUBLISHDATE = TO_DATE(v.PUBLISHDATE,'YYYY-MM-DD'),
p.region = v.region,
p.file_name = v.file_name
WHEN NOT MATCHED THEN
INSERT (p.doc_id, p.PURCHASENUMBER, p.PUBLISHDATE, p.region, p.file_name)
VALUES(v.doc_id, v.PURCHASENUMBER, TO_DATE(v.PUBLISHDATE,'YYYY-MM-DD'), v.region, v.file_name)
"""
elif "VPP_" in afile:
sql_test = """
MERGE INTO PROTOCOLS_FZ44 p
USING (SELECT
:epProtoc_id as doc_id,
:commonInfo_purchaseNumber as PURCHASENUMBER,
SUBSTR(:commonInfo_publishDTInETP, 0, 10) as PUBLISHDATE,
:region as region,
:file_name as file_name
FROM DUAL
) v
ON (p.PURCHASENUMBER=v.PURCHASENUMBER)
WHEN MATCHED THEN
UPDATE SET
p.PUBLISHDATE = TO_DATE(v.PUBLISHDATE,'YYYY-MM-DD'),
p.region = v.region,
p.file_name = v.file_name
WHEN NOT MATCHED THEN
INSERT (p.doc_id, p.PURCHASENUMBER, p.PUBLISHDATE, p.region, p.file_name)
VALUES(v.doc_id, v.PURCHASENUMBER, TO_DATE(v.PUBLISHDATE,'YYYY-MM-DD'), v.region, v.file_name)
"""
elif "PR615_" in afile:
sql_test = """
MERGE INTO PROTOCOLS_FZ44 p
USING (SELECT
:pprf615P_id as doc_id,
:commonInfo_purchaseNumber as PURCHASENUMBER,
:commonInfo_docPublishDate as PUBLISHDATE,
:region as region,
:file_name as file_name
FROM DUAL
) v
ON (p.PURCHASENUMBER=v.PURCHASENUMBER)
WHEN MATCHED THEN
UPDATE SET
p.PUBLISHDATE = TO_DATE(v.PUBLISHDATE,'YYYY-MM-DD'),
p.region = v.region,
p.file_name = v.file_name
WHEN NOT MATCHED THEN
INSERT (p.doc_id, p.PURCHASENUMBER, p.PUBLISHDATE, p.region, p.file_name)
VALUES(v.doc_id, v.PURCHASENUMBER, TO_DATE(v.PUBLISHDATE,'YYYY-MM-DD'), v.region, v.file_name)
"""
elif "PRO_" in afile:
sql_test = """
MERGE INTO PROTOCOLS_FZ44 p
USING (SELECT
:epProtocolEZK2_id as doc_id,
:commonInfo_purchaseNumber as PURCHASENUMBER,
SUBSTR(:commonInfo_publishDTInETP, 0, 10) as PUBLISHDATE,
:region as region,
:file_name as file_name
FROM DUAL
) v
ON (p.PURCHASENUMBER=v.PURCHASENUMBER)
WHEN MATCHED THEN
UPDATE SET
p.PUBLISHDATE = TO_DATE(v.PUBLISHDATE,'YYYY-MM-DD'),
p.region = v.region,
p.file_name = v.file_name
WHEN NOT MATCHED THEN
INSERT (p.doc_id, p.PURCHASENUMBER, p.PUBLISHDATE, p.region, p.file_name)
VALUES(v.doc_id, v.PURCHASENUMBER, TO_DATE(v.PUBLISHDATE,'YYYY-MM-DD'), v.region, v.file_name)
"""
elif "PPI_" in afile:
sql_test = """
MERGE INTO PROTOCOLS_FZ44 p
USING (SELECT
:epProtocolEOK3_id as doc_id,
:commonInfo_purchaseNumber as PURCHASENUMBER,
SUBSTR(:commonInfo_publishDTInETP, 0, 10) as PUBLISHDATE,
:region as region,
:file_name as file_name
FROM DUAL
) v
ON (p.PURCHASENUMBER=v.PURCHASENUMBER)
WHEN MATCHED THEN
UPDATE SET
p.PUBLISHDATE = TO_DATE(v.PUBLISHDATE,'YYYY-MM-DD'),
p.region = v.region,
p.file_name = v.file_name
WHEN NOT MATCHED THEN
INSERT (p.doc_id, p.PURCHASENUMBER, p.PUBLISHDATE, p.region, p.file_name)
VALUES(v.doc_id, v.PURCHASENUMBER, TO_DATE(v.PUBLISHDATE,'YYYY-MM-DD'), v.region, v.file_name)
"""
elif "ProtocolZ" in afile:
if "fcsProtocolZP" in afile and not "fcsProtocolZPFinal" in afile:
sql_test = """
MERGE INTO PROTOCOLS_FZ44 p
USING (SELECT
:fcsProtocolZP_id as doc_id,
:fcsProtocolZP_purchaseNumber as PURCHASENUMBER,
SUBSTR(:fcsProtocolZP_publishDate, 0, 10) as PUBLISHDATE,
:region as region,
:file_name as file_name
FROM DUAL
) v
ON (p.PURCHASENUMBER=v.PURCHASENUMBER)
WHEN MATCHED THEN
UPDATE SET
p.PUBLISHDATE = TO_DATE(v.PUBLISHDATE,'YYYY-MM-DD'),
p.region = v.region,
p.file_name = v.file_name
WHEN NOT MATCHED THEN
INSERT (p.doc_id, p.PURCHASENUMBER, p.PUBLISHDATE, p.region, p.file_name)
VALUES(v.doc_id, v.PURCHASENUMBER, TO_DATE(v.PUBLISHDATE,'YYYY-MM-DD'), v.region, v.file_name)
"""
elif "fcsProtocolZK" in afile:
sql_test = """
MERGE INTO PROTOCOLS_FZ44 p
USING (SELECT
:fcsProtocolZK_id as doc_id,
:fcsProtocolZK_purchaseNumber as PURCHASENUMBER,
SUBSTR(:fcsProtocolZK_publishDate, 0, 10) as PUBLISHDATE,
:region as region,
:file_name as file_name
FROM DUAL
) v
ON (p.PURCHASENUMBER=v.PURCHASENUMBER)
WHEN MATCHED THEN
UPDATE SET
p.PUBLISHDATE = TO_DATE(v.PUBLISHDATE,'YYYY-MM-DD'),
p.region = v.region,
p.file_name = v.file_name
WHEN NOT MATCHED THEN
INSERT (p.doc_id, p.PURCHASENUMBER, p.PUBLISHDATE, p.region, p.file_name)
VALUES(v.doc_id, v.PURCHASENUMBER, TO_DATE(v.PUBLISHDATE,'YYYY-MM-DD'), v.region, v.file_name)
"""
elif "fcsProtocolZPFinal" in afile:
sql_test = """
MERGE INTO PROTOCOLS_FZ44 p
USING (SELECT
:fcsProto_id as doc_id,
:fcsProto_purchaseNumber as PURCHASENUMBER,
SUBSTR(:fcsProto_publishDate, 0, 10) as PUBLISHDATE,
:region as region,
:file_name as file_name
FROM DUAL
) v
ON (p.PURCHASENUMBER=v.PURCHASENUMBER)
WHEN MATCHED THEN
UPDATE SET
p.PUBLISHDATE = TO_DATE(v.PUBLISHDATE,'YYYY-MM-DD'),
p.region = v.region,
p.file_name = v.file_name
WHEN NOT MATCHED THEN
INSERT (p.doc_id, p.PURCHASENUMBER, p.PUBLISHDATE, p.region, p.file_name)
VALUES(v.doc_id, v.PURCHASENUMBER, TO_DATE(v.PUBLISHDATE,'YYYY-MM-DD'), v.region, v.file_name)
"""
else:
sql_test = """
MERGE INTO PROTOCOLS_FZ44 p
USING (SELECT
:epProtocolEZP2_id as doc_id,
:commonInfo_purchaseNumber as PURCHASENUMBER,
SUBSTR(:commonInfo_publishDTInETP, 0, 10) as PUBLISHDATE,
:region as region,
:file_name as file_name
FROM DUAL
) v
ON (p.PURCHASENUMBER=v.PURCHASENUMBER)
WHEN MATCHED THEN
UPDATE SET
p.PUBLISHDATE = TO_DATE(v.PUBLISHDATE,'YYYY-MM-DD'),
p.region = v.region,
p.file_name = v.file_name
WHEN NOT MATCHED THEN
INSERT (p.doc_id, p.PURCHASENUMBER, p.PUBLISHDATE, p.region, p.file_name)
VALUES(v.doc_id, v.PURCHASENUMBER, TO_DATE(v.PUBLISHDATE,'YYYY-MM-DD'), v.region, v.file_name)
"""
elif "OKOU2_" in afile:
sql_test = """
MERGE INTO PROTOCOLS_FZ44 p
USING (SELECT
:fcsProto_id as doc_id,
:fcsProto_purchaseNumber as PURCHASENUMBER,
:fcsProto_publishDate as PUBLISHDATE,
:region as region,
:file_name as file_name
FROM DUAL
) v
ON (p.PURCHASENUMBER=v.PURCHASENUMBER)
WHEN MATCHED THEN
UPDATE SET
p.PUBLISHDATE = TO_DATE(v.PUBLISHDATE,'YYYY-MM-DD'),
p.region = v.region,
p.file_name = v.file_name
WHEN NOT MATCHED THEN
INSERT (p.doc_id, p.PURCHASENUMBER, p.PUBLISHDATE, p.region, p.file_name)
VALUES(v.doc_id, v.PURCHASENUMBER, TO_DATE(v.PUBLISHDATE,'YYYY-MM-DD'), v.region, v.file_name)
"""
elif "Single" in afile:
sql_test = """
MERGE INTO PROTOCOLS_FZ44 p
USING (SELECT
:fcsProto_id as doc_id,
:fcsProto_purchaseNumber as PURCHASENUMBER,
:fcsProto_publishDate as PUBLISHDATE,
:region as region,
:file_name as file_name
FROM DUAL
) v
ON (p.PURCHASENUMBER=v.PURCHASENUMBER)
WHEN MATCHED THEN
UPDATE SET
p.PUBLISHDATE = TO_DATE(v.PUBLISHDATE,'YYYY-MM-DD'),
p.region = v.region,
p.file_name = v.file_name
WHEN NOT MATCHED THEN
INSERT (p.doc_id, p.PURCHASENUMBER, p.PUBLISHDATE, p.region, p.file_name)
VALUES(v.doc_id, v.PURCHASENUMBER, TO_DATE(v.PUBLISHDATE,'YYYY-MM-DD'), v.region, v.file_name)
"""
# elif "EFSingleApp_" in afile:
# sql_suppliers = """
# MERGE INTO SUPPLIERS_FZ44 p
# USING (SELECT
# :PURCHASENUMBER as PURCHASENUMBER,
# :appParticipant_inn as Participant_inn,
# :appParticipant_kpp as Participant_kpp,
# :appParticipant_Name as PARTICIPANT_name
# FROM DUAL
# ) v
# ON (p.PURCHASENUMBER=v.PURCHASENUMBER)
# WHEN MATCHED THEN
# UPDATE SET
# p.Participant_inn = v.Participant_inn,
# p.Participant_kpp = v.Participant_kpp,
# p.PARTICIPANT_name = v.PARTICIPANT_name
# WHEN NOT MATCHED THEN
# INSERT (p.PURCHASENUMBER, p.Participant_inn, p.Participant_kpp, p.PARTICIPANT_name)
# VALUES(v.PURCHASENUMBER, v.Participant_inn, v.Participant_kpp, v.PARTICIPANT_name)
# """
else:
raise Exceprion("Unknown type: " + Path(afile).name)
#print("ORA Unknown section") # мы потом будем парсить выдачу консоли, и искать там ORA поэтому не убирать! Иначе фиг отловим
sql_suppliers = """
MERGE INTO SUPPLIERS_FZ44 p
USING (SELECT
:PURCHASENUMBER as PURCHASENUMBER,
:appParticipant_inn as Participant_inn,
:appParticipant_kpp as Participant_kpp,
:appParticipant_Name as PARTICIPANT_name,
:APPPARTICIPANT_POSTADDRESS as APPPARTICIPANT_POSTADDRESS
FROM DUAL
) v
ON (p.PURCHASENUMBER=v.PURCHASENUMBER)
WHEN MATCHED THEN
UPDATE SET
p.Participant_inn = v.Participant_inn,
p.Participant_kpp = v.Participant_kpp,
p.PARTICIPANT_name = v.PARTICIPANT_name,
p.APPPARTICIPANT_POSTADDRESS = v.APPPARTICIPANT_POSTADDRESS
WHEN NOT MATCHED THEN
INSERT (p.PURCHASENUMBER, p.Participant_inn, p.Participant_kpp, p.PARTICIPANT_name, p.APPPARTICIPANT_POSTADDRESS)
VALUES(v.PURCHASENUMBER, v.Participant_inn, v.Participant_kpp, v.PARTICIPANT_name, v.APPPARTICIPANT_POSTADDRESS)
"""
print("______________result[1] : ", result)
print("______________result[1]___________ : ", result[0])
if not "appParticipants" in str(result):
result[0]["appParticipants"] = [] # создаем в корне секцию appParticipants.
temp_dict = {}
for k,v in result[0]["root"].items():
if k == "appParticipant_inn":
temp_dict["appParticipant_inn"] = v
appParticipant_inn = dict()
if k == "appParticipant_kpp":
temp_dict["appParticipant_kpp"] = v
if k == "appParticipant_postAddress":
temp_dict["appParticipant_postAddress"] = v
if k == "appParticipant_organizationName":
temp_dict["appParticipant_organizationName"] = v
result[0]["appParticipants"].append(temp_dict)
# Удаляем из оригинала поля которые мы уже засунули во вложенный массив
if "appParticipant_inn" in result[0]["root"]:
del result[0]["root"]["appParticipant_inn"]
if "appParticipant_kpp" in result[0]["root"]:
del result[0]["root"]["appParticipant_kpp"]
if "appParticipant_organizationName" in result[0]["root"]:
del result[0]["root"]["appParticipant_organizationName"]
if "appParticipant_postAddress" in result[0]["root"]:
del result[0]["root"]["appParticipant_postAddress"]
# print("-----------------=============================")
print("++++++++++++++----------result -------- : ", result)
external_key = None
for r in result:
for k,v in r.items(): # k - название крупных разделов
if k == "root":
for k1, v1 in v.items(): # придется поитерироваться
if "purchaseNumber" in k1: # префикс у purchaseNumber может отличаться. Иного способа найти его нет
external_key = v1 #
v["region"] = region_name
v["file_name"] = str(Path(afile).name)
print("vvvvvv : ", v)
save_to_ora(sql_test, v)
if k == "appParticipants":
for el in v: # appParticipants у нас список. Придется по нему проитерироваться. Каждый элемент списка словарь
el["purchaseNumber"] = external_key
if not "appParticipant_organizationName" in el:
el["appParticipant_organizationName"] = "" # пару раз и такое было
x = el["appParticipant_organizationName"] # увы для оракла слишком длинное название
el["appParticipant_Name"] = x
del el["appParticipant_organizationName"]
# del el["fcsProto_publishDate"]
# del el["region"]
# del el["file_name"]
if not "appParticipant_kpp" in el:
el["appParticipant_kpp"] = "" # бывает что КПП нет
if not "appParticipant_postAddress" in el:
el["appParticipant_postAddress"] = "" # вдруг нет адреса
if not "appParticipant_inn" in el:
el["appParticipant_inn"] = "" # бывает что ИНН нет
print("el---------------> ", el)
save_to_ora(sql_suppliers, el)
#sys.exit(0) # не упали значит все хорошо
def insert_prepare(asql, acursor):
try:
acursor.prepare(asql)
except cx_Oracle.DatabaseError as e:
raise cx_Oracle.DatabaseError("insert_prepare: ", e)
#print('Ошибка! Не возможно приготовить курсор (' + str(e) + ')')
#sys.exit(1)
def oraconnect(aconstr):
conn = cx_Oracle.connect(aconstr)
cursor = conn.cursor()
def oradisc():
cursor.close()
conn.close()
def save_to_ora(asql, abindvar):
try:
print("--abindvar--> ", abindvar)
print("\n", asql)
if cursor.statement != asql:
insert_prepare(asql, cursor)
#print("-------===========--------")
# print (file_object.write(asql + "\n" + str(abindvar) + "\n\n\n"))
# print("-----------------------------")
cursor.execute(cursor.statement, abindvar)
conn.commit()
except cx_Oracle.DatabaseError as e:
raise cx_Oracle.DatabaseError("insert_prepare: ", e)
print("eee111 :", e)
oradisc()
#sys.exit(1)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment