Created
March 12, 2019 12:14
-
-
Save bubnenkoff/06f5b155737111d9dea4da05105aa564 to your computer and use it in GitHub Desktop.
parser
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
# модуль для работы с ftp | |
import ftplib | |
from pathlib import Path | |
# модуль для работы с zip архивами | |
from zipfile import * | |
# Библиотека для работы с Oracle | |
import cx_Oracle | |
import datetime as dt | |
import time | |
# !!!Важно. Установленная по умолчанию в Canopy версия библиотеки lxml не умеет читать кодировку windows-1251 | |
# Поэтому необходимо скачать с сайта https://pypi.python.org/pypi/lxml последнюю версию (3.7.3) библиотеки | |
# Подключаем библиотеку для парсинга XML | |
import lxml.etree as et | |
# библиотека для работы с INI подобными файлами | |
import configparser | |
import sys | |
import elementpath | |
from multiprocessing import Pool | |
os.environ['NLS_LANG'] = 'AMERICAN_AMERICA.AL32UTF8' | |
conn = cx_Oracle.connect('zakupki/dD9qHxQD3t5w@FST_RAC') | |
cursor = conn.cursor() | |
# Возвращает имя родителя как ключ списка словарей --> lots: [{},{}] | |
# children_names_list - список тегов которые нужно вытащить из ребенка | |
# унифицировать не получится. Сюда передаем ТОЛЬКО родителя множественных с указанием списка детей которые нужно извлечь | |
def xpath_two_level(element, children_names_list): | |
# размерность родителя не завимимо от количества детей единица! Типа он существует. Если у него 5 детей все равно единица будет | |
children_names_list = [name.replace("/","_").lower() for name in children_names_list] # передаем сюда со слешами и заменяем все на подчеркивания | |
# print("\nchildren_names_list: ", children_names_list) | |
result = dict() | |
parent_node_name = et.QName(element[0].tag).localname # purchaseObjects | |
#print("parent_node_name: ", parent_node_name) | |
item_list = [] | |
for el in element[0]: # если родитель purchaseObjects то тут будут: purchaseObject purchaseObject purchaseObject | |
child_node_name = et.QName(el.tag).localname # purchaseObject | |
if len(el)>0: # потомок может не иметь вложенности и быть просто purchaseObject/sum | |
item_dict = dict() | |
for e in el: | |
tag_name = et.QName(e.tag).localname # OKVED а внутри уже искомый OKVED/CODE ИЛИ одиночный тег | |
tag_name = tag_name.replace("KTRU", "OKPD2") # в некоторых документах встречаются KTRU вместо OKPD2 заменяем т.к. нужен именно он | |
if len(e) > 0: # К примеру OKPD2 у которого внутри есть OKPD2/aaa OKPD2/bbb OKPD2/ccc | |
for a in e: # смотрим что в искомом теге | |
sub_tag_name = et.QName(a.tag).localname | |
sub_tag_value = a.text | |
concat_tag_name = tag_name + "_" + sub_tag_name # вид: fcs_id | |
if concat_tag_name.lower() in children_names_list: # иы же сюда педелаем уже как foo/bar | |
item_dict[concat_tag_name] = sub_tag_value | |
else: # | |
sub_tag_name = et.QName(e.tag).localname | |
sub_tag_value = e.text | |
concat_tag_name = child_node_name + "_" + sub_tag_name # вид: purchaseObject_price | |
if sub_tag_name.lower() in children_names_list: | |
item_dict[concat_tag_name] = sub_tag_value | |
item_list.append(item_dict) | |
else: # бывают случаи когда у потомков нет вложенности, но его тоже надо забрать | |
pass | |
result[parent_node_name] = item_list # нам надо вернуть с именем корня: "purchaseObjects": [{},{}] | |
return result | |
# print(item_list) | |
def xpath_three_level(element, children_names_list): | |
# размерность родителя не завимимо от количества детей единица! Типа он существует. Если у него 5 детей все равно единица будет | |
children_names_list = [name.replace("/","_").lower() for name in children_names_list] # передаем сюда со слешами и заменяем все на подчеркивания | |
print("\nchildren_names_list: ", children_names_list) | |
result = dict() | |
parent_node_name = et.QName(element[0].tag).localname # purchaseObjects | |
print("parent_node_name: ", parent_node_name) | |
item_list = [] | |
purchaseObjects = [] | |
lots = [] | |
obj_list = [] | |
for el in element[0]: # если родитель purchaseObjects то тут будут: purchaseObject purchaseObject purchaseObject | |
print("\n\n\n--------------------11111111----------------------\n\n\n") | |
single_lot = [] | |
child_node_name = et.QName(el.tag).localname # purchaseObject | |
print("child_node_name: ", child_node_name) | |
if len(el)>0: # потомок может не иметь вложенности и быть просто purchaseObject/sum | |
print("elelelelelelelelel: ", el) | |
item_dict = dict() | |
for e in el: | |
if len(e) == 0: # если никаких вложенных элементов нет | |
tag_name = et.QName(e.tag).localname # OKVED а внутри уже искомый OKVED/CODE ИЛИ одиночный тег | |
tag_name = tag_name.replace("KTRU", "OKPD2") # в некоторых документах встречаются KTRU вместо OKPD2 заменяем т.к. нужен именно он | |
if len(e) > 0: # К примеру OKPD2 у которого внутри есть OKPD2/aaa OKPD2/bbb OKPD2/ccc | |
for a in e: # смотрим что в искомом теге | |
sub_tag_name = et.QName(a.tag).localname | |
sub_tag_value = a.text | |
concat_tag_name = tag_name + "_" + sub_tag_name # вид: fcs_id | |
if concat_tag_name.lower() in children_names_list: # иы же сюда педелаем уже как foo/bar | |
item_dict[concat_tag_name] = sub_tag_value | |
else: # | |
sub_tag_name = et.QName(e.tag).localname | |
sub_tag_value = e.text | |
concat_tag_name = child_node_name + "_" + sub_tag_name # вид: purchaseObject_price | |
if sub_tag_name.lower() in children_names_list: | |
item_dict[concat_tag_name] = sub_tag_value | |
else: # если вложенные элементы есть | |
tag_name = et.QName(e.tag).localname # OKVED а внутри уже искомый OKVED/CODE ИЛИ одиночный тег | |
tag_name = tag_name.replace("KTRU", "OKPD2") # в некоторых документах встречаются KTRU вместо OKPD2 заменяем т.к. нужен именно он | |
sub_tag_name = child_node_name + "_" + tag_name # lot_purchaseObjects | |
if sub_tag_name.lower() in children_names_list: # искомый тег в составе тех которые мы передали в функцию | |
# print(" ---- ", e, " - ", len(e)) | |
po_dict = dict() | |
obj_dict = dict() | |
if (len(e) > 0): # purchaseObjects | |
for e1 in e: | |
tag_name_sub = et.QName(e1.tag).localname | |
if tag_name_sub == "purchaseObject": | |
for e2 in e1: | |
sub_tag_name2 = et.QName(e2.tag).localname # OKPD2 | |
print("e2: ", e2) | |
for e3 in e2: | |
sub_tag_name3 = et.QName(e3.tag).localname # code | |
sub4 = sub_tag_name2 + "_" + sub_tag_name3 | |
obj_dict[sub4] = e3.text | |
# print("e3: ", e3) | |
# print("e3: ", e3.text) | |
obj_list.append(obj_dict) | |
po_dict["purchaseObjects"] = obj_list | |
# print("\n\nobj_dict: ", obj_dict) | |
# print("\n\nobj_dict: ", obj_dict) | |
# single_lot.append(obj_list) | |
# objs_dict["purchaseObjects"] = objs_dict | |
else: | |
print("_________________eee: ", e) | |
print("obj_list: ", obj_list) | |
# single_lot.append(po_dict) | |
# single_lot.append(item_dict) | |
# item_list.append(single_lot) | |
# item_list.append(objs_dict) | |
# single_lot.append(item_dict) | |
else: # бывают случаи когда у потомков нет вложенности, но его тоже надо забрать | |
print("eeeeeeee ---. ", e) | |
purchaseObjects.append(po_dict) | |
# print("purchaseObjects -----------> ", purchaseObjects) | |
# result[parent_node_name] = purchaseObjects # нам надо вернуть с именем корня: "purchaseObjects": [{},{}] | |
# result["objects"] = purchaseObjects # нам надо вернуть с именем корня: "purchaseObjects": [{},{}] | |
# print("______result: ", purchaseObjects) | |
# sys.exit() | |
return po_dict | |
# Принимаем список секций которые должны выойти в одну таблицу БД. | |
# Важно! Сеции передаются как список т.е. секции могут часть верхнего уровня, а часть вообще в самом конце | |
# Список children_names_list ПОКА общий т.е. БЕЗ учета уровня секции если /code и там и там есть, то будет вытащено оба | |
# т.е. если передать к примеру: | |
# //ns2:fcsNotificationEF/ns:purchaseResponsible/ns:responsibleOrg | |
# //ns2:fcsNotificationEF/ns:lot | |
# то оно вернется в виде единого словаря см. вызывающий код. | |
# Возвращаем их в формате { "root": { parent_child: "value", parent_another_child: "value2"}} | |
# Внимание имя если parent_node больше 16, то урезаем его до первых 8 символов! | |
def xpath_one_level(elements, children_names_list): | |
# print("xpath_one_level call") | |
children_names_list = [name.lower() for name in children_names_list] | |
result = dict() | |
root = dict() | |
for element in elements: | |
# print("element: ", element) | |
parent_node = element[0] | |
# print("parent_node: ", parent_node) | |
parent_node_name = et.QName(parent_node.tag).localname | |
if len(parent_node_name) >= 16: | |
parent_node_name = et.QName(parent_node.tag).localname[:8] # сокращаем имя если оно слишком длинное! Требование Оракл! | |
for el in parent_node: | |
tag_name = et.QName(el.tag).localname | |
if tag_name.lower() in children_names_list: | |
if "date" in tag_name.lower(): | |
root[parent_node_name + "_" + tag_name] = el.text.split("T")[0] | |
else: | |
root[parent_node_name + "_" + tag_name] = el.text | |
result["root"] = root # чтобы у элементов без вложенности были вложены в тег root | |
print("result: ", result) | |
return result | |
def parse_xml(afile, section_name, region_name): | |
# afile = r"D:\code\2019\python_downloader\files\contract_Moskva_2018010100_2018020100_001\contract_0373100084614000196_38900889.xml" | |
etxml = None | |
try: | |
etxml = et.parse(afile) | |
except OSError as e: | |
file_name = Path(afile).name | |
raise OSError("Cant read: " + file_name) | |
root = etxml.getroot() # получаем корневой элемент дерева | |
result_list = [] | |
lnamespaces = {'ns2': 'http://zakupki.gov.ru/oos/export/1', | |
'ns': 'http://zakupki.gov.ru/oos/types/1', | |
'ns7':'http://zakupki.gov.ru/oos/EPtypes/1', | |
'ns6':'http://zakupki.gov.ru/oos/pprf615types/1', | |
'ns7':'http://zakupki.gov.ru/oos/EPtypes/1' | |
} | |
root_element_name = root.xpath("local-name(//ns2:export/*[1])", namespaces=lnamespaces) | |
if "noti" in section_name.lower() and not "Cancel" in afile: # fcsNotificationCancel в данной секции не обрабатываем! у них другас ятруктура. Узнать этот подтип можно только по имени файла | |
# print("\n\nnotice") | |
# судя по документации все остальные секции мы пропустим | |
# if (not "epNotificationEOK" in afile) or ("epNotificationEOKOU" not in afile) or ("epNotificationEOKD" not in afile) or ("epNotificationEZK" not in afile) or ("epNotificationEZP" not in afile): | |
# print("ORA: Судя по документации эту секцию пропустим") | |
# #sys.exit(1) # код именно единица, аначе ответ не будет спаршен!!! | |
# x = root.xpath("//ns2:fcsNotification*") | |
# x = root.xpath("//*[starts-with(local-name(), 'ns2:fcsNotification')]") | |
# x = root.xpath("//*[contains(local-name(), 'fcsNotification')]") | |
# print("==> ", x) | |
# sys.exit() | |
# Секции различаются радикально!!! Тут Иного пути кроме как пытаться догадаться по части имени как обрабатывать секцию нет!!! | |
if "fcsNotification" in root_element_name: # При этом у нас может быть fcsNotificationFoo или любые другие буквы | |
# В Нотайсах множественность у LOTS или OBJECTS. На основани наличия множественного LOTS ветвимся. И ТОГО и Другого я не находил | |
isLotsSectionExists = root.xpath("boolean(//*[contains(local-name(), 'lots')])", namespaces=lnamespaces) | |
if not isLotsSectionExists: # множественные purchaseObjects а не Лоты! | |
#print("fcsNotification Multiple purchaseObjects") | |
top_level_xpath = [ | |
root.xpath("//*[contains(local-name(), 'fcsNotification')]", namespaces=lnamespaces), | |
root.xpath("//*[contains(local-name(), 'fcsNotification')]/ns:purchaseResponsible/ns:responsibleOrg", namespaces=lnamespaces), | |
root.xpath("//*[contains(local-name(), 'fcsNotification')]/ns:lot", namespaces=lnamespaces), # у нас тут один ЛОТ, а не ЛОТЫ! | |
root.xpath("//*[contains(local-name(), 'fcsNotification')]/ns:lot/ns:currency", namespaces=lnamespaces), | |
] | |
top_level_include = ["id", "purchaseNumber", "docPublishDate", "INN", "KPP", "fullName", "factAddress", "maxPrice", "code"] | |
second_level_xpath = root.xpath("//*[contains(local-name(), 'fcsNotification')]/ns:lot/ns:purchaseObjects", namespaces=lnamespaces) | |
second_level_include = ["OKPD2/code", "OKPD2/name", "quantity/value", "price", "KTRU/code", "KTRU/name"] # там где нет OKPD2 есть KTRU, но выше будет хак замены имени на OKPD2 т.к. при вставке нам OKDP нужен | |
result = [ | |
xpath_one_level(top_level_xpath, top_level_include), | |
xpath_two_level(second_level_xpath, second_level_include), | |
] | |
prepare_to_insert(result, section_name, region_name, afile) | |
else: # множественные лоты | |
print("Multiple lots") | |
top_level_xpath = [ | |
root.xpath("//*[contains(local-name(), 'fcsNotification')]", namespaces=lnamespaces), | |
root.xpath("//*[contains(local-name(), 'fcsNotification')]/ns:purchaseResponsible/ns:responsibleOrg", namespaces=lnamespaces), | |
] | |
top_level_include = ["id", "purchaseNumber", "docPublishDate", "INN", "KPP", "fullName", "factAddress", "maxPrice", "code"] | |
second_level_xpath = root.xpath("//*[contains(local-name(), 'fcsNotification')]/ns:lots", namespaces=lnamespaces) | |
second_level_include = ["OKPD2/code", "currency/code", "quantity/value", "maxPrice", "KTRU/code", "lotnumber"] # там где нет OKPD2 есть KTRU, но выше будет хак замены имени на OKPD2 т.к. при вставке нам OKDP нужен | |
third_level_xpath = root.xpath("//*[contains(local-name(), 'fcsNotification')]/ns:lots", namespaces=lnamespaces) | |
third_level_include = ["lot/purchaseObjects", "OKPD2/code", "currency/code", "quantity/value", "maxPrice", "KTRU/code"] | |
result = [ | |
xpath_one_level(top_level_xpath, top_level_include), | |
xpath_two_level(second_level_xpath, second_level_include), | |
xpath_three_level(third_level_xpath, third_level_include), | |
] | |
print("result: ", result) | |
prepare_to_insert(result, section_name, region_name, afile) | |
elif "epNotification" in root_element_name: | |
#print("epNotification") | |
top_level_xpath = [ | |
root.xpath("//*[contains(local-name(), 'epNotification')]", namespaces=lnamespaces), | |
root.xpath("//*[contains(local-name(), 'epNotification')]/ns7:purchaseResponsibleInfo/ns7:responsibleOrgInfo", namespaces=lnamespaces), | |
root.xpath("//*[contains(local-name(), 'epNotification')]/ns7:commonInfo", namespaces=lnamespaces), | |
] | |
top_level_include = ["id", "purchaseNumber", "docPublishDate", "INN", "KPP", "fullName", "factAddress", "maxPrice", "code"] | |
# тут проблема у нас множественность может начинаться с секции с именем notDrugpurchaseObjectsInfo а может с любым другим | |
# поэтому в начале узнаем первого потомка purchaseObjectsInfo, а потом подсунем его в xpath | |
purchase_root_name = root.xpath("local-name(//ns7:purchaseObjectsInfo/*[1])", namespaces=lnamespaces) # notDrugPurchaseObjectsInfo или подобное в котором есть множественность | |
second_level_xpath = root.xpath("//ns7:purchaseObjectsInfo/ns7:" + purchase_root_name, namespaces=lnamespaces) | |
#print("second_level_xpath: ", second_level_xpath) | |
second_level_include = ["OKPD2/code", "quantity/value", "price"] # там где нет OKPD2 есть KTRU, но выше будет хак замены имени на OKPD2 т.к. при вставке нам OKDP нужен | |
result = [ | |
xpath_one_level(top_level_xpath, top_level_include), | |
xpath_two_level(second_level_xpath, second_level_include), | |
] | |
prepare_to_insert(result, section_name, region_name, afile) | |
elif "fcsContractSign" in root_element_name: # это информация о заключении контракта, пока не ясно бьется ли она с основными контрактами. Пропускаем | |
pass | |
#print("ORA: Подтип fcsContractSign пока не обрабатываем") | |
#sys.exit(1) | |
elif "Clarification" in root_element_name: | |
pass | |
#print("ORA: Подтип Clarification (разъяснение документации) не обрабатываем") | |
#sys.exit(1) | |
elif "fcsPlacementResult" in root_element_name: | |
top_level_xpath = [ | |
root.xpath("//ns2:fcsPlacementResult", namespaces=lnamespaces), | |
# root.xpath("//ns2:contract/ns:foundation/ns:fcsOrder/ns:order", namespaces=lnamespaces), | |
] | |
top_level_include = ["purchaseNumber", "lotNumber", "foundationProtocolNumber", "procedurelFailed" ] # procedurelFailed - не опечатка именно так криво он называется | |
second_level_xpath = root.xpath("//ns2:fcsPlacementResult/ns:applications", namespaces=lnamespaces) | |
second_level_include = ["journalNumber", "price", "appRating"] | |
result = [ | |
xpath_one_level(top_level_xpath, top_level_include), | |
xpath_two_level(second_level_xpath, second_level_include), | |
] | |
prepare_to_insert(result, section_name, region_name, afile) | |
#print("ORA: Подтип fcsPlacementResult (Результат проведения процедуры определения поставщика) не обрабатываем") | |
#sys.exit(1) | |
else: | |
pass | |
#print("ORA: Unknown notice type: " + str(root_element_name)) | |
#sys.exit(1) | |
elif "noti" in section_name.lower() and "Cancel" in afile: # Вот тут как раз и обработаем | |
top_level_xpath = [ root.xpath("//ns2:fcsNotificationCancel", namespaces=lnamespaces) ] | |
top_level_include = ["id", "publishDate", "purchaseNumber", "publishDTInETP", "docPublishDate"] | |
result = [ xpath_one_level(top_level_xpath, top_level_include) ] | |
prepare_to_insert(result, section_name, region_name, afile) | |
elif "contractprocedure" in afile.lower(): # | |
pass | |
#print("ORA: contractprocedure (платежки) пока не грузим") | |
#sys.exit(1) | |
""" | |
top_level_xpath = [root.xpath("//ns2:contractProcedure", namespaces=lnamespaces)] | |
top_level_include = ["id", "publishDate", "publishDate"] # по идее можно еще , "currency/code", "paid" но там вложенность | |
result = [ xpath_one_level(top_level_xpath, top_level_include) ] | |
prepare_to_insert(result, section_name, region_name, afile) | |
""" | |
elif "contracts" in section_name.lower() and not "contractprocedure" in afile.lower(): | |
#print("\n\ncontracts") | |
top_level_xpath = [ | |
root.xpath("//ns2:contract", namespaces=lnamespaces), | |
root.xpath("//ns2:contract/ns:foundation/ns:fcsOrder/ns:order", namespaces=lnamespaces), | |
root.xpath("//ns2:contract/ns:customer", namespaces=lnamespaces), | |
root.xpath("//ns2:contract/ns:suppliers/ns:supplier/ns:legalEntityRF", namespaces=lnamespaces), | |
root.xpath("//ns2:contract/ns:priceInfo", namespaces=lnamespaces), | |
root.xpath("//ns2:contract/ns:priceInfo/ns:currency", namespaces=lnamespaces), | |
root.xpath("//ns2:contract/ns:products/ns:product/ns:OKPD2", namespaces=lnamespaces), | |
root.xpath("//ns2:contract/ns:priceInfo", namespaces=lnamespaces), | |
root.xpath("//ns2:contract/ns:priceInfo/ns:currency", namespaces=lnamespaces), | |
root.xpath("//ns2:contract/ns:products/ns:product", namespaces=lnamespaces), | |
] | |
top_level_include = ["id", "publishDate", "purchaseNumber", "notificationNumber", "lotNumber", "inn", "kpp", "fullName", "address", "price", "code", "price", "price", "quantity", "sum" ] | |
# top_level_include = ["id"] | |
second_level_xpath = root.xpath("//ns2:fcsNotificationEF/ns:lot/ns:purchaseObjects", namespaces=lnamespaces) | |
second_level_include = ["OKPD2/code", "OKPD2/name", "quantity/value", "price"] | |
result = [ | |
xpath_one_level(top_level_xpath, top_level_include), | |
] | |
#print("result: ", result) | |
prepare_to_insert(result, section_name, region_name, afile) | |
elif "protocol" in section_name.lower(): | |
#print("\nprotocol") | |
isappParticipantsSectionExists = root.xpath("boolean(//*[contains(local-name(), 'appParticipants')])", namespaces=lnamespaces) | |
print("isappParticipantsSectionExists: ", isappParticipantsSectionExists) | |
# Это немного не те типы. Протоколы рассмотрения заявок и протокол проведения электронного аукциона | |
# top_level_xpath = [] | |
if not isappParticipantsSectionExists: # одиночный appParticipant | |
if "EF1_" in afile: | |
top_level_xpath = [ root.xpath("//ns2:fcsProtocolEF1", namespaces=lnamespaces) | |
] | |
return # пропускам эту секцию | |
elif "EF2_" in afile: | |
top_level_xpath = [ root.xpath("//ns2:fcsProtocolEF2", namespaces=lnamespaces) | |
] | |
return # пропускам эту секцию | |
elif "EF3_" in afile: | |
top_level_xpath = [ root.xpath("//ns2:fcsProtocolEF3", namespaces=lnamespaces), | |
root.xpath("//ns:appParticipant", namespaces=lnamespaces), | |
] | |
elif "OK1_" in afile: | |
top_level_xpath = [ root.xpath("//ns2:fcsProtocolOK1", namespaces=lnamespaces) ] | |
elif "OK2_" in afile: | |
top_level_xpath = [ root.xpath("//ns2:fcsProtocolOK2", namespaces=lnamespaces) ] | |
elif "Evasion_" in afile: # протокол отказа от заключения договора, пропускам его | |
top_level_xpath = [ root.xpath("//ns2:fcsProtocolEvasion", namespaces=lnamespaces), | |
] | |
return # нет смысла продолжать | |
elif "OKOU2_" in afile: | |
top_level_xpath = [ root.xpath("//ns2:fcsProtocolOKOU2", namespaces=lnamespaces), | |
] | |
elif "EFSinglePart_" in afile: | |
top_level_xpath = [ root.xpath("//ns2:fcsProtocolEFSinglePart", namespaces=lnamespaces), | |
] | |
elif "SingleApp" in afile: | |
top_level_xpath = [ root.xpath("//ns2:fcsProtocolEFSingleApp", namespaces=lnamespaces), | |
root.xpath("//ns:appParticipant", namespaces=lnamespaces), | |
] | |
elif "PPI_" in afile: | |
top_level_xpath = [ root.xpath("//ns2:epProtocolEOK3", namespaces=lnamespaces), | |
root.xpath("//ns2:epProtocolEOK3/ns7:commonInfo", namespaces=lnamespaces), | |
] | |
elif "PR615_" in afile: | |
top_level_xpath = [ root.xpath("//ns2:pprf615ProtocolEF1", namespaces=lnamespaces), | |
root.xpath("//ns2:pprf615ProtocolEF1/ns6:commonInfo", namespaces=lnamespaces), | |
] | |
elif "ProtocolZ" in afile: | |
if root_element_name == "epProtocolEZP2": | |
top_level_xpath = [ root.xpath("//ns2:epProtocolEZP2", namespaces=lnamespaces), | |
root.xpath("//ns2:epProtocolEZP2/ns7:commonInfo", namespaces=lnamespaces), | |
] | |
elif root_element_name == "fcsProtocolZP": | |
top_level_xpath = [ root.xpath("//ns2:fcsProtocolZP", namespaces=lnamespaces), | |
] | |
elif root_element_name == "fcsProtocolZK": | |
top_level_xpath = [ root.xpath("//ns2:fcsProtocolZK", namespaces=lnamespaces), | |
] | |
elif root_element_name == "fcsProtocolZPFinal": | |
top_level_xpath = [ root.xpath("//ns2:fcsProtocolZPFinal", namespaces=lnamespaces), | |
] | |
elif root_element_name == "fcsProtocolZPExtract": | |
print("Skipping this section") | |
return | |
else: | |
raise Exception("Unkown root element name: " + root_element_name) | |
elif "PRO_" in afile: | |
top_level_xpath = [ root.xpath("//ns2:epProtocolEZK2", namespaces=lnamespaces), | |
root.xpath("//ns2:epProtocolEZK2/ns7:commonInfo", namespaces=lnamespaces), | |
] | |
elif "VPP_" in afile: | |
top_level_xpath = [ root.xpath("//ns2:epProtocolEZP1Extract", namespaces=lnamespaces), | |
root.xpath("//ns2:epProtocolEZP1Extract/ns7:commonInfo", namespaces=lnamespaces), | |
] | |
elif "Cancel" in afile: | |
raise Exception("fcsProtocolCancel not processing currently") # пока не обрабатываем их. У части есть id, у части нет. Это только к проблемам приводит | |
top_level_xpath = [ root.xpath("//ns2:fcsProtocolCancel", namespaces=lnamespaces) ] | |
else: | |
raise Exception("Unknown subsection for protocols!!") | |
top_level_include = ["id", "publishDate", "purchaseNumber", "publishDTInETP", "docPublishDate", "inn", "kpp", "organizationName", "postAddress"] # если нет publishDate то есть publishDTInETP | |
# docPublishDate только у PR615 и Cancel | |
result = [ | |
xpath_one_level(top_level_xpath, top_level_include), | |
] | |
print("!!!result: ", result) | |
prepare_to_insert(result, section_name, region_name, afile) | |
else: # множественные appParticipants | |
print("we are here") | |
if "EF3_" in afile: | |
top_level_xpath = [ root.xpath("//ns2:fcsProtocolEF3", namespaces=lnamespaces) ] | |
elif "OK1_" in afile: | |
top_level_xpath = [ root.xpath("//ns2:fcsProtocolOK1", namespaces=lnamespaces) ] | |
elif "OK2_" in afile: | |
top_level_xpath = [ root.xpath("//ns2:fcsProtocolOK2", namespaces=lnamespaces) ] | |
elif "Evasion_" in afile: # протокол отказа от заключения договора | |
top_level_xpath = [ root.xpath("//ns2:fcsProtocolEvasion", namespaces=lnamespaces), | |
] | |
return # нет смысла продолжать | |
elif "OKOU2_" in afile: | |
top_level_xpath = [ root.xpath("//ns2:fcsProtocolOKOU2", namespaces=lnamespaces), | |
] | |
elif "EFSinglePart_" in afile: | |
top_level_xpath = [ root.xpath("//ns2:fcsProtocolEFSinglePart", namespaces=lnamespaces), | |
] | |
elif "PPI_" in afile: | |
top_level_xpath = [ root.xpath("//ns2:epProtocolEOK3", namespaces=lnamespaces), | |
root.xpath("//ns2:epProtocolEOK3/ns7:commonInfo", namespaces=lnamespaces), | |
] | |
elif "PR615_" in afile: | |
top_level_xpath = [ root.xpath("//ns2:pprf615ProtocolEF1", namespaces=lnamespaces), | |
root.xpath("//ns2:pprf615ProtocolEF1/ns6:commonInfo", namespaces=lnamespaces), | |
] | |
elif "ProtocolZ" in afile: | |
if root_element_name == "epProtocolEZP2": | |
top_level_xpath = [ root.xpath("//ns2:epProtocolEZP2", namespaces=lnamespaces), | |
root.xpath("//ns2:epProtocolEZP2/ns7:commonInfo", namespaces=lnamespaces), | |
] | |
elif root_element_name == "fcsProtocolZP": | |
top_level_xpath = [ root.xpath("//ns2:fcsProtocolZP", namespaces=lnamespaces), | |
] | |
elif root_element_name == "fcsProtocolZK": | |
top_level_xpath = [ root.xpath("//ns2:fcsProtocolZK", namespaces=lnamespaces), | |
] | |
elif root_element_name == "fcsProtocolZPFinal": | |
top_level_xpath = [ root.xpath("//ns2:fcsProtocolZPFinal", namespaces=lnamespaces), | |
] | |
elif root_element_name == "fcsProtocolZPExtract": | |
print("Skipping this section") | |
return | |
else: | |
raise Exception("Unkown root element name: " + root_element_name) | |
elif "PRO_" in afile: | |
top_level_xpath = [ root.xpath("//ns2:epProtocolEZK2", namespaces=lnamespaces), | |
root.xpath("//ns2:epProtocolEZK2/ns7:commonInfo", namespaces=lnamespaces), | |
] | |
elif "VPP_" in afile: | |
top_level_xpath = [ root.xpath("//ns2:epProtocolEZP1Extract", namespaces=lnamespaces), | |
root.xpath("//ns2:epProtocolEZP1Extract/ns7:commonInfo", namespaces=lnamespaces), | |
] | |
elif "Cancel" in afile: | |
raise Exception("fcsProtocolCancel not processing currently") # пока не обрабатываем их. У части есть id, у части нет. Это только к проблемам приводит | |
top_level_xpath = [ root.xpath("//ns2:fcsProtocolCancel", namespaces=lnamespaces) ] | |
else: | |
raise Exception("Unknown subsection for protocols!!!!") | |
top_level_include = ["id", "publishDate", "purchaseNumber", "publishDTInETP", "docPublishDate"] # если нет publishDate то есть publishDTInETP | |
# docPublishDate только у PR615 и Cancel | |
second_level_xpath = root.xpath("//ns:appParticipants", namespaces=lnamespaces) # проще искать от этого уровня | |
second_level_include = ["inn", "kpp", "organizationName", "postAddress"] | |
result = [ | |
xpath_one_level(top_level_xpath, top_level_include), | |
xpath_two_level(second_level_xpath, second_level_include), | |
] | |
print("+++result: ", result) | |
prepare_to_insert(result, section_name, region_name, afile) | |
else: | |
raise Exception("Unkown file type. Expected contract/protocol/...") | |
# else: | |
# print("Ошибка! Неизвестная секция") | |
# return result, section_name, region_name, afile | |
def prepare_to_insert(result, section_name, region_name, afile): | |
if "noti" in section_name.lower(): | |
sql_504 = "" # объявим заранее | |
#print("----------------- >", afile) | |
if "Cancel" not in afile: | |
# Даже если у нотайса одиночный лот, то вставляем его в ОТДЕЛЬНУЮ таблицу т.к. есть нотайсы где лоты множестве | |
sql_test = """ | |
MERGE INTO NOTIFICATION_FZ44 p | |
USING (SELECT | |
:fcsNotif_id as doc_id, | |
:fcsNotif_purchaseNumber as purchaseNumber, | |
:fcsNotif_docpublishdate as docpublishdate, | |
:responsibleOrg_INN as PLACER_INN, | |
:responsibleOrg_KPP as PLACER_KPP, | |
:responsibleOrg_fullName as PLACER_name, | |
:responsibleOrg_factAddress as PLACER_ADDRESS, | |
:region as region | |
FROM DUAL | |
) v | |
ON (p.purchaseNumber=v.purchaseNumber) | |
WHEN MATCHED THEN | |
UPDATE SET | |
p.docpublishdate = TO_DATE(v.docpublishdate,'YYYY-MM-DD'), | |
p.PLACER_INN = v.PLACER_INN, | |
p.PLACER_KPP = v.PLACER_KPP, | |
p.PLACER_name = v.PLACER_name, | |
p.PLACER_ADDRESS = v.PLACER_ADDRESS, | |
p.region = v.region | |
WHEN NOT MATCHED THEN | |
INSERT (p.doc_id, p.purchaseNumber, p.docpublishdate, p.PLACER_INN, p.PLACER_KPP, p.PLACER_name, p.PLACER_ADDRESS, p.region) | |
VALUES(v.doc_id, v.purchaseNumber, TO_DATE(v.docpublishdate,'YYYY-MM-DD'), v.PLACER_INN, v.PLACER_KPP, v.PLACER_name, v.PLACER_ADDRESS, v.region ) | |
""" | |
sql_test_lots = """ | |
MERGE INTO NOTIFICATION_LOTS_FZ44 p | |
USING (SELECT | |
:fcsNotif_purchaseNumber as purchaseNumber, | |
:lot_maxPrice as lot_maxPrice, | |
:currency_code as lot_currency_code, | |
:lot_lotNumber as lot_Number, | |
:MYUNIQ_ID as MYUNIQ_ID, | |
:lot_type as multylot, | |
:xml_name as xml_name | |
FROM DUAL | |
) v | |
ON (p.myuniq_id=v.myuniq_id) | |
WHEN MATCHED THEN | |
UPDATE SET | |
p.purchaseNumber = v.purchaseNumber, | |
p.lot_maxPrice = v.lot_maxPrice, | |
p.lot_currency_code = v.lot_currency_code, | |
p.multylot = v.multylot, | |
p.xml_name = v.xml_name | |
WHEN NOT MATCHED THEN | |
INSERT (p.purchaseNumber, p.lot_maxPrice, p.lot_currency_code, p.MYUNIQ_ID, p.lot_Number, p.xml_name, p.multylot) | |
VALUES(v.purchaseNumber, v.lot_maxPrice, v.lot_currency_code, v.MYUNIQ_ID, v.lot_Number, v.xml_name, v.multylot) | |
""" | |
sql_test_2 = """ | |
MERGE INTO NOTIFICATION_OBJS p | |
USING (SELECT | |
:myuniq_id as myuniq_id, | |
:fcsNotif_purchaseNumber as purchaseNumber, | |
:OKPD2_code as OKPD2_code, | |
:OKPD2_name as OKPD2_name | |
FROM DUAL | |
) v | |
ON (p.myuniq_id=v.myuniq_id AND p.OKPD2_code=v.OKPD2_code) | |
WHEN MATCHED THEN | |
UPDATE SET | |
p.purchaseNumber = v.purchaseNumber, | |
-- p.OKPD2_code = v.OKPD2_code, | |
p.OKPD2_name = v.OKPD2_name | |
WHEN NOT MATCHED THEN | |
INSERT (p.myuniq_id, p.purchaseNumber, p.OKPD2_code, p.OKPD2_name) | |
VALUES(v.myuniq_id, v.purchaseNumber, v.OKPD2_code, v.OKPD2_name) | |
""" | |
if "NotificationCancel" in afile: # внимание тут есть DOC_ID | |
sql_test = """ | |
MERGE INTO Notification_CANCEL_FZ44 p | |
USING (SELECT | |
:fcsNotif_id as doc_id, | |
:fcsNotif_purchaseNumber as PURCHASENUMBER, | |
:fcsNotif_docPublishDate as PUBLISHDATE, | |
:region as region | |
FROM DUAL | |
) v | |
ON (p.PURCHASENUMBER=v.PURCHASENUMBER) | |
WHEN MATCHED THEN | |
UPDATE SET | |
p.doc_id = v.doc_id, | |
p.PUBLISHDATE = TO_DATE(v.PUBLISHDATE,'YYYY-MM-DD'), | |
p.region = v.region | |
WHEN NOT MATCHED THEN | |
INSERT (p.doc_id, p.PURCHASENUMBER, p.PUBLISHDATE, p.region) | |
VALUES (v.doc_id, v.PURCHASENUMBER, TO_DATE(v.PUBLISHDATE,'YYYY-MM-DD'), v.region) | |
""" | |
if "NotificationZK504" in afile or "NotificationZP504" in afile: | |
sql_504 = """ | |
MERGE INTO NOTIFICATION_FZ44 p | |
USING (SELECT | |
:epNotifi_id as doc_id, | |
:commonInfo_purchaseNumber as purchaseNumber, | |
:responsi_INN as PLACER_INN, | |
:responsi_KPP as PLACER_KPP, | |
:responsi_fullName as PLACER_name, | |
:responsi_factAddress as PLACER_ADDRESS, | |
:region as region | |
FROM DUAL | |
) v | |
ON (p.purchaseNumber=v.purchaseNumber) | |
WHEN MATCHED THEN | |
UPDATE SET | |
p.PLACER_INN = v.PLACER_INN, | |
p.PLACER_KPP = v.PLACER_KPP, | |
p.PLACER_name = v.PLACER_name, | |
p.PLACER_ADDRESS = v.PLACER_ADDRESS, | |
p.region = v.region | |
WHEN NOT MATCHED THEN | |
INSERT (p.doc_id, p.purchaseNumber, p.PLACER_INN, p.PLACER_KPP, p.PLACER_name, p.PLACER_ADDRESS, p.region) | |
VALUES(v.doc_id, v.purchaseNumber, v.PLACER_INN, v.PLACER_KPP, v.PLACER_name, v.PLACER_ADDRESS, v.region ) | |
""" | |
if "NotificationOK504" in afile: | |
sql_504 = """ | |
MERGE INTO NOTIFICATION_FZ44 p | |
USING (SELECT | |
:epNotifi_id as doc_id, | |
:commonInfo_purchaseNumber as purchaseNumber, | |
:responsi_INN as PLACER_INN, | |
:responsi_KPP as PLACER_KPP, | |
:responsi_fullName as PLACER_name, | |
:responsi_factAddress as PLACER_ADDRESS, | |
:region as region | |
FROM DUAL | |
) v | |
ON (p.purchaseNumber=v.purchaseNumber) | |
WHEN MATCHED THEN | |
UPDATE SET | |
p.PLACER_INN = v.PLACER_INN, | |
p.PLACER_KPP = v.PLACER_KPP, | |
p.PLACER_name = v.PLACER_name, | |
p.PLACER_ADDRESS = v.PLACER_ADDRESS, | |
p.region = v.region | |
WHEN NOT MATCHED THEN | |
INSERT (p.doc_id, p.purchaseNumber, p.PLACER_INN, p.PLACER_KPP, p.PLACER_name, p.PLACER_ADDRESS, p.region) | |
VALUES(v.doc_id, v.purchaseNumber, v.PLACER_INN, v.PLACER_KPP, v.PLACER_name, v.PLACER_ADDRESS, v.region ) | |
""" | |
if "PlacementResult" in afile: | |
sql_504 = """ | |
MERGE INTO NOTIFICATION_RESULT_FZ44 p | |
USING (SELECT | |
:fcsPlace_purchaseNumber as PURCHASENUMBER, | |
:fcsPlace_lotNumber as LOTNUMBER, | |
:fcsPlace_foundationProtNumber as FOUNDATION_PROTOCOLNUMBER, | |
:fcsPlace_procedurelFailed as PROCEDURELFAILED, | |
:application_journalNumber as APPLICATION_JOURNALNUMBER, | |
:application_appRating as APPLICATION_RATING, | |
:application_price as APPLICATION_PRICE, | |
:region as region | |
FROM DUAL | |
) v | |
ON (p.purchaseNumber=v.purchaseNumber) | |
WHEN MATCHED THEN | |
UPDATE SET | |
p.LOTNUMBER = v.LOTNUMBER, | |
p.FOUNDATION_PROTOCOLNUMBER = v.FOUNDATION_PROTOCOLNUMBER, | |
p.PROCEDURELFAILED = v.PROCEDURELFAILED, | |
p.APPLICATION_JOURNALNUMBER = v.APPLICATION_JOURNALNUMBER, | |
p.APPLICATION_RATING = v.APPLICATION_RATING, | |
p.APPLICATION_PRICE = v.APPLICATION_PRICE, | |
p.region = v.region | |
WHEN NOT MATCHED THEN | |
INSERT (p.purchaseNumber, p.LOTNUMBER, p.FOUNDATION_PROTOCOLNUMBER, p.PROCEDURELFAILED, p.APPLICATION_JOURNALNUMBER, p.APPLICATION_RATING, p.APPLICATION_PRICE, p.region) | |
VALUES(v.purchaseNumber, v.LOTNUMBER, v.FOUNDATION_PROTOCOLNUMBER, v.PROCEDURELFAILED, v.APPLICATION_JOURNALNUMBER, v.APPLICATION_RATING, v.APPLICATION_PRICE, v.region) | |
""" | |
# Идей лучше чем эту сецию сделать исключительной до тех пор пока не ясно сколько вообще подобных секций будет нет | |
if "NotificationOK504" in afile: | |
result[0]["root"]["region"] = region_name | |
# print("we are here ", type(result[0]["root"])) | |
save_to_ora(sql_504, result[0]["root"]) | |
if "NotificationZK504" in afile or "NotificationZP504" in afile: | |
result[0]["root"]["region"] = region_name | |
# print("we are here ", type(result[0]["root"])) | |
save_to_ora(sql_504, result[0]["root"]) | |
if "fcsPlacementResult" in afile: | |
result[0]["root"]["region"] = region_name | |
fcsPlace_purchaseNumber = result[0]["root"]["fcsPlace_purchaseNumber"] | |
fcsPlace_lotNumber = result[0]["root"]["fcsPlace_lotNumber"] | |
fcsPlace_foundationProtocolNumber = result[0]["root"]["fcsPlace_foundationProtocolNumber"] | |
fcsPlace_procedurelFailed = result[0]["root"]["fcsPlace_procedurelFailed"] | |
region = result[0]["root"]["region"] | |
if "applications" in str(result): # т.к. все данные мы вставляем в одну таблицу, то идем от обратного и все данные из root мы подставляем в result | |
for el in result[1]['applications']: # В теории у нас может быть множественность | |
el["region"] = region | |
el["fcsPlace_procedurelFailed"] = fcsPlace_procedurelFailed | |
el["fcsPlace_foundationProtNumber"] = fcsPlace_foundationProtocolNumber # название сокращено | |
el["fcsPlace_lotNumber"] = fcsPlace_lotNumber | |
el["fcsPlace_purchaseNumber"] = fcsPlace_purchaseNumber | |
el["fcsPlace_procedurelFailed"] = fcsPlace_procedurelFailed | |
if not "application_appRating" in el: | |
el["application_appRating"] = "" | |
save_to_ora(sql_504, el) | |
# save_to_ora(sql_504, result[0]["root"]) | |
if not "504_" in afile and not "fcsPlacementResult" in afile: # обрабатываем случаи NotificationOK504_ и NotificationK504_ | |
external_key = None # во вложенном словаре нужно будет знать кто его родитель, нужно для foreign_key | |
# Нужно проверить а может у нас уже есть LOTS! | |
# тут нужно модифицировать оригинальный словарь, чтобы он выглядел так, как будто бы там есть lots пусть и с одним элементом, иначе это никак не унифицировать sql_test_lots | |
#print("____________________result: ", result) | |
if not "lots" in result[1]: | |
result[0]["lots"] = [] # создаем в корне секцию лотов. Теперь у нас там еще и root и purchaseObjects , в данном случае в ней не будет множественности, но она нужна нам | |
temp_dict = {} | |
for k,v in result[0]["root"].items(): | |
if k == "lot_maxPrice": | |
temp_dict["lot_maxPrice"] = v | |
lot_maxPrice = dict() | |
if k == "currency_code": | |
temp_dict["currency_code"] = v | |
temp_dict["OKPD2_code"] = "" # чтобы в запросе на вставку было нужное количество арнументов подсунем пустышку | |
temp_dict["lot_type"] = "single" # нет вложенных элементов и разные ОКВЕДов | |
temp_dict["lot_lotNumber"] = "1" # там где нет lot_lotNumber всегда ставим единицу т.к. она нужна будет для формирования ключа | |
result[0]["lots"].append(temp_dict) | |
# Удаляем из оригинала поля которые мы уже засунули во вложенный массив | |
del result[0]["root"]["lot_maxPrice"] | |
del result[0]["root"]["currency_code"] | |
else: # если у нас множественность, то надо пройтись по всем элементам и поставить там флаг малтипл. Типа в документе куча лотов | |
for el in result[1]["lots"]: | |
el["lot_type"] = "multiple" | |
print("\n\n\n\n\n\nresult: ", result) | |
print("________________") | |
myuniqid = "" | |
for r in result: | |
print("rrrrrrrrrrrrr: ", r) | |
for k,v in r.items(): # k - название крупных разделов | |
if k == "root": | |
external_key = v["fcsNotif_purchaseNumber"] # сохраним внешний ключ, который будем использовать во внутреннем словаре | |
v["region"] = region_name | |
save_to_ora(sql_test, v) | |
if k == "lots": | |
for el in v: | |
el["fcsNotif_purchaseNumber"] = external_key | |
myuniqid = str(external_key) + "_" + str(el["lot_lotNumber"]) # PURCHASENUMBER плюс LOT_NUMBER в итоге связка будет уникальной | |
el["myuniq_id"] = myuniqid | |
if not "currency_code" in el: | |
el["currency_code"] = "" | |
if not "lot_maxPrice" in el: | |
el["lot_maxPrice"] = "" | |
if "OKPD2_code" in el: | |
del el["OKPD2_code"] # бывает захватывается. Почему именно нужно разбираться | |
el["xml_name"] = Path(afile).name | |
# el["lot_type"] = "multiple" # ПОЧИНИТЬ ЭТУ ЧАСТЬ. Сейчас флаг выставляется везде без разбора! чтобы видеть, что у нас лоты множественные, а не те которые из одиночной секции lot взяты | |
save_to_ora(sql_test_lots, el) | |
if k == "purchaseObjects": | |
for el in v: | |
el["myuniq_id"] = myuniqid | |
el["fcsNotif_purchaseNumber"] = external_key | |
if not "quantity_value" in el: | |
el["quantity_value"] = "" | |
if "OKEI_nationalCode" in el: # удаляем случайно захваченные теги | |
del el["OKEI_nationalCode"] | |
if "OKEI_code" in el: | |
del el["OKEI_code"] | |
if "purchaseObject_price" in el: # тоже захватывается, но не нужно цена у нас в лоте есть | |
del el["purchaseObject_price"] | |
if "customerQuantities" in el: # чистим разный попавший сюда мусор | |
del el["customerQuantities"] | |
if "customerQuantities_customerQuantity" in el: | |
del el["customerQuantities_customerQuantity"] | |
if "quantity_value" in el: # хотя мы и собираем количество, но оно тут будет не корректным т.к. окведы у товаров часто совпадают | |
del el["quantity_value"] # пример notification_Moskva_2018010100_2018020100_002/fcsNotificationEA44_0873500000817000938_14959861.xml | |
save_to_ora(sql_test_2, el) | |
elif "contractprocedure" in afile.lower(): | |
sql_test = """ | |
MERGE INTO CONTRACT_PROCEDURE_FZ44 p | |
USING (SELECT | |
:contract_id as doc_id, | |
:contract_publishDate as PUBLISHDATE, | |
:region as region | |
FROM DUAL | |
) v | |
ON (p.doc_id=v.doc_id) | |
WHEN MATCHED THEN | |
UPDATE SET | |
p.PUBLISHDATE = TO_DATE(v.PUBLISHDATE,'YYYY-MM-DD'), | |
p.region = v.region | |
WHEN NOT MATCHED THEN | |
INSERT (p.doc_id, p.PUBLISHDATE, p.region) | |
VALUES(v.doc_id, TO_DATE(v.PUBLISHDATE,'YYYY-MM-DD'), v.region) | |
""" | |
for r in result: | |
for k,v in r.items(): # k - название крупных разделов | |
if k == "root": | |
v["region"] = region_name | |
save_to_ora(sql_test, v) | |
elif "contracts" in section_name.lower() and not "contractprocedure" in afile.lower(): | |
sql_test = """ | |
MERGE INTO CONTRACT_FZ44 p | |
USING (SELECT | |
:contract_id as doc_id, | |
:contract_publishDate as PUBLISHDATE, | |
:order_notificationNumber as NOTIFICATIONNUMBER, | |
:order_lotNumber as lotNumber, | |
:customer_inn as CUSTOMER_INN, | |
:customer_kpp as CUSTOMER_KPP, | |
:customer_fullName as CUSTOMER_NAME, | |
:legalEntityRF_INN as SUPPLIER_INN, | |
:legalEntityRF_KPP as SUPPLIER_KPP, | |
:legalEntityRF_fullName as SUPPLIER_NAME, | |
:legalEntityRF_address as SUPPLIER_ADDRESS, | |
:priceInfo_price as PRICE, | |
:CURRENCY_CODE as CURRENCY_CODE, | |
:OKPD2_code as PRODUCT_OKPD2_CODE, | |
:product_price as PRODUCT_PRICE, | |
:product_quantity as PRODUCT_QUANTITY, | |
:product_sum as PRODUCT_SUM, | |
:region as region, | |
:file_name as file_name | |
FROM DUAL | |
) v | |
ON (p.NOTIFICATIONNUMBER=v.NOTIFICATIONNUMBER) | |
WHEN MATCHED THEN | |
UPDATE SET | |
p.PUBLISHDATE = TO_DATE(v.PUBLISHDATE,'YYYY-MM-DD'), | |
p.lotNumber = v.lotNumber, | |
p.CUSTOMER_INN = v.CUSTOMER_INN, | |
p.CUSTOMER_KPP = v.CUSTOMER_KPP, | |
p.CUSTOMER_NAME = v.CUSTOMER_NAME, | |
p.SUPPLIER_INN = v.SUPPLIER_INN, | |
p.SUPPLIER_KPP = v.SUPPLIER_KPP, | |
p.SUPPLIER_NAME = v.SUPPLIER_NAME, | |
p.SUPPLIER_ADDRESS = v.SUPPLIER_ADDRESS, | |
p.PRICE = v.PRICE, | |
p.CURRENCY_CODE = v.CURRENCY_CODE, | |
p.PRODUCT_OKPD2_CODE = v.PRODUCT_OKPD2_CODE, | |
p.PRODUCT_PRICE = v.PRODUCT_PRICE, | |
p.PRODUCT_QUANTITY = v.PRODUCT_QUANTITY, | |
p.PRODUCT_SUM = v.PRODUCT_SUM, | |
p.region = v.region, | |
p.file_name = v.file_name | |
WHEN NOT MATCHED THEN | |
INSERT (p.doc_id, p.PUBLISHDATE, p.NOTIFICATIONNUMBER, p.lotNumber, p.CUSTOMER_INN, p.CUSTOMER_KPP, p.CUSTOMER_NAME, p.SUPPLIER_INN, p.SUPPLIER_KPP, p.SUPPLIER_NAME, p.SUPPLIER_ADDRESS, p.PRICE, p.CURRENCY_CODE, p.PRODUCT_OKPD2_CODE, p.PRODUCT_PRICE, p.PRODUCT_QUANTITY, p.PRODUCT_SUM, p.region, p.file_name) | |
VALUES(v.doc_id, TO_DATE(v.PUBLISHDATE,'YYYY-MM-DD'), v.NOTIFICATIONNUMBER, v.lotNumber, v.CUSTOMER_INN, v.CUSTOMER_KPP, v.CUSTOMER_NAME, v.SUPPLIER_INN, v.SUPPLIER_KPP, v.SUPPLIER_NAME, v.SUPPLIER_ADDRESS, v.PRICE, v.CURRENCY_CODE, v.PRODUCT_OKPD2_CODE, v.PRODUCT_PRICE, v.PRODUCT_QUANTITY, v.PRODUCT_SUM, v.region, v.file_name) | |
""" | |
for r in result: | |
for k,v in r.items(): # k - название крупных разделов | |
if k == "root": | |
v["region"] = region_name | |
v["file_name"] = Path(afile).name | |
save_to_ora(sql_test, v) | |
elif "protocols" in section_name.lower(): | |
sql_test = "" | |
sql_suppliers = "" | |
# В перспективе можно не привязываться к имени файла, но тогда нужно выше вводить понятие подтип. | |
if "EF1_" in afile: | |
sql_test = """ | |
MERGE INTO PROTOCOLS_FZ44 p | |
USING (SELECT | |
:fcsProtocolEF1_id as doc_id, | |
:fcsProtocolEF1_purchaseNumber as PURCHASENUMBER, | |
:fcsProtocolEF1_publishDate as PUBLISHDATE, | |
:region as region, | |
:file_name as file_name | |
FROM DUAL | |
) v | |
ON (p.PURCHASENUMBER=v.PURCHASENUMBER) | |
WHEN MATCHED THEN | |
UPDATE SET | |
p.PUBLISHDATE = TO_DATE(v.PUBLISHDATE,'YYYY-MM-DD'), | |
p.region = v.region, | |
p.file_name = v.file_name | |
WHEN NOT MATCHED THEN | |
INSERT (p.doc_id, p.PURCHASENUMBER, p.PUBLISHDATE, p.region, p.file_name) | |
VALUES(v.doc_id, v.PURCHASENUMBER, TO_DATE(v.PUBLISHDATE,'YYYY-MM-DD'), v.region, v.file_name) | |
""" | |
elif "EF2_" in afile: | |
sql_test = """ | |
MERGE INTO PROTOCOLS_FZ44 p | |
USING (SELECT | |
:fcsProtocolEF2_id as doc_id, | |
:fcsProtocolEF2_purchaseNumber as PURCHASENUMBER, | |
:fcsProtocolEF2_publishDate as PUBLISHDATE, | |
:region as region, | |
:file_name as file_name | |
FROM DUAL | |
) v | |
ON (p.PURCHASENUMBER=v.PURCHASENUMBER) | |
WHEN MATCHED THEN | |
UPDATE SET | |
p.PUBLISHDATE = TO_DATE(v.PUBLISHDATE,'YYYY-MM-DD'), | |
p.region = v.region, | |
p.file_name = v.file_name | |
WHEN NOT MATCHED THEN | |
INSERT (p.doc_id, p.PURCHASENUMBER, p.PUBLISHDATE, p.region, p.file_name) | |
VALUES(v.doc_id, v.PURCHASENUMBER, TO_DATE(v.PUBLISHDATE,'YYYY-MM-DD'), v.region, v.file_name) | |
""" | |
elif "EF3_" in afile: | |
sql_test = """ | |
MERGE INTO PROTOCOLS_FZ44 p | |
USING (SELECT | |
:fcsProtocolEF3_id as doc_id, | |
:fcsProtocolEF3_purchaseNumber as PURCHASENUMBER, | |
:fcsProtocolEF3_publishDate as PUBLISHDATE, | |
:region as region, | |
:file_name as file_name | |
FROM DUAL | |
) v | |
ON (p.PURCHASENUMBER=v.PURCHASENUMBER) | |
WHEN MATCHED THEN | |
UPDATE SET | |
p.PUBLISHDATE = TO_DATE(v.PUBLISHDATE,'YYYY-MM-DD'), | |
p.region = v.region, | |
p.file_name = v.file_name | |
WHEN NOT MATCHED THEN | |
INSERT (p.doc_id, p.PURCHASENUMBER, p.PUBLISHDATE, p.region, p.file_name) | |
VALUES(v.doc_id, v.PURCHASENUMBER, TO_DATE(v.PUBLISHDATE,'YYYY-MM-DD'), v.region, v.file_name) | |
""" | |
elif "OK1_" in afile: | |
sql_test = """ | |
MERGE INTO PROTOCOLS_FZ44 p | |
USING (SELECT | |
:fcsProtocolOK1_id as doc_id, | |
:fcsProtocolOK1_purchaseNumber as PURCHASENUMBER, | |
:fcsProtocolOK1_publishDate as PUBLISHDATE, | |
:region as region, | |
:file_name as file_name | |
FROM DUAL | |
) v | |
ON (p.PURCHASENUMBER=v.PURCHASENUMBER) | |
WHEN MATCHED THEN | |
UPDATE SET | |
p.PUBLISHDATE = TO_DATE(v.PUBLISHDATE,'YYYY-MM-DD'), | |
p.region = v.region, | |
p.file_name = v.file_name | |
WHEN NOT MATCHED THEN | |
INSERT (p.doc_id, p.PURCHASENUMBER, p.PUBLISHDATE, p.region, p.file_name) | |
VALUES(v.doc_id, v.PURCHASENUMBER, TO_DATE(v.PUBLISHDATE,'YYYY-MM-DD'), v.region, v.file_name) | |
""" | |
elif "OK2_" in afile: | |
sql_test = """ | |
MERGE INTO PROTOCOLS_FZ44 p | |
USING (SELECT | |
:fcsProtocolOK2_id as doc_id, | |
:fcsProtocolOK2_purchaseNumber as PURCHASENUMBER, | |
:fcsProtocolOK2_publishDate as PUBLISHDATE, | |
:region as region, | |
:file_name as file_name | |
FROM DUAL | |
) v | |
ON (p.PURCHASENUMBER=v.PURCHASENUMBER) | |
WHEN MATCHED THEN | |
UPDATE SET | |
p.PUBLISHDATE = TO_DATE(v.PUBLISHDATE,'YYYY-MM-DD'), | |
p.region = v.region, | |
p.file_name = v.file_name | |
WHEN NOT MATCHED THEN | |
INSERT (p.doc_id, p.PURCHASENUMBER, p.PUBLISHDATE, p.region, p.file_name) | |
VALUES(v.doc_id, v.PURCHASENUMBER, TO_DATE(v.PUBLISHDATE,'YYYY-MM-DD'), v.region, v.file_name) | |
""" | |
elif "PZP_" in afile: | |
sql_test = """ | |
MERGE INTO PROTOCOLS_FZ44 p | |
USING (SELECT | |
:epProtocolEZP2_id as doc_id, | |
:commonInfo_purchaseNumber as PURCHASENUMBER, | |
SUBSTR(:commonInfo_publishDTInETP, 0, 10) as PUBLISHDATE, | |
:region as region, | |
:file_name as file_name | |
FROM DUAL | |
) v | |
ON (p.PURCHASENUMBER=v.PURCHASENUMBER) | |
WHEN MATCHED THEN | |
UPDATE SET | |
p.PUBLISHDATE = TO_DATE(v.PUBLISHDATE,'YYYY-MM-DD'), | |
p.region = v.region, | |
p.file_name = v.file_name | |
WHEN NOT MATCHED THEN | |
INSERT (p.doc_id, p.PURCHASENUMBER, p.PUBLISHDATE, p.region, p.file_name) | |
VALUES(v.doc_id, v.PURCHASENUMBER, TO_DATE(v.PUBLISHDATE,'YYYY-MM-DD'), v.region, v.file_name) | |
""" | |
elif "ProtocolCancel" in afile: | |
sql_test = """ | |
MERGE INTO PROTOCOLS_CANCEL_FZ44 p | |
USING (SELECT | |
:fcsProto_purchaseNumber as PURCHASENUMBER, | |
:fcsProto_docPublishDate as PUBLISHDATE, | |
:region as region, | |
:file_name as file_name | |
FROM DUAL | |
) v | |
ON (p.PURCHASENUMBER=v.PURCHASENUMBER) | |
WHEN MATCHED THEN | |
UPDATE SET | |
p.PUBLISHDATE = TO_DATE(v.PUBLISHDATE,'YYYY-MM-DD'), | |
p.region = v.region, | |
p.file_name = v.file_name | |
WHEN NOT MATCHED THEN | |
INSERT (p.PURCHASENUMBER, p.PUBLISHDATE, p.region, p.file_name) | |
VALUES (v.PURCHASENUMBER, TO_DATE(v.PUBLISHDATE,'YYYY-MM-DD'), v.region, v.file_name) | |
""" | |
# print(sql_test) | |
elif "Evasion_" in afile: | |
sql_test = """ | |
MERGE INTO PROTOCOLS_FZ44 p | |
USING (SELECT | |
:fcsProto_id as doc_id, | |
:fcsProto_purchaseNumber as PURCHASENUMBER, | |
:fcsProto_publishDate as PUBLISHDATE, | |
:region as region, | |
:file_name as file_name | |
FROM DUAL | |
) v | |
ON (p.PURCHASENUMBER=v.PURCHASENUMBER) | |
WHEN MATCHED THEN | |
UPDATE SET | |
p.PUBLISHDATE = TO_DATE(v.PUBLISHDATE,'YYYY-MM-DD'), | |
p.region = v.region, | |
p.file_name = v.file_name | |
WHEN NOT MATCHED THEN | |
INSERT (p.doc_id, p.PURCHASENUMBER, p.PUBLISHDATE, p.region, p.file_name) | |
VALUES(v.doc_id, v.PURCHASENUMBER, TO_DATE(v.PUBLISHDATE,'YYYY-MM-DD'), v.region, v.file_name) | |
""" | |
elif "VPP_" in afile: | |
sql_test = """ | |
MERGE INTO PROTOCOLS_FZ44 p | |
USING (SELECT | |
:epProtoc_id as doc_id, | |
:commonInfo_purchaseNumber as PURCHASENUMBER, | |
SUBSTR(:commonInfo_publishDTInETP, 0, 10) as PUBLISHDATE, | |
:region as region, | |
:file_name as file_name | |
FROM DUAL | |
) v | |
ON (p.PURCHASENUMBER=v.PURCHASENUMBER) | |
WHEN MATCHED THEN | |
UPDATE SET | |
p.PUBLISHDATE = TO_DATE(v.PUBLISHDATE,'YYYY-MM-DD'), | |
p.region = v.region, | |
p.file_name = v.file_name | |
WHEN NOT MATCHED THEN | |
INSERT (p.doc_id, p.PURCHASENUMBER, p.PUBLISHDATE, p.region, p.file_name) | |
VALUES(v.doc_id, v.PURCHASENUMBER, TO_DATE(v.PUBLISHDATE,'YYYY-MM-DD'), v.region, v.file_name) | |
""" | |
elif "PR615_" in afile: | |
sql_test = """ | |
MERGE INTO PROTOCOLS_FZ44 p | |
USING (SELECT | |
:pprf615P_id as doc_id, | |
:commonInfo_purchaseNumber as PURCHASENUMBER, | |
:commonInfo_docPublishDate as PUBLISHDATE, | |
:region as region, | |
:file_name as file_name | |
FROM DUAL | |
) v | |
ON (p.PURCHASENUMBER=v.PURCHASENUMBER) | |
WHEN MATCHED THEN | |
UPDATE SET | |
p.PUBLISHDATE = TO_DATE(v.PUBLISHDATE,'YYYY-MM-DD'), | |
p.region = v.region, | |
p.file_name = v.file_name | |
WHEN NOT MATCHED THEN | |
INSERT (p.doc_id, p.PURCHASENUMBER, p.PUBLISHDATE, p.region, p.file_name) | |
VALUES(v.doc_id, v.PURCHASENUMBER, TO_DATE(v.PUBLISHDATE,'YYYY-MM-DD'), v.region, v.file_name) | |
""" | |
elif "PRO_" in afile: | |
sql_test = """ | |
MERGE INTO PROTOCOLS_FZ44 p | |
USING (SELECT | |
:epProtocolEZK2_id as doc_id, | |
:commonInfo_purchaseNumber as PURCHASENUMBER, | |
SUBSTR(:commonInfo_publishDTInETP, 0, 10) as PUBLISHDATE, | |
:region as region, | |
:file_name as file_name | |
FROM DUAL | |
) v | |
ON (p.PURCHASENUMBER=v.PURCHASENUMBER) | |
WHEN MATCHED THEN | |
UPDATE SET | |
p.PUBLISHDATE = TO_DATE(v.PUBLISHDATE,'YYYY-MM-DD'), | |
p.region = v.region, | |
p.file_name = v.file_name | |
WHEN NOT MATCHED THEN | |
INSERT (p.doc_id, p.PURCHASENUMBER, p.PUBLISHDATE, p.region, p.file_name) | |
VALUES(v.doc_id, v.PURCHASENUMBER, TO_DATE(v.PUBLISHDATE,'YYYY-MM-DD'), v.region, v.file_name) | |
""" | |
elif "PPI_" in afile: | |
sql_test = """ | |
MERGE INTO PROTOCOLS_FZ44 p | |
USING (SELECT | |
:epProtocolEOK3_id as doc_id, | |
:commonInfo_purchaseNumber as PURCHASENUMBER, | |
SUBSTR(:commonInfo_publishDTInETP, 0, 10) as PUBLISHDATE, | |
:region as region, | |
:file_name as file_name | |
FROM DUAL | |
) v | |
ON (p.PURCHASENUMBER=v.PURCHASENUMBER) | |
WHEN MATCHED THEN | |
UPDATE SET | |
p.PUBLISHDATE = TO_DATE(v.PUBLISHDATE,'YYYY-MM-DD'), | |
p.region = v.region, | |
p.file_name = v.file_name | |
WHEN NOT MATCHED THEN | |
INSERT (p.doc_id, p.PURCHASENUMBER, p.PUBLISHDATE, p.region, p.file_name) | |
VALUES(v.doc_id, v.PURCHASENUMBER, TO_DATE(v.PUBLISHDATE,'YYYY-MM-DD'), v.region, v.file_name) | |
""" | |
elif "ProtocolZ" in afile: | |
if "fcsProtocolZP" in afile and not "fcsProtocolZPFinal" in afile: | |
sql_test = """ | |
MERGE INTO PROTOCOLS_FZ44 p | |
USING (SELECT | |
:fcsProtocolZP_id as doc_id, | |
:fcsProtocolZP_purchaseNumber as PURCHASENUMBER, | |
SUBSTR(:fcsProtocolZP_publishDate, 0, 10) as PUBLISHDATE, | |
:region as region, | |
:file_name as file_name | |
FROM DUAL | |
) v | |
ON (p.PURCHASENUMBER=v.PURCHASENUMBER) | |
WHEN MATCHED THEN | |
UPDATE SET | |
p.PUBLISHDATE = TO_DATE(v.PUBLISHDATE,'YYYY-MM-DD'), | |
p.region = v.region, | |
p.file_name = v.file_name | |
WHEN NOT MATCHED THEN | |
INSERT (p.doc_id, p.PURCHASENUMBER, p.PUBLISHDATE, p.region, p.file_name) | |
VALUES(v.doc_id, v.PURCHASENUMBER, TO_DATE(v.PUBLISHDATE,'YYYY-MM-DD'), v.region, v.file_name) | |
""" | |
elif "fcsProtocolZK" in afile: | |
sql_test = """ | |
MERGE INTO PROTOCOLS_FZ44 p | |
USING (SELECT | |
:fcsProtocolZK_id as doc_id, | |
:fcsProtocolZK_purchaseNumber as PURCHASENUMBER, | |
SUBSTR(:fcsProtocolZK_publishDate, 0, 10) as PUBLISHDATE, | |
:region as region, | |
:file_name as file_name | |
FROM DUAL | |
) v | |
ON (p.PURCHASENUMBER=v.PURCHASENUMBER) | |
WHEN MATCHED THEN | |
UPDATE SET | |
p.PUBLISHDATE = TO_DATE(v.PUBLISHDATE,'YYYY-MM-DD'), | |
p.region = v.region, | |
p.file_name = v.file_name | |
WHEN NOT MATCHED THEN | |
INSERT (p.doc_id, p.PURCHASENUMBER, p.PUBLISHDATE, p.region, p.file_name) | |
VALUES(v.doc_id, v.PURCHASENUMBER, TO_DATE(v.PUBLISHDATE,'YYYY-MM-DD'), v.region, v.file_name) | |
""" | |
elif "fcsProtocolZPFinal" in afile: | |
sql_test = """ | |
MERGE INTO PROTOCOLS_FZ44 p | |
USING (SELECT | |
:fcsProto_id as doc_id, | |
:fcsProto_purchaseNumber as PURCHASENUMBER, | |
SUBSTR(:fcsProto_publishDate, 0, 10) as PUBLISHDATE, | |
:region as region, | |
:file_name as file_name | |
FROM DUAL | |
) v | |
ON (p.PURCHASENUMBER=v.PURCHASENUMBER) | |
WHEN MATCHED THEN | |
UPDATE SET | |
p.PUBLISHDATE = TO_DATE(v.PUBLISHDATE,'YYYY-MM-DD'), | |
p.region = v.region, | |
p.file_name = v.file_name | |
WHEN NOT MATCHED THEN | |
INSERT (p.doc_id, p.PURCHASENUMBER, p.PUBLISHDATE, p.region, p.file_name) | |
VALUES(v.doc_id, v.PURCHASENUMBER, TO_DATE(v.PUBLISHDATE,'YYYY-MM-DD'), v.region, v.file_name) | |
""" | |
else: | |
sql_test = """ | |
MERGE INTO PROTOCOLS_FZ44 p | |
USING (SELECT | |
:epProtocolEZP2_id as doc_id, | |
:commonInfo_purchaseNumber as PURCHASENUMBER, | |
SUBSTR(:commonInfo_publishDTInETP, 0, 10) as PUBLISHDATE, | |
:region as region, | |
:file_name as file_name | |
FROM DUAL | |
) v | |
ON (p.PURCHASENUMBER=v.PURCHASENUMBER) | |
WHEN MATCHED THEN | |
UPDATE SET | |
p.PUBLISHDATE = TO_DATE(v.PUBLISHDATE,'YYYY-MM-DD'), | |
p.region = v.region, | |
p.file_name = v.file_name | |
WHEN NOT MATCHED THEN | |
INSERT (p.doc_id, p.PURCHASENUMBER, p.PUBLISHDATE, p.region, p.file_name) | |
VALUES(v.doc_id, v.PURCHASENUMBER, TO_DATE(v.PUBLISHDATE,'YYYY-MM-DD'), v.region, v.file_name) | |
""" | |
elif "OKOU2_" in afile: | |
sql_test = """ | |
MERGE INTO PROTOCOLS_FZ44 p | |
USING (SELECT | |
:fcsProto_id as doc_id, | |
:fcsProto_purchaseNumber as PURCHASENUMBER, | |
:fcsProto_publishDate as PUBLISHDATE, | |
:region as region, | |
:file_name as file_name | |
FROM DUAL | |
) v | |
ON (p.PURCHASENUMBER=v.PURCHASENUMBER) | |
WHEN MATCHED THEN | |
UPDATE SET | |
p.PUBLISHDATE = TO_DATE(v.PUBLISHDATE,'YYYY-MM-DD'), | |
p.region = v.region, | |
p.file_name = v.file_name | |
WHEN NOT MATCHED THEN | |
INSERT (p.doc_id, p.PURCHASENUMBER, p.PUBLISHDATE, p.region, p.file_name) | |
VALUES(v.doc_id, v.PURCHASENUMBER, TO_DATE(v.PUBLISHDATE,'YYYY-MM-DD'), v.region, v.file_name) | |
""" | |
elif "Single" in afile: | |
sql_test = """ | |
MERGE INTO PROTOCOLS_FZ44 p | |
USING (SELECT | |
:fcsProto_id as doc_id, | |
:fcsProto_purchaseNumber as PURCHASENUMBER, | |
:fcsProto_publishDate as PUBLISHDATE, | |
:region as region, | |
:file_name as file_name | |
FROM DUAL | |
) v | |
ON (p.PURCHASENUMBER=v.PURCHASENUMBER) | |
WHEN MATCHED THEN | |
UPDATE SET | |
p.PUBLISHDATE = TO_DATE(v.PUBLISHDATE,'YYYY-MM-DD'), | |
p.region = v.region, | |
p.file_name = v.file_name | |
WHEN NOT MATCHED THEN | |
INSERT (p.doc_id, p.PURCHASENUMBER, p.PUBLISHDATE, p.region, p.file_name) | |
VALUES(v.doc_id, v.PURCHASENUMBER, TO_DATE(v.PUBLISHDATE,'YYYY-MM-DD'), v.region, v.file_name) | |
""" | |
# elif "EFSingleApp_" in afile: | |
# sql_suppliers = """ | |
# MERGE INTO SUPPLIERS_FZ44 p | |
# USING (SELECT | |
# :PURCHASENUMBER as PURCHASENUMBER, | |
# :appParticipant_inn as Participant_inn, | |
# :appParticipant_kpp as Participant_kpp, | |
# :appParticipant_Name as PARTICIPANT_name | |
# FROM DUAL | |
# ) v | |
# ON (p.PURCHASENUMBER=v.PURCHASENUMBER) | |
# WHEN MATCHED THEN | |
# UPDATE SET | |
# p.Participant_inn = v.Participant_inn, | |
# p.Participant_kpp = v.Participant_kpp, | |
# p.PARTICIPANT_name = v.PARTICIPANT_name | |
# WHEN NOT MATCHED THEN | |
# INSERT (p.PURCHASENUMBER, p.Participant_inn, p.Participant_kpp, p.PARTICIPANT_name) | |
# VALUES(v.PURCHASENUMBER, v.Participant_inn, v.Participant_kpp, v.PARTICIPANT_name) | |
# """ | |
else: | |
raise Exceprion("Unknown type: " + Path(afile).name) | |
#print("ORA Unknown section") # мы потом будем парсить выдачу консоли, и искать там ORA поэтому не убирать! Иначе фиг отловим | |
sql_suppliers = """ | |
MERGE INTO SUPPLIERS_FZ44 p | |
USING (SELECT | |
:PURCHASENUMBER as PURCHASENUMBER, | |
:appParticipant_inn as Participant_inn, | |
:appParticipant_kpp as Participant_kpp, | |
:appParticipant_Name as PARTICIPANT_name, | |
:APPPARTICIPANT_POSTADDRESS as APPPARTICIPANT_POSTADDRESS | |
FROM DUAL | |
) v | |
ON (p.PURCHASENUMBER=v.PURCHASENUMBER) | |
WHEN MATCHED THEN | |
UPDATE SET | |
p.Participant_inn = v.Participant_inn, | |
p.Participant_kpp = v.Participant_kpp, | |
p.PARTICIPANT_name = v.PARTICIPANT_name, | |
p.APPPARTICIPANT_POSTADDRESS = v.APPPARTICIPANT_POSTADDRESS | |
WHEN NOT MATCHED THEN | |
INSERT (p.PURCHASENUMBER, p.Participant_inn, p.Participant_kpp, p.PARTICIPANT_name, p.APPPARTICIPANT_POSTADDRESS) | |
VALUES(v.PURCHASENUMBER, v.Participant_inn, v.Participant_kpp, v.PARTICIPANT_name, v.APPPARTICIPANT_POSTADDRESS) | |
""" | |
print("______________result[1] : ", result) | |
print("______________result[1]___________ : ", result[0]) | |
if not "appParticipants" in str(result): | |
result[0]["appParticipants"] = [] # создаем в корне секцию appParticipants. | |
temp_dict = {} | |
for k,v in result[0]["root"].items(): | |
if k == "appParticipant_inn": | |
temp_dict["appParticipant_inn"] = v | |
appParticipant_inn = dict() | |
if k == "appParticipant_kpp": | |
temp_dict["appParticipant_kpp"] = v | |
if k == "appParticipant_postAddress": | |
temp_dict["appParticipant_postAddress"] = v | |
if k == "appParticipant_organizationName": | |
temp_dict["appParticipant_organizationName"] = v | |
result[0]["appParticipants"].append(temp_dict) | |
# Удаляем из оригинала поля которые мы уже засунули во вложенный массив | |
if "appParticipant_inn" in result[0]["root"]: | |
del result[0]["root"]["appParticipant_inn"] | |
if "appParticipant_kpp" in result[0]["root"]: | |
del result[0]["root"]["appParticipant_kpp"] | |
if "appParticipant_organizationName" in result[0]["root"]: | |
del result[0]["root"]["appParticipant_organizationName"] | |
if "appParticipant_postAddress" in result[0]["root"]: | |
del result[0]["root"]["appParticipant_postAddress"] | |
# print("-----------------=============================") | |
print("++++++++++++++----------result -------- : ", result) | |
external_key = None | |
for r in result: | |
for k,v in r.items(): # k - название крупных разделов | |
if k == "root": | |
for k1, v1 in v.items(): # придется поитерироваться | |
if "purchaseNumber" in k1: # префикс у purchaseNumber может отличаться. Иного способа найти его нет | |
external_key = v1 # | |
v["region"] = region_name | |
v["file_name"] = str(Path(afile).name) | |
print("vvvvvv : ", v) | |
save_to_ora(sql_test, v) | |
if k == "appParticipants": | |
for el in v: # appParticipants у нас список. Придется по нему проитерироваться. Каждый элемент списка словарь | |
el["purchaseNumber"] = external_key | |
if not "appParticipant_organizationName" in el: | |
el["appParticipant_organizationName"] = "" # пару раз и такое было | |
x = el["appParticipant_organizationName"] # увы для оракла слишком длинное название | |
el["appParticipant_Name"] = x | |
del el["appParticipant_organizationName"] | |
# del el["fcsProto_publishDate"] | |
# del el["region"] | |
# del el["file_name"] | |
if not "appParticipant_kpp" in el: | |
el["appParticipant_kpp"] = "" # бывает что КПП нет | |
if not "appParticipant_postAddress" in el: | |
el["appParticipant_postAddress"] = "" # вдруг нет адреса | |
if not "appParticipant_inn" in el: | |
el["appParticipant_inn"] = "" # бывает что ИНН нет | |
print("el---------------> ", el) | |
save_to_ora(sql_suppliers, el) | |
#sys.exit(0) # не упали значит все хорошо | |
def insert_prepare(asql, acursor): | |
try: | |
acursor.prepare(asql) | |
except cx_Oracle.DatabaseError as e: | |
raise cx_Oracle.DatabaseError("insert_prepare: ", e) | |
#print('Ошибка! Не возможно приготовить курсор (' + str(e) + ')') | |
#sys.exit(1) | |
def oraconnect(aconstr): | |
conn = cx_Oracle.connect(aconstr) | |
cursor = conn.cursor() | |
def oradisc(): | |
cursor.close() | |
conn.close() | |
def save_to_ora(asql, abindvar): | |
try: | |
print("--abindvar--> ", abindvar) | |
print("\n", asql) | |
if cursor.statement != asql: | |
insert_prepare(asql, cursor) | |
#print("-------===========--------") | |
# print (file_object.write(asql + "\n" + str(abindvar) + "\n\n\n")) | |
# print("-----------------------------") | |
cursor.execute(cursor.statement, abindvar) | |
conn.commit() | |
except cx_Oracle.DatabaseError as e: | |
raise cx_Oracle.DatabaseError("insert_prepare: ", e) | |
print("eee111 :", e) | |
oradisc() | |
#sys.exit(1) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment