Add duplicate keys management for extras #1

Merged
a.fabrizio0a806 merged 6 commits from feature/implement_validators into main 2024-11-29 16:09:42 +01:00
1 changed files with 91 additions and 221 deletions
Showing only changes of commit 691798a2b9 - Show all commits

View File

@ -23,21 +23,12 @@ from typing import (
Literal,
)
if TYPE_CHECKING:
import ckan.model as modelDict
from ckan.common import (
g
)
from flask import Flask, g
from ckan.lib.app_globals import app_globals
from ckan.logic import get_action
# Created by Francesco Mangiacrapa
# francesco.mangiacrapa@isti.cnr.it
# ISTI-CNR Pisa (ITALY)
log = getLogger(__name__)
d4s_ctg_namespaces_controller = None
@ -48,193 +39,112 @@ def remove_check_replicated_custom_key(schema):
return schema
#CREATED BY FRANCESCO MANGIACRAPA FOR OVERRIDING THE package_extras_save FROM dictization.model_save.py
# Is this needed?
def _package_extras_save(
        extra_dicts: Optional[list[dict[str, Any]]], pkg: 'model.Package',
        context: Context) -> None:
    """Save the extras of ``pkg``, allowing the same key to appear more than once.

    The extras currently stored for the package are loaded through the
    session and grouped by key; the incoming ``extra_dicts`` are grouped the
    same way.  The two groupings are then reconciled per key and per value,
    taking the number of occurrences of each value into account:

    * keys only present in the request  -> a new ``PackageExtra`` per value;
    * keys only present in the database -> every stored extra is marked
      ``'deleted'``;
    * keys present on both sides        -> occurrences are added or marked
      ``'deleted'`` so that the stored count of each value matches the
      requested count.

    :param extra_dicts: list of ``{"key": ..., "value": ...}`` dicts (an
        optional truthy ``"deleted"`` entry makes the dict be skipped), or
        ``None``.
    :param pkg: the package whose extras are being saved; only ``pkg.id`` is
        read here.
    :param context: must provide ``"session"`` and ``"model"``; the optional
        ``"allow_partial_update"`` flag turns ``extra_dicts is None`` into a
        no-op.
    """
    allow_partial_update = context.get("allow_partial_update", False)
    if extra_dicts is None and allow_partial_update:
        log.debug("extra dicts is NONE")
        return
    log.debug("USING CUSTOM SAVE")

    session = context["session"]
    model = context["model"]

    # Load the stored extras explicitly so that rows sharing the same key
    # are all visible.
    extras_list = session.query(model.PackageExtra).filter_by(
        package_id=pkg.id).all()

    # old_extras: key -> list of stored values
    # extras:     key -> list of stored PackageExtra objects
    old_extras = {}
    extras = {}
    for extra in extras_list or []:
        old_extras.setdefault(extra.key, []).append(extra.value)
        extras.setdefault(extra.key, []).append(extra)

    # new_extras: key -> list of requested values (duplicates preserved)
    new_extras = {}
    for extra_dict in extra_dicts or []:
        if extra_dict.get("deleted"):
            continue
        if extra_dict['value'] is not None:
            new_extras.setdefault(
                extra_dict["key"], []).append(extra_dict["value"])

    # new: keys that are not stored yet
    for key in set(new_extras) - set(old_extras):
        for value in new_extras[key]:
            extra = model.PackageExtra(
                state='active', key=key, value=value, package_id=pkg.id)
            session.add(extra)
            extras_list.append(extra)

    # deleted: keys that are no longer requested
    # NOTE(review): rows are only flagged with state 'deleted', they are not
    # passed to session.delete() -- confirm this is the intended behaviour.
    for key in set(old_extras) - set(new_extras):
        for extra in extras[key]:
            extra.state = 'deleted'
            extras_list.remove(extra)

    # changed: keys present on both sides
    for key in set(new_extras) & set(old_extras):
        # For each requested value (repeated values are visited repeatedly).
        for value in new_extras[key]:
            old_occur = old_extras[key].count(value)
            new_occur = new_extras[key].count(value)

            if value in old_extras[key]:
                if old_occur == new_occur:
                    # Same multiplicity: make sure every match is active.
                    for extra in get_package_for_value(extras[key], value):
                        extra.state = 'active'
                        session.add(extra)
                elif new_occur > old_occur:
                    # One more occurrence requested than known so far:
                    # create it and record it, so the next visit of the
                    # same value sees the updated count.
                    extra = model.PackageExtra(
                        state='active', key=key, value=value,
                        package_id=pkg.id)
                    session.add(extra)
                    extras_list.append(extra)
                    old_extras[key].append(value)
                else:
                    # Fewer occurrences requested: flag the surplus as
                    # deleted and keep the rest active.  This runs once per
                    # repeated value; it could be done in a single pass.
                    count_delete = old_occur - new_occur
                    matches = get_package_for_value(extras[key], value)
                    for idx, extra in enumerate(matches):
                        if idx < count_delete:
                            extra.state = 'deleted'
                        else:
                            extra.state = 'active'
                            # Consider moving this call inside the loop but
                            # outside the else branch.
                            session.add(extra)
            else:
                # Value not stored under this key yet.
                extra = model.PackageExtra(
                    state='active', key=key, value=value, package_id=pkg.id)
                session.add(extra)
                extras_list.append(extra)

        # For each stored value that is no longer requested under this key.
        for value in old_extras[key]:
            if value not in new_extras[key]:
                for extra in get_package_for_value(extras[key], value):
                    extra.state = 'deleted'
##ADDED BY FRANCESCO MANGIACRAPA
#
#extras_list = pkg.extras
##extras_list = list(obj.extras)
##extras = dict((extra.key, extra) for extra in extras_list)
#old_extras = {}
##old_extras = {extra.key: extra for extra in extras_list}
#extras = {}
#for extra in extras_list or []:
# old_extras.setdefault(extra.key, []).append(extra.value)
# extras.setdefault(extra.key, []).append(extra)
#
##ADDED BY FRANCESCO MANGIACRAPA
##print "old_extras: "+str(old_extras)
#
#new_extras = {}
#for extra_dict in extra_dicts or []:
# #new_extras.setdefault(extra_dict["key"], []).append(extra_dict["value"])
# if extra_dict.get("deleted"):
# continue
#
# #if extra_dict['value'] is not None and not extra_dict["value"] == "":
# if extra_dict['value'] is not None:
# new_extras.setdefault(extra_dict["key"], []).append(extra_dict["value"])
#
##ADDED BY FRANCESCO MANGIACRAPA
#log.debug("new_extras: %s", new_extras)
##print "new_extras: "+str(new_extras)
#
##new
#for key in set(new_extras.keys()) - set(old_extras.keys()):
# state = 'active'
# extra_lst = new_extras[key]
# for extra in extra_lst:
# extra = model.PackageExtra(state=state, key=key, value=extra)
# session.add(extra)
# extras_list.append(extra)
#
##deleted
#for key in set(old_extras.keys()) - set(new_extras.keys()):
# extra_lst = extras[key]
# for extra in extra_lst:
# state = 'deleted'
# extra.state = state
# extras_list.remove(extra)
#
##changed
#for key in set(new_extras.keys()) & set(old_extras.keys()):
# #for each value of new list
# for value in new_extras[key]:
# old_occur = old_extras[key].count(value)
# new_occur = new_extras[key].count(value)
# # it is an old value deleted or not
# if value in old_extras[key]:
# if old_occur == new_occur:
# #there is a little bug, this code return always the first element, so I'm fixing with #FIX-STATUS
# extra_values = get_package_for_value(extras[key], value)
# #extras_list.append(extra)
# for extra in extra_values:
# state = 'active'
# extra.state = state
# session.add(extra)
# #print "extra updated: "+str(extra)
# log.debug("extra updated: %s", extra)
#
# elif new_occur > old_occur:
# state = 'active'
# extra = model.PackageExtra(state=state, key=key, value=value)
# extra.state = state
# session.add(extra)
# extras_list.append(extra)
# old_extras[key].append(value)
#
# else:
# #remove all occurrences deleted - this code could be optimized, it is run several times but could be performed one shot
# countDelete = old_occur-new_occur
# extra_values = get_package_for_value(extras[key], value)
# for idx, extra in enumerate(extra_values):
# if idx < countDelete:
# state = 'deleted'
# extra.state = state
#
# else:
# state = 'active'
# extra.state = state
# session.add(extra)
#
# else:
# state = 'active'
# extra = model.PackageExtra(state=state, key=key, value=value)
# extra.state = state
# session.add(extra)
# extras_list.append(extra)
#
#
# #for each value of old list
# for value in old_extras[key]:
# #if value is not present in new list
# if value not in new_extras[key]:
# extra_values = get_package_for_value(extras[key], value)
# for extra in extra_values:
# state = 'deleted'
# extra.state = state
#ADDED BY FRANCESCO MANGIACRAPA
def get_package_for_value(list_package, value):
    """Return every item of ``list_package`` whose ``value`` equals ``value``.

    All items are inspected: a non-matching item does not stop the scan, so
    matches that come after a mismatch are returned as well.

    :param list_package: iterable of objects exposing a ``value`` attribute
        (``None`` is treated as empty).
    :param value: the value to look for.
    :returns: a new list with the matching items, in their original order.
    """
    return [x for x in list_package or () if x.value == value]
#OVERRIDING BASE SQLALCHEMY ENGINE INSTANCE - is this needed for the connection with gCube?
def _init_TrackingMiddleware(self, app, config):
@ -364,47 +274,7 @@ class D4Science_ThemePlugin(plugins.SingletonPlugin, toolkit.DefaultDatasetForm)
return {
'ignore_duplicate_keys': validators.ignore_duplicate_keys
}
#IPackageController
#def before_dataset_search(self, search_params):
# search_params = search_params or {}
# return search_params
#
#def after_dataset_search(self, search_results, data_dict):
# return search_results
#
#def before_search(self, search_params):
# # Controlla se search_params è None e sostituiscilo con un dizionario vuoto
# search_params = search_params or {}
# return search_params
#
#def before_create(self, context, data_dict):
# self.apply_custom_extras_validator(data_dict)
#
#def before_update(self, context, data_dict):
# self.apply_custom_extras_validator(data_dict)
#
## utile ##
#def before_dataset_save(self, context, data_dict):
# log.debug("sto chiamando before_dataset_save")
# # Intercetta il salvataggio dei dataset prima che venga effettuato il controllo
# extras_list = data_dict.get('extras', [])
#
# # Aggiungi una logica per evitare che venga applicato un controllo su chiavi duplicate
# # Si elimina la logica di validazione delle chiavi duplicate
# unique_extras = {}
# for extra in extras_list:
# key = extra.get('key')
# if key in unique_extras:
# # Consenti la duplicazione delle chiavi, quindi non lo filtriamo
# unique_extras[key].append(extra)
# else:
# unique_extras[key] = [extra]
#
# # Restituisci un nuovo 'extras' senza rimuovere le chiavi duplicate
# data_dict['extras'] = [e for extras in unique_extras.values() for e in extras]
# return data_dict
def convert_to_boolean(self, value):
log.debug("value boolean %s", value)
if isinstance(value, str):