Add duplicate keys management for extras #1

Merged
a.fabrizio0a806 merged 6 commits from feature/implement_validators into main 2024-11-29 16:09:42 +01:00
1 changed file with 212 additions and 138 deletions
Showing only changes of commit 890b920e1b - Show all commits

View File

@ -10,16 +10,28 @@ import ckan.model as model
from ckanext.d4science_theme.controllers.home import d4SHomeController
from ckanext.d4science_theme.controllers.systemtype import d4STypeController
from ckanext.d4science_theme.controllers.organization import OrganizationVREController
import sqlalchemy as sa
from ckan.config.middleware.common_middleware import TrackingMiddleware
import ckan.lib.dictization.model_save as model_save
#from ckan.controllers.home import HomeController
#from ckan.plugins import IRoutes
from flask import Blueprint, render_template
from ckan.types import Context
from typing import (
Any, Collection, Optional, TYPE_CHECKING, Type, Union, cast, overload,
Literal,
)
if TYPE_CHECKING:
import ckan.model as modelDict
from ckan.common import (
g
)
from flask import Flask, g
from ckan.lib.app_globals import app_globals
import ckan.plugins.toolkit as toolkit
from ckan.logic import get_action
# Created by Francesco Mangiacrapa
@ -38,145 +50,178 @@ def remove_check_replicated_custom_key(schema):
#CREATED BY FRANCESCO MANGIACRAPA FOR OVERRIDING THE package_extras_save FROM dictization.model_save.py
# Is this needed?
def _package_extras_save(extra_dicts: Optional[list[dict[str, Any]]], pkg: 'model.Package',
                         context: Context) -> None:
    """Save the extras of ``pkg``, allowing several extras to share one key.

    Replacement for ``package_extras_save`` from
    ``ckan.lib.dictization.model_save``.  The requested extras are handled as
    a multiset of (key, value) pairs and reconciled against the rows exposed
    by ``pkg._extras``:

    * a stored row whose (key, value) pair is requested again is left as is;
    * a stored row whose key is requested with a different value is updated
      in place;
    * requested pairs with no stored row left to reuse are inserted;
    * stored rows that are no longer requested are deleted.

    :param extra_dicts: extras as dicts with ``key`` and ``value`` entries and
        an optional truthy ``deleted`` flag.  Entries flagged as deleted or
        whose value is ``None`` are ignored.  ``None`` combined with
        ``context["allow_partial_update"]`` leaves the stored extras untouched.
    :param pkg: package whose extras are being saved.
    :param context: must provide ``session``; ``allow_partial_update`` is
        optional and defaults to ``False``.
    """
    allow_partial_update = context.get("allow_partial_update", False)
    if extra_dicts is None and allow_partial_update:
        log.debug("extra dicts is NONE")
        return

    log.debug("USING CUSTOM SAVE")
    session = context["session"]
    log.debug("extra_dicts: %s", extra_dicts)

    # NOTE(review): assumes pkg._extras exposes every stored row of the
    # package.  If that collection is keyed by extra key, rows sharing a key
    # may be hidden here -- confirm against the CKAN model.
    # Snapshot the rows so the collection is never iterated while it changes.
    old_extras = list(pkg._extras.values())
    log.debug("OLD_EXTRAS %s", old_extras)

    # A list of (key, value) pairs, not a dict, so duplicate keys survive.
    wanted = []
    for extra_dict in extra_dicts or []:
        if extra_dict.get("deleted"):
            log.debug("extra_dict deleted: %s ", extra_dict["key"])
            continue
        if extra_dict["value"] is not None:
            wanted.append((extra_dict["key"], extra_dict["value"]))
    log.debug("new_extras: %s", wanted)

    # unchanged: each stored row consumes at most one identical requested
    # pair, so repeated identical pairs are counted rather than collapsed.
    leftovers = []
    for extra in old_extras:
        pair = (extra.key, extra.value)
        if pair in wanted:
            wanted.remove(pair)
        else:
            leftovers.append(extra)

    # changed: reuse a stored row for one still-unmatched pair with the same
    # key.  Each row is rewritten at most once, so a row can no longer be
    # overwritten by every value that happens to share its key.
    to_delete = []
    for extra in leftovers:
        match = next((pair for pair in wanted if pair[0] == extra.key), None)
        if match is None:
            to_delete.append(extra)
            continue
        wanted.remove(match)
        extra.value = match[1]
        extra.state = 'active'
        log.debug("changed %s", extra)

    # new: whatever is still unmatched needs a row of its own.
    for key, value in wanted:
        extra = model.PackageExtra(key=key, value=value, package_id=pkg.id, state='active')
        log.debug("extra: %s", extra)
        session.add(extra)

    # deleted: stored rows that nothing in the request accounts for.
    log.debug("TO DELETE %s", to_delete)
    for extra in to_delete:
        log.debug("delete extra %s", extra)
        extra.state = 'deleted'
        session.delete(extra)
#ADDED BY FRANCESCO MANGIACRAPA
def get_package_for_value(list_package, value):
@ -191,6 +236,31 @@ def get_package_for_value(list_package, value):
return lst
# Overriding the base SQLAlchemy engine instance. Is this needed for the connection to gCube?
def _init_TrackingMiddleware(self, app, config):
    """Replacement ``__init__`` for ``TrackingMiddleware``.

    Stores ``app`` on the instance and builds ``self.engine`` from the
    ``sqlalchemy.url`` setting, applying the configured connection-pool size
    and overflow.

    :param app: application object, stored as ``self.app``.
    :param config: mapping read for ``sqlalchemy.url``,
        ``sqlalchemy.pool_size`` (5 when unset) and
        ``sqlalchemy.max_overflow`` (10 when unset).
    """
    import re

    self.app = app
    log.debug('TrackingMiddleware d4Science instance')

    sqlalchemy_url = config.get('sqlalchemy.url')
    # The URL usually embeds the database password: mask it before logging.
    masked_url = re.sub(r'(://[^:/@]+):[^/]*@', r'\1:***@', str(sqlalchemy_url))
    log.debug('sqlalchemy_url read: %s', masked_url)

    sqlalchemy_pool = config.get('sqlalchemy.pool_size')
    if sqlalchemy_pool is None:
        sqlalchemy_pool = 5
    log.debug('sqlalchemy_pool read: %s', sqlalchemy_pool)

    sqlalchemy_overflow = config.get('sqlalchemy.max_overflow')
    if sqlalchemy_overflow is None:
        sqlalchemy_overflow = 10
    log.debug('sqlalchemy_overflow read: %s', sqlalchemy_overflow)

    try:
        self.engine = sa.create_engine(
            sqlalchemy_url,
            pool_size=int(sqlalchemy_pool),
            max_overflow=int(sqlalchemy_overflow),
        )
    except (TypeError, ValueError) as e:
        # ValueError covers a non-numeric pool setting handed to int();
        # TypeError is kept from the original fallback.  Either way, fall
        # back to an engine with the default pool settings.
        log.error('pool size does not work: %s', e.args)
        self.engine = sa.create_engine(sqlalchemy_url)
class D4Science_ThemePlugin(plugins.SingletonPlugin, toolkit.DefaultDatasetForm):
plugins.implements(plugins.IConfigurer)
@ -220,7 +290,6 @@ class D4Science_ThemePlugin(plugins.SingletonPlugin, toolkit.DefaultDatasetForm)
# that we'll use to refer to this fanstatic directory from CKAN
# templates.
toolkit.add_resource('assets', 'd4science_theme')
# toolkit.add_resource('assets', 'd4science_scripts')
def _modify_package_schema(self):
log.debug("*** modify package ***")
@ -242,30 +311,33 @@ class D4Science_ThemePlugin(plugins.SingletonPlugin, toolkit.DefaultDatasetForm)
# let's grab the default schema in our plugin
log.debug("creating package....")
schema = super(D4Science_ThemePlugin, self).create_package_schema()
log.debug("schema after create prima del validator %s", schema)
#schema = remove_check_replicated_custom_key(schema)
#log.debug("schema after create prima del validator %s", schema)
schema = remove_check_replicated_custom_key(schema)
#log.debug("create_package1 (remove __before): %s", schema)
#schema.update(self._modify_package_schema(schema))
schema.update(self._modify_package_schema())
#d.package_dict_save = _package_dict_save
log.debug("create_package1 %s", schema)
#log.debug("create_package2 (remove extras validator): %s", schema)
return schema
#IDatasetForm
def update_package_schema(self):
    """Return the schema used when a dataset is updated.

    Starts from the parent class's update schema, passes it through
    ``remove_check_replicated_custom_key`` and then merges in the entries
    returned by ``self._modify_package_schema()``.
    """
    log.debug("** update_package **")
    parent_schema = super(D4Science_ThemePlugin, self).update_package_schema()
    schema = remove_check_replicated_custom_key(parent_schema)
    plugin_entries = self._modify_package_schema()
    schema.update(plugin_entries)
    log.debug("update_package1 %s", schema)
    return schema
#IDatasetForm
def show_package_schema(self):
log.debug("** show package **")
schema = super(D4Science_ThemePlugin, self).show_package_schema()
log.debug("show_package1 %s", schema)
#schema = remove_check_replicated_custom_key(schema)
#log.debug("show_package1 %s", schema)
schema = remove_check_replicated_custom_key(schema)
#log.debug("show_package1.5 no before %s", schema)
#schema.update(self._modify_package_schema(schema))
schema.update({
'extras': {
@ -274,7 +346,7 @@ class D4Science_ThemePlugin(plugins.SingletonPlugin, toolkit.DefaultDatasetForm)
'value': [toolkit.get_validator('not_missing')]
}
})
log.debug("show_package2 %s", schema)
#log.debug("show_package2 %s", schema)
return schema
#IDatasetForm
@ -424,7 +496,7 @@ class D4Science_ThemePlugin(plugins.SingletonPlugin, toolkit.DefaultDatasetForm)
#Overriding package_extras_save method
# Is this needed?
# model_save.package_extras_save = _package_extras_save
model_save.package_extras_save = _package_extras_save
#Overriding index home controller - rimosso in ckan 2.10
#d4sHC = d4SHomeController()
@ -432,6 +504,9 @@ class D4Science_ThemePlugin(plugins.SingletonPlugin, toolkit.DefaultDatasetForm)
global d4s_ctg_namespaces_controller
#OVERRIDING BASE SQL ALCHEMY ENGINE INSTANCE
TrackingMiddleware.__init__ = _init_TrackingMiddleware
#if d4s_ctg_namespaces_controller is None:
# log.info("d4s_ctg_namespaces_controller instancing...")
# d4s_ctg_namespaces_controller = helpers.get_d4s_namespace_controller()
@ -567,4 +642,3 @@ class D4Science_ThemePlugin(plugins.SingletonPlugin, toolkit.DefaultDatasetForm)
return facets_dict