Add support for duplicate keys in extras (delete and update are still missing)

This commit is contained in:
Alessio Fabrizio 2024-11-28 15:19:55 +01:00
parent 401fcbe1c7
commit 890b920e1b
1 changed files with 212 additions and 138 deletions

View File

@ -10,16 +10,28 @@ import ckan.model as model
from ckanext.d4science_theme.controllers.home import d4SHomeController from ckanext.d4science_theme.controllers.home import d4SHomeController
from ckanext.d4science_theme.controllers.systemtype import d4STypeController from ckanext.d4science_theme.controllers.systemtype import d4STypeController
from ckanext.d4science_theme.controllers.organization import OrganizationVREController from ckanext.d4science_theme.controllers.organization import OrganizationVREController
import sqlalchemy as sa
from ckan.config.middleware.common_middleware import TrackingMiddleware
import ckan.lib.dictization.model_save as model_save
#from ckan.controllers.home import HomeController #from ckan.controllers.home import HomeController
#from ckan.plugins import IRoutes #from ckan.plugins import IRoutes
from flask import Blueprint, render_template from flask import Blueprint, render_template
from ckan.types import Context
from typing import (
Any, Collection, Optional, TYPE_CHECKING, Type, Union, cast, overload,
Literal,
)
if TYPE_CHECKING:
import ckan.model as modelDict
from ckan.common import ( from ckan.common import (
g g
) )
from flask import Flask, g from flask import Flask, g
from ckan.lib.app_globals import app_globals from ckan.lib.app_globals import app_globals
import ckan.plugins.toolkit as toolkit from ckan.logic import get_action
# Created by Francesco Mangiacrapa # Created by Francesco Mangiacrapa
@ -38,145 +50,178 @@ def remove_check_replicated_custom_key(schema):
#CREATED BY FRANCESCO MANGIACRAPA FOR OVERRIDING THE package_extras_save FROM dictization.model_save.py #CREATED BY FRANCESCO MANGIACRAPA FOR OVERRIDING THE package_extras_save FROM dictization.model_save.py
# Is this needed? # Is this needed?
def _package_extras_save(extra_dicts: Optional[list[dict[str, Any]]], pkg: 'model.Package',
                         context: Context) -> None:
    """Save the extras of ``pkg``, allowing the same key to appear several times.

    Written to override ``package_extras_save`` from
    ``ckan.lib.dictization.model_save``. The extras currently attached to the
    package are reconciled one-to-one with the submitted ``(key, value)``
    pairs:

    * a stored extra whose key and value are both submitted is left untouched;
    * a stored extra whose key is submitted with a different value is updated
      in place;
    * a stored extra with no submitted counterpart is deleted;
    * every submitted pair still unmatched becomes a new ``PackageExtra`` row.

    Args:
        extra_dicts: Submitted extras, each a dict with ``"key"`` and
            ``"value"`` entries and an optional ``"deleted"`` flag. Entries
            flagged as deleted, or whose value is ``None``, are ignored.
            ``None`` together with ``context["allow_partial_update"]`` means
            "leave the stored extras alone".
        pkg: The package whose extras are being saved.
        context: Action context; ``context["session"]`` is used to add and
            delete rows.

    Returns:
        None. Changes are staged on the session; nothing is committed here.
    """
    allow_partial_update = context.get("allow_partial_update", False)
    if extra_dicts is None and allow_partial_update:
        log.debug("extra dicts is NONE")
        return

    log.debug("USING CUSTOM SAVE")
    session = context["session"]

    # NOTE(review): the stored extras are read through ``pkg._extras.values()``.
    # If that mapping is keyed by extra key, stored rows that share a key may
    # not all be visible here - confirm against the CKAN model.
    old_extras = pkg._extras
    log.debug("OLD_EXTRAS: %s", old_extras)

    # A list of pairs (not a dict) so that duplicated keys are preserved.
    pending = [
        (extra_dict["key"], extra_dict["value"])
        for extra_dict in extra_dicts or []
        if not extra_dict.get("deleted") and extra_dict["value"] is not None
    ]
    log.debug("new_extras: %s", pending)

    # unchanged: pair every stored extra with one identical submitted pair.
    # Matching consumes the pair, so N identical submitted pairs keep at most
    # N identical stored rows.
    stale = []
    for extra in list(old_extras.values()):
        pair = (extra.key, extra.value)
        if pair in pending:
            pending.remove(pair)
        else:
            stale.append(extra)

    # changed / deleted: a stored extra without an identical counterpart is
    # reused for a still-unmatched pair with the same key, otherwise removed.
    for extra in stale:
        replacement = next(
            (pair for pair in pending if pair[0] == extra.key), None)
        if replacement is None:
            log.debug("delete extra: %s", extra)
            extra.state = 'deleted'
            session.delete(extra)
            continue
        pending.remove(replacement)
        # The row is already attached to the session, so assigning the new
        # value is enough; no session.add() is needed for an update.
        extra.value = replacement[1]
        extra.state = 'active'
        log.debug("changed: %s", extra)

    # new: whatever is still pending has no stored row at all.
    for key, value in pending:
        extra = model.PackageExtra(
            key=key, value=value, package_id=pkg.id, state='active')
        log.debug("extra: %s", extra)
        session.add(extra)
#ADDED BY FRANCESCO MANGIACRAPA #ADDED BY FRANCESCO MANGIACRAPA
def get_package_for_value(list_package, value): def get_package_for_value(list_package, value):
@ -191,6 +236,31 @@ def get_package_for_value(list_package, value):
return lst return lst
#OVERRIDING BASE SQL ALCHEMY ENGINE INSTANCE - is this needed for the connection with gCube?
def _init_TrackingMiddleware(self, app, config):
    """Initialise a tracking middleware with an engine built from ``config``.

    Stores ``app`` on the instance and creates ``self.engine`` from the
    ``sqlalchemy.url`` setting. The pool is sized from ``sqlalchemy.pool_size``
    (5 when unset) and ``sqlalchemy.max_overflow`` (10 when unset). If
    ``create_engine`` rejects the pool arguments with a ``TypeError``, the
    engine is created again from the URL alone.
    """
    self.app = app
    log.debug('TrackingMiddleware d4Science instance')

    db_url = config.get('sqlalchemy.url')
    log.debug('sqlalchemy_url read: '+str(db_url))

    pool_size = config.get('sqlalchemy.pool_size')
    pool_size = 5 if pool_size is None else pool_size
    log.debug('sqlalchemy_pool read: '+str(pool_size))

    max_overflow = config.get('sqlalchemy.max_overflow')
    max_overflow = 10 if max_overflow is None else max_overflow
    log.debug('sqlalchemy_overflow read: '+str(max_overflow))

    try:
        engine = sa.create_engine(
            db_url,
            pool_size=int(pool_size),
            max_overflow=int(max_overflow),
        )
    except TypeError as e:
        # Pool keyword arguments were not accepted: retry with the URL only.
        log.error('pool size does not work: ' +str(e.args))
        engine = sa.create_engine(db_url)
    self.engine = engine
class D4Science_ThemePlugin(plugins.SingletonPlugin, toolkit.DefaultDatasetForm): class D4Science_ThemePlugin(plugins.SingletonPlugin, toolkit.DefaultDatasetForm):
plugins.implements(plugins.IConfigurer) plugins.implements(plugins.IConfigurer)
@ -220,7 +290,6 @@ class D4Science_ThemePlugin(plugins.SingletonPlugin, toolkit.DefaultDatasetForm)
# that we'll use to refer to this fanstatic directory from CKAN # that we'll use to refer to this fanstatic directory from CKAN
# templates. # templates.
toolkit.add_resource('assets', 'd4science_theme') toolkit.add_resource('assets', 'd4science_theme')
# toolkit.add_resource('assets', 'd4science_scripts')
def _modify_package_schema(self): def _modify_package_schema(self):
log.debug("*** modify package ***") log.debug("*** modify package ***")
@ -242,30 +311,33 @@ class D4Science_ThemePlugin(plugins.SingletonPlugin, toolkit.DefaultDatasetForm)
# let's grab the default schema in our plugin # let's grab the default schema in our plugin
log.debug("creating package....") log.debug("creating package....")
schema = super(D4Science_ThemePlugin, self).create_package_schema() schema = super(D4Science_ThemePlugin, self).create_package_schema()
log.debug("schema after create prima del validator %s", schema) #log.debug("schema after create prima del validator %s", schema)
#schema = remove_check_replicated_custom_key(schema) schema = remove_check_replicated_custom_key(schema)
#log.debug("create_package1 (remove __before): %s", schema)
#schema.update(self._modify_package_schema(schema)) #schema.update(self._modify_package_schema(schema))
schema.update(self._modify_package_schema()) schema.update(self._modify_package_schema())
#d.package_dict_save = _package_dict_save #d.package_dict_save = _package_dict_save
log.debug("create_package1 %s", schema) #log.debug("create_package2 (remove extras validator): %s", schema)
return schema return schema
#IDatasetForm #IDatasetForm
def update_package_schema(self):
    """Return the schema used when a dataset is updated.

    Takes the parent class's update schema, passes it through
    ``remove_check_replicated_custom_key`` and then overlays the entries
    returned by ``self._modify_package_schema()``.
    """
    log.debug("** update_package **")
    base_schema = super(D4Science_ThemePlugin, self).update_package_schema()
    schema = remove_check_replicated_custom_key(base_schema)
    overrides = self._modify_package_schema()
    schema.update(overrides)
    return schema
#IDatasetForm #IDatasetForm
def show_package_schema(self): def show_package_schema(self):
log.debug("** show package **") log.debug("** show package **")
schema = super(D4Science_ThemePlugin, self).show_package_schema() schema = super(D4Science_ThemePlugin, self).show_package_schema()
log.debug("show_package1 %s", schema) #log.debug("show_package1 %s", schema)
#schema = remove_check_replicated_custom_key(schema) schema = remove_check_replicated_custom_key(schema)
#log.debug("show_package1.5 no before %s", schema)
#schema.update(self._modify_package_schema(schema)) #schema.update(self._modify_package_schema(schema))
schema.update({ schema.update({
'extras': { 'extras': {
@ -274,7 +346,7 @@ class D4Science_ThemePlugin(plugins.SingletonPlugin, toolkit.DefaultDatasetForm)
'value': [toolkit.get_validator('not_missing')] 'value': [toolkit.get_validator('not_missing')]
} }
}) })
log.debug("show_package2 %s", schema) #log.debug("show_package2 %s", schema)
return schema return schema
#IDatasetForm #IDatasetForm
@ -424,7 +496,7 @@ class D4Science_ThemePlugin(plugins.SingletonPlugin, toolkit.DefaultDatasetForm)
#Overriding package_extras_save method #Overriding package_extras_save method
# Is this needed? # Is this needed?
# model_save.package_extras_save = _package_extras_save model_save.package_extras_save = _package_extras_save
#Overriding index home controller - rimosso in ckan 2.10 #Overriding index home controller - rimosso in ckan 2.10
#d4sHC = d4SHomeController() #d4sHC = d4SHomeController()
@ -432,6 +504,9 @@ class D4Science_ThemePlugin(plugins.SingletonPlugin, toolkit.DefaultDatasetForm)
global d4s_ctg_namespaces_controller global d4s_ctg_namespaces_controller
#OVERRIDING BASE SQL ALCHEMY ENGINE INSTANCE
TrackingMiddleware.__init__ = _init_TrackingMiddleware
#if d4s_ctg_namespaces_controller is None: #if d4s_ctg_namespaces_controller is None:
# log.info("d4s_ctg_namespaces_controller instancing...") # log.info("d4s_ctg_namespaces_controller instancing...")
# d4s_ctg_namespaces_controller = helpers.get_d4s_namespace_controller() # d4s_ctg_namespaces_controller = helpers.get_d4s_namespace_controller()
@ -567,4 +642,3 @@ class D4Science_ThemePlugin(plugins.SingletonPlugin, toolkit.DefaultDatasetForm)
return facets_dict return facets_dict