Compare commits

...

2 Commits

Author SHA1 Message Date
Alessio Fabrizio 925db5d411 fix: ⬆️ ckan spatial 2-10 support
changed the import according to spatial plugin 2.10-support branch
2024-12-11 12:24:08 +01:00
Alessio Fabrizio 5c483d60a6 feat: 🚧 update d4science customizations from python 2 to 3
updating python 2 code to python 3
2024-12-11 12:12:42 +01:00
1 changed files with 632 additions and 17 deletions

View File

@ -13,14 +13,31 @@ from ckan.plugins.core import SingletonPlugin
from ckanext.spatial.lib.csw_client import CswService
from ckanext.spatial.harvesters.csw import CSWHarvester
from ckanext.spatial.harvested_metadata import ISODocument
from ckanext.spatial.harvested_metadata import ISOElement
#from ckanext.spatial.harvested_metadata import ISODocument
#from ckanext.spatial.harvested_metadata import ISOElement
#in spatial branch 2.10-support ISOObjects are located Here
from ckanext.spatial.model.harvested_metadata import ISODocument
from ckanext.spatial.model.harvested_metadata import ISOElement
from ckan.logic import ValidationError, NotFound, get_action
from ckan.common import config
from datetime import datetime
#add dependencies updated to python3
import xml.etree.ElementTree as ElementTree
import urllib.request
import urllib.error
from ckanext.spatial.model.harvested_metadata import ISOResourceLocator
import re
from urllib.parse import urlparse
GIS_GEONETWORK_METADATA_SOURCE = 'gis_geonetwork:GN_Metadata_Source'
GIS_GEONETWORK_METADATA_SHOW = 'gis_geonetwork:GN_Metadata_Show'
GIS_GEONETWORK_GN_URL = 'gis_geonetwork:GN_URL'
log = logging.getLogger(__name__)
# Extend the ISODocument definitions by adding some more useful elements
@ -35,6 +52,72 @@ ISODocument.elements.append(
multiplicity="*",
))
# D4Science mapping for the author: the organisation name of the metadata
# contact is exposed as an additional ISO element.
log.info('GeoNetwork harvester: extending ISODocument with organisation-name-responsible-party')
ISODocument.elements.append(
    ISOElement(
        name="organisation-name-responsible-party",
        multiplicity="*",
        search_paths=[
            "gmd:contact/gmd:CI_ResponsibleParty/gmd:organisationName/gco:CharacterString/text()",
        ],
    )
)
# D4Science mapping for maintainers: every responsible party listed as a
# point of contact of the data identification is exposed together with its
# individual names, e-mail addresses and online resources.
log.info('GeoNetwork harvester: extending ISODocument with identification-info-responsible-party')
ISODocument.elements.append(
    ISOElement(
        name="identification-info-responsible-party",
        multiplicity="*",
        search_paths=[
            "gmd:identificationInfo/gmd:MD_DataIdentification/gmd:pointOfContact/gmd:CI_ResponsibleParty",
        ],
        elements=[
            ISOElement(
                name="individualName",
                multiplicity="*",
                search_paths=[
                    "gmd:individualName/gco:CharacterString/text()",
                ],
            ),
            ISOElement(
                name="email",
                multiplicity="*",
                search_paths=[
                    "gmd:contactInfo/gmd:CI_Contact/gmd:address/gmd:CI_Address/gmd:electronicMailAddress/gco:CharacterString/text()",
                ],
            ),
            ISOResourceLocator(
                name="online-resource",
                multiplicity="*",
                search_paths=[
                    "gmd:contactInfo/gmd:CI_Contact/gmd:onlineResource/gmd:CI_OnlineResource",
                ],
            ),
        ],
    )
)
# D4Science mapping for MD_FeatureCatalogueDescription: the online resources
# of the parties cited by the feature catalogue citation.
ISODocument.elements.append(
    ISOElement(
        name="featurecataloguedescription-responsible-party",
        multiplicity="*",
        search_paths=[
            "gmd:contentInfo/gmd:MD_FeatureCatalogueDescription/gmd:featureCatalogueCitation/gmd:CI_Citation/gmd:citedResponsibleParty/gmd:CI_ResponsibleParty",
        ],
        elements=[
            ISOResourceLocator(
                name="online-resource",
                multiplicity="*",
                search_paths=[
                    "gmd:contactInfo/gmd:CI_Contact/gmd:onlineResource/gmd:CI_OnlineResource",
                ],
            ),
        ],
    )
)
# Some old GN instances still use the old GML URI.
# Extra XPaths are added below to handle those instances.
log.info('GeoNetwork harvester: adding old GML URI')
@ -52,9 +135,78 @@ for element in ISODocument.elements:
element.search_paths.append(newpath)
log.info("Added old URI for gml to %s", element.name)
#custom classes
class Harvest_Session(dict):
    """Dictionary whose item lookup yields ``None`` for unknown keys.

    Looking up an unknown key does not store it.
    """

    def __missing__(self, key):
        # Called by ``dict.__getitem__`` when ``key`` is absent: answer None
        # instead of raising KeyError.
        return None
class D4S_HTTP_Request_Util:
    """Thin helpers around ``urllib.request`` used by the harvester."""

    @staticmethod
    def get_response_body(uri, data=None, headers=None):
        """Perform a request and return the response body as text.

        :param uri: URL to contact.
        :param data: optional request body. A ``str`` is encoded as UTF-8
            because ``urllib`` only accepts bytes-like bodies for HTTP.
        :param headers: optional mapping of request headers.
        :returns: the body decoded as UTF-8, or ``None`` when the request
            could not be built or did not succeed.
        """
        # A fresh dict per call: a ``{}`` default would be shared by all calls.
        if headers is None:
            headers = {}
        log.debug("Performing request to uri: %s", uri)
        log.debug("headers are: %s", headers)
        log.debug("data passed as body are: %s", data)
        if isinstance(data, str):
            data = data.encode("utf-8")
        try:
            req = urllib.request.Request(uri, data=data, headers=headers)
            # The context manager closes the connection once the body is read.
            with urllib.request.urlopen(req, timeout=2) as resp:
                body = resp.read()
        except urllib.error.HTTPError as e:
            log.error("Error on contacting URI: %s", uri)
            log.error("HTTPError: %d", e.code)
            return None
        except urllib.error.URLError:
            log.error("URLError - Input URI: %s is not valid!!", uri)
            return None
        except (OSError, ValueError) as e:
            # OSError: socket-level failures such as a read timeout.
            # ValueError: malformed URL rejected while building the request.
            log.error("Error on contacting URI: %s (%s)", uri, e)
            return None
        # Decode to obtain a text string (UTF-8).
        return body.decode("utf-8")

    @staticmethod
    def check_url(uri, timeout=10):
        """Tell whether ``uri`` answers with a successful (200-206) status.

        :param uri: URL to probe.
        :param timeout: seconds to wait before giving up, so an unresponsive
            host cannot block the caller indefinitely.
        :returns: ``True`` for a status code between 200 and 206, ``False``
            for any other status or on any error.
        """
        try:
            with urllib.request.urlopen(uri, timeout=timeout) as resp:
                status_code = resp.getcode()
            if 200 <= status_code <= 206:
                return True
            log.error("Error on contacting URI: %s", uri)
            return False
        except urllib.error.HTTPError as e:
            log.error("Error on contacting URI: %s", uri)
            log.error("HTTPError: %d", e.code)
            return False
        except Exception as e:
            log.error("check_url fail on: %s (%s)", uri, e)
            return False
class D4S_IS_Resource_Discovery:
    """Fetches a resource, as parsed XML, from the IC proxy endpoint."""

    # Name of the query-string parameter that carries the gCube token.
    gcubeTokenParam = "gcube-token"

    def __init__(self, urlICProxy, resourceID, gcubeToken):
        """Store the proxy base URL, the resource id and the gCube token."""
        self.urlICProxy = urlICProxy
        self.resourceID = resourceID
        self.gcubeToken = gcubeToken

    def performRequest(self):
        """Request the resource and parse the answer.

        :returns: the root element produced by ``ElementTree.XML`` for the
            response body, or ``None`` when the body is empty or any error
            occurs.
        """
        # The token-free part of the URL is kept apart so that the log lines
        # written here never contain the secret.
        resource_uri = f"{self.urlICProxy}/{self.resourceID}"
        try:
            uri = f"{resource_uri}?{D4S_IS_Resource_Discovery.gcubeTokenParam}={self.gcubeToken}"
            log.debug("Contacting URL: %s?%s=<hidden>", resource_uri,
                      D4S_IS_Resource_Discovery.gcubeTokenParam)
            theResource = D4S_HTTP_Request_Util.get_response_body(uri)
            log.debug("Resource returned %s ", theResource)
            if not theResource:
                return None
            return ElementTree.XML(theResource)
        except Exception as e:
            # Best effort: report the cause and let the caller carry on.
            log.error("Error on performing the request from uri: %s (%s)", resource_uri, e)
            log.debug("Returning None")
            return None
class GeoNetworkHarvester(CSWHarvester, SingletonPlugin):
catalogue_resolver = None
harvest_session = Harvest_Session()
def info(self):
return {
'name': 'geonetwork',
@ -63,6 +215,408 @@ class GeoNetworkHarvester(CSWHarvester, SingletonPlugin):
'form_config_interface': 'Text'
}
#custom class methods
def add_geonetwork_informations_to_package(self, gn_localized_url, package_dict, harvest_object, harvest_session):
    """Add the GeoNetwork link extras to ``package_dict``.

    The GeoNetwork base URL is taken from ``harvest_session`` when an entry
    for the current job is present; otherwise it is read from the
    ``ows:ProviderSite`` href of the CSW GetCapabilities answer of
    ``gn_localized_url`` and stored in ``harvest_session`` together with the
    result of probing the ``/srv/eng/catalog.search#/home`` page.

    :param gn_localized_url: URL of the harvested CSW endpoint.
    :param package_dict: package dictionary being built.
    :param harvest_object: harvest object; its job id scopes the session keys.
    :param harvest_session: mapping used as a per-job cache.
    :returns: ``package_dict``, with the link extras added when a GeoNetwork
        URL could be determined.
    """
    namespaces = {'ows': "http://www.opengis.net/ows", "xlink": "http://www.w3.org/1999/xlink"}
    gn_url_session_key = self._get_session_key(harvest_object, "the_geonetwork_url")
    is_gn3_session_key = self._get_session_key(harvest_object, "is_geonetwork_3")

    cached_url = harvest_session.get(gn_url_session_key)
    if cached_url:
        log.info('Harvest session for current job, the_geonetwork_url: %s, is_geonetwork_3: %s',
                 cached_url, harvest_session.get(is_gn3_session_key))
        return self._add_geoentworks_links(cached_url, package_dict, harvest_object, harvest_session)

    if not gn_localized_url:
        return package_dict

    get_capabilities_request = "request=GetCapabilities&service=CSW&acceptVersions=2.0.2&acceptFormats=application%2Fxml"
    resp = D4S_HTTP_Request_Util.get_response_body(gn_localized_url + "?" + get_capabilities_request)
    if not resp:
        return package_dict

    try:
        # Decode the content if necessary.
        if isinstance(resp, bytes):
            resp = resp.decode("utf-8")
        root = ElementTree.fromstring(resp)

        geonetwork_url = None
        # When several ProviderSite elements exist, the last one wins.
        for provider_site in root.findall('.//ows:ProviderSite', namespaces):
            geonetwork_url = provider_site.get('{http://www.w3.org/1999/xlink}href')
            log.info("Read geonetwork_url from GetCapabilities: %s", geonetwork_url)

        if not geonetwork_url:
            # Without a base URL no link can be built; say so explicitly
            # instead of failing on string concatenation with None.
            log.warning("No Geonetwork informations added!! No ProviderSite URL found in the get capabilities of: %s",
                        gn_localized_url)
            return package_dict

        parsed_url = urlparse(geonetwork_url)
        # Remove the ports 80 or 8080.
        if parsed_url.port in {80, 8080}:
            geonetwork_url = f"{parsed_url.scheme}://{parsed_url.hostname}{parsed_url.path}"
            log.info("Removed port 80 or 8080 from geonetwork_url: %s", geonetwork_url)

        harvest_session[gn_url_session_key] = geonetwork_url
        harvest_session[is_gn3_session_key] = D4S_HTTP_Request_Util.check_url(
            geonetwork_url + "/srv/eng/catalog.search#/home")
        log.info('Updated Harvest session for current job %s', harvest_session)
        package_dict = self._add_geoentworks_links(geonetwork_url, package_dict, harvest_object, harvest_session)
    except Exception as err:
        log.warning("No Geonetwork informations added!! Error on parsing the get capabilities: %s", err)
    return package_dict
def _add_geoentworks_links(self, geonetwork_url, package_dict, harvest_object, harvest_session):
    """Append the three GeoNetwork link extras to ``package_dict``.

    The extras are the GeoNetwork URL, the "metadata show" page and the
    "metadata source" XML link for ``harvest_object.guid``. Only the "show"
    link depends on the ``is_geonetwork_3`` flag stored in
    ``harvest_session`` for the current job.

    :returns: the same ``package_dict``.
    """
    geonetwork_service_url = geonetwork_url + '/srv/en'
    is_gn3 = harvest_session.get(self._get_session_key(harvest_object, "is_geonetwork_3"))
    if is_gn3:
        # TODO check if package_dict['extras'] is correct/works
        log.debug('GN3 Service URL is %s', geonetwork_service_url)
        show_path = '/catalog.search#/metadata/'
    else:
        log.debug('GN2 Service URL is %s', geonetwork_service_url)
        show_path = '/metadata.show?uuid='

    extras = package_dict['extras']
    extras.append({'key': GIS_GEONETWORK_GN_URL, 'value': geonetwork_url})
    extras.append({'key': GIS_GEONETWORK_METADATA_SHOW,
                   'value': geonetwork_service_url + show_path + harvest_object.guid})
    extras.append({'key': GIS_GEONETWORK_METADATA_SOURCE,
                   'value': geonetwork_service_url + '/xml.metadata.get?uuid=' + harvest_object.guid})
    return package_dict
def _get_session_key(self, harvest_object, key):
'''Returns a session key for the harvest job running'''
return harvest_object.job.id + "_key_" + key
# Added by Francesco Mangiacrapa
def add_item_url_to_package(self, gn_localized_url, package_dict):
    """Add the ``Item URL`` extra resolved through the catalogue resolver.

    The resolver endpoint is discovered once (value of the ``URL_RESOLVER``
    property of the resource returned by ``D4S_IS_Resource_Discovery``) and
    kept in ``self.catalogue_resolver``. When ``gn_localized_url`` has a
    fifth ``/``-separated segment and no ``csw`` segment, that segment is
    used as scope: the resolver is asked for the item URL of
    ``package_dict["name"]`` and the answer is stored as extra if it
    responds with status 200.

    :param gn_localized_url: URL of the harvest source.
    :param package_dict: package dictionary being built; must hold ``name``.
    :returns: ``package_dict`` (with the extra added when resolved).
    """
    import json  # local import: only used to build the resolver request body

    log.debug("add_item_url_to_package")
    set_item_url = config.get('ckan.d4science_theme.harvesting_set_item_url')
    # Options read from an ini file are usually strings, so the textual
    # forms of "false" must disable the feature as well.
    if set_item_url is not None and (
            not set_item_url or str(set_item_url).strip().lower() in ('false', 'no', 'off', '0')):
        log.info("set item url configuration is False, returning")
        return package_dict

    if not self.catalogue_resolver:
        urlICProxy = config.get('ckan.d4science_theme.ic_proxy_url')  # "https://registry.d4science.org/icproxy/gcube/service"
        resourceID = config.get('ckan.d4science_theme.ckandatacatalogue_resourceid')  # CkanDataCatalogue GR
        if not resourceID:
            # DEV CkanDataCatalogue GR is "56ec4876-999f-4afc-a9e3-efbda5f5c8bc"
            resourceID = "2e067010-3d97-11e8-bcb7-f39deee66c72"  # PROD CkanDataCatalogue GR
            log.warning("'ckan.d4science_theme.ckandatacatalogue_resourceid' not found into configuration. "
                        "Hard-cabling CkanDataCatalogue GR resourceID: %s", resourceID)
        gcubeToken = config.get('ckan.d4science_theme.application_token')  # The gCube Token

        # Lazy %-arguments cope with missing (None) options; the token
        # itself is a secret and is not written to the log.
        log.debug("urlICProxy: %s", urlICProxy)
        log.debug("resourceID: %s", resourceID)
        log.debug("gcubeToken configured: %s", bool(gcubeToken))

        disc = D4S_IS_Resource_Discovery(urlICProxy, resourceID, gcubeToken)
        response = disc.performRequest()
        # Compare with None: an element without children is falsy.
        if response is not None:
            the_end_points = []
            # xml.etree elements have no xpath(); this is the equivalent of
            # /Resource/.../Property/Name[text()="URL_RESOLVER"]/../Value
            if response.tag == "Resource":
                the_end_points = response.findall(
                    "Profile/AccessPoint/Properties/Property[Name='URL_RESOLVER']/Value")
            if the_end_points:
                self.catalogue_resolver = the_end_points[0].text
                log.info("Found catalogue_resolver: %s", self.catalogue_resolver)

    if not self.catalogue_resolver:
        return package_dict

    url_split = gn_localized_url.split("/")
    the_scope = None
    # Only if the URL is a GeoNetwork resolver link, try to get the (gCube)
    # scope. Index 4 is read, so at least five segments are required.
    if len(url_split) > 4 and 'csw' not in url_split:
        the_scope = url_split[4].replace("|", "/").replace("%7C", "/")
        if not the_scope.startswith("/"):
            the_scope = "/" + the_scope
        log.debug("Found the scope: %s", the_scope)

    if not the_scope:
        return package_dict

    # json.dumps escapes the values; urllib needs the body as bytes.
    query = json.dumps({
        "gcube_scope": the_scope,
        "entity_context": "dataset",
        "entity_name": package_dict["name"],
    }).encode("utf-8")
    headers = {"Content-Type": "application/json"}
    the_item_url = D4S_HTTP_Request_Util.get_response_body(self.catalogue_resolver, query, headers)
    the_item_url = the_item_url.strip() if the_item_url else None
    if not the_item_url:
        log.warning("No Item URL added!! The catalogue resolver returned no URL for the scope: %s", the_scope)
        return package_dict

    try:
        # The URL is added only if it can actually be reached.
        with urllib.request.urlopen(the_item_url) as item_response:
            if item_response.getcode() == 200:
                package_dict['extras'].append({'key': 'Item URL', 'value': the_item_url})
                log.info("Added Item URL: %s", the_item_url)
    except Exception as inst:
        log.warning("No Item URL added!! Error on performing the request from uri: %s (%s)", the_item_url, inst)
    return package_dict
def add_license_to_package(self, package_dict):
    """Set ``license_id``/``license_title`` when no licence was harvested.

    Each value is looked up in the harvest source configuration, then in the
    CKAN configuration, and finally falls back to a hard-coded default. The
    values are written only when the ``licence`` extra is absent, empty or
    equal to ``'[]'``. Any error is logged as a warning and the dictionary
    is returned as it is.
    """
    # Added by Francesco Mangiacrapa
    k_license_id = 'license_id'
    k_license_title = 'license_title'
    k_harvest_license_id = 'ckan.d4science_theme.harvest_license_id'
    k_harvest_license_title = 'ckan.d4science_theme.harvest_license_title'

    try:
        license_id = self.source_config.get(k_license_id)
        log.debug(f'Read {k_license_id} as {license_id} from input configuration parameter')
        if license_id is None:
            license_id = config.get(k_harvest_license_id)
            log.debug(f'Read {k_harvest_license_id} as {license_id} from production.ini')
        if license_id is None:
            license_id = 'CC-BY-SA-4.0'
            log.debug(f'Using default {k_license_id} {license_id}')

        license_title = self.source_config.get(k_license_title)
        log.debug(f'Read {k_license_title} as {license_title} from input configuration parameter')
        if license_title is None:
            license_title = config.get(k_harvest_license_title)
            log.debug(f'Read {k_harvest_license_title} as {license_title} from production.ini')
        if license_title is None:
            license_title = (
                'Creative Commons Attribution Share-Alike 4.0'
                if license_id == 'CC-BY-SA-4.0'
                else 'Unknown License Title'
            )
            log.debug(f'Using default {k_license_title} {license_title}')

        # First extra whose key is 'licence', if any.
        licence_extra = next(
            (extra for extra in package_dict['extras'] if 'key' in extra and extra['key'] == 'licence'),
            None,
        )
        harvested_licence = None
        if licence_extra is not None:
            harvested_licence = licence_extra['value']
            log.debug(f'licence value in extra field has value {harvested_licence}')

        if not harvested_licence or len(harvested_licence) == 0 or harvested_licence == '[]':
            package_dict[k_license_id] = license_id
            package_dict[k_license_title] = license_title
            log.debug(f'license_id has value: {package_dict[k_license_id]}')
            log.debug(f'license_title has value: {package_dict[k_license_title]}')
    except Exception as inst:
        log.warning(f"Impossible to add the license_id: {str(inst)}")
    return package_dict
def add_systemtype_to_package(self, package_dict):
    """Append the system-type extra to ``package_dict``.

    Key: ``systemtypefield`` of the harvest source configuration, else
    ``ckan.d4science_theme.systemtypefield`` of the CKAN configuration, else
    ``'system:type'``.

    Value: ``systemtypevalue`` of the harvest source configuration, else the
    ``resource-type`` extra, else
    ``ckan.d4science_theme.harvestingsystemtypevalue``, else ``'Dataset'``.
    The value is capitalized before being stored.

    :returns: the same ``package_dict``.
    """
    # ADDED BY FRANCESCO MANGIACRAPRA
    # Task #8726
    field_name = self.source_config.get('systemtypefield')
    field_value = self.source_config.get('systemtypevalue')

    log.debug(f'Read systemtypefield {field_name} from input configuration parameter')
    if field_name is None:
        field_name = config.get('ckan.d4science_theme.systemtypefield')
        log.debug(f'Read ckan.d4science_theme.systemtypefield {field_name} from production.ini')
    if field_name is None:
        field_name = 'system:type'
        log.debug(f'Using default systemtypefield {field_name}')

    log.debug(f'Read systemtypevalue {field_value} from input configuration parameter')
    if field_value is None:
        # Task #9281
        # Setting system:type based on the "gmd:hierarchyLevel/gmd:MD_ScopeCode"
        # codelist codes. ISO 19139 XML defines an extended list of scopes
        # (GMX codelist). Sticking with this codelist, "process" is not
        # included but the generic "service" should be used IMHO. There is
        # also "software" that may be applicable. The values of the codelist:
        # attribute, attributeType, collectionHardware, collectionSession,
        # dataset, series, nonGeographicDataset, dimensionGroup, feature,
        # featureType, propertyType, fieldSession, software, service, model,
        # tile, initiative, stereomate, sensor, platformSeries, sensorSeries,
        # productionSeries, transferAggregate, otherAggregate.
        resource_type_extra = next(
            (extra for extra in package_dict['extras']
             if 'key' in extra and extra['key'] == 'resource-type'),
            None,
        )
        if resource_type_extra is not None:
            field_value = resource_type_extra['value']
            log.debug(f'resource-type value in extra field has value {field_value}')

    if not field_value or len(field_value) == 0:
        field_value = config.get('ckan.d4science_theme.harvestingsystemtypevalue')
        log.debug(f'Read ckan.d4science_theme.harvestingsystemtypevalue {field_value} from production.ini')
    if not field_value:
        field_value = 'Dataset'
        log.debug(f'Using hard-coded value for system:type {field_value}')

    field_value = field_value.capitalize()
    log.debug(f"adding key: {field_name} capitalized value: {field_value}")
    package_dict['extras'].append({'key': field_name, 'value': field_value})
    return package_dict
# Added by Francesco Mangiacrapa
def infer_authors(self, values):
    """Return the distinct organisation names of the responsible parties.

    Names are read from ``values["organisation-name-responsible-party"]``;
    the first occurrence of each name is kept, in the original order.
    """
    names = values["organisation-name-responsible-party"]
    # Keep a name only if it did not already appear earlier in the list.
    authors = [name for position, name in enumerate(names) if name not in names[:position]]
    log.info(f"{len(authors)} AUTHOR found as OrganisationName in ResponsibleParty")
    return authors
def infer_point_of_contacts(self, values):
    """Build one contact dictionary per responsible party.

    Parties are read from ``values["identification-info-responsible-party"]``.
    A non-empty ``individualName`` list becomes ``name`` and a non-empty
    ``email`` list becomes ``email``, each joined with ``", "``. A party
    with neither yields an empty dictionary.
    """
    # (key in the responsible party, key in the resulting contact)
    field_mapping = (('individualName', 'name'), ('email', 'email'))

    point_of_contacts = []
    for responsible_party in values["identification-info-responsible-party"]:
        log.debug(f"responsible_party: {responsible_party}")
        contact = {}
        for source_key, target_key in field_mapping:
            if source_key in responsible_party and responsible_party[source_key]:
                contact[target_key] = ', '.join(responsible_party[source_key])
        log.debug(f"Adding point_of_contact: {contact}")
        point_of_contacts.append(contact)

    log.info(f"{len(point_of_contacts)} Point of Contact/s found as Contact-Info in ResponsibleParty")
    return point_of_contacts
# Added by Francesco Mangiacrapa
def infer_featurecataloguedescription_iso110(self, values):
    """Describe the online resources of the feature catalogue parties.

    Every online resource found under
    ``values["featurecataloguedescription-responsible-party"]`` becomes its
    own dictionary, so several resources of one party can neither overwrite
    each other nor end up as the same object repeated in the result.

    :param values: ISO values; each party may hold an ``online-resource``
        list of dictionaries.
    :returns: list of dictionaries with ``url`` and ``description`` (when
        present in the resource) and ``name`` (``"Unnamed Resource"`` when
        missing or empty).
    """
    feature_catalogue_descrs = []
    for feature_catalogue in values.get("featurecataloguedescription-responsible-party") or []:
        log.debug(f"Feature catalogue description: {feature_catalogue}")
        feature_resource_list = feature_catalogue.get('online-resource') or []
        log.debug(f"Feature resource list is: {feature_resource_list}")
        for feature_resource in feature_resource_list:
            log.debug(f"Feature resource is: {feature_resource}")
            # A fresh dictionary per resource.
            feature_catalogue_field = {
                key: feature_resource[key]
                for key in ('url', 'description')
                if key in feature_resource
            }
            feature_catalogue_field['name'] = feature_resource.get('name') or "Unnamed Resource"
            log.debug(f"Adding feature catalogue description: {feature_catalogue_field}")
            feature_catalogue_descrs.append(feature_catalogue_field)
    log.info(f"{len(feature_catalogue_descrs)} Feature Catalogue description added")
    return feature_catalogue_descrs
def add_topic_category_to_group(self, iso_values, package_dict, the_user='harvest'):
    """Store the ISO topic categories and map them to existing groups.

    The categories are joined into the ``topic_category`` extra. For each
    category, the package is assigned to the group with the same lower-cased
    name, provided that ``group_show`` finds such a group. This check is
    performed whether or not the package already has a ``groups`` entry.

    Nothing is done when the harvest source option
    ``add_topic_category_to_group`` is ``False`` or the string ``"False"``.

    NOTE(review): ``model`` and ``Session`` must be importable at module
    level; their import is not visible in this part of the file — confirm.

    :param iso_values: ISO values, optionally holding ``topic-category``.
    :param package_dict: package dictionary being built.
    :param the_user: user name placed in the action context.
    :returns: the same ``package_dict``.
    """
    add_iso_categories = self.source_config.get('add_topic_category_to_group')
    # A JSON source configuration may carry a real boolean.
    if add_iso_categories is False or str(add_iso_categories).strip().lower() == "false":
        return package_dict

    if 'topic-category' in iso_values:
        log.debug('iso_values contains topic category')
        topic_categories = iso_values['topic-category'] or []
        if topic_categories:
            topic_category_values = ", ".join(topic_categories)
            package_dict['extras'].append({'key': 'topic_category', 'value': topic_category_values})
            log.debug(f'topic_category: {topic_category_values}')

        # Adding to groups if the group as iso-category already exists
        for iso_cat in topic_categories:
            log.debug(f"Adding topic_category: {iso_cat} as group")
            isocat_name_lower = iso_cat.lower()
            current_groups = package_dict.get('groups') or []
            if any((group.get('name') or '').lower() == isocat_name_lower for group in current_groups):
                continue

            context = {'model': model, 'session': Session, 'user': the_user}
            try:
                get_action('group_show')(context, {'id': isocat_name_lower})
            except NotFound as e:
                log.warning(f'Group {iso_cat} from category {iso_cat} is not available: {e}')
                continue

            if package_dict.get('groups'):
                package_dict['groups'].append({'name': isocat_name_lower})
            else:
                log.debug(f'creating groups into dictionary and assigning to group: {iso_cat}')
                package_dict['groups'] = [{'name': isocat_name_lower}]
            log.info(f'Dataset added to group: {isocat_name_lower}')
    else:
        log.info('iso_values does not contain topic category')

    if 'groups' in package_dict:
        log.info(f'groups are: {package_dict["groups"]}')
    return package_dict
def add_as_resources(self, package_dict, resource_list):
    """Append every entry of ``resource_list`` to ``package_dict['resources']``.

    Only the ``name``, ``description``, ``url`` and ``format`` keys present
    in an entry are copied. An empty or missing list leaves the package
    untouched.

    :returns: the same ``package_dict``.
    """
    copied_fields = ('name', 'description', 'url', 'format')
    if resource_list:
        for resource in resource_list:
            the_resource = {field: resource[field] for field in copied_fields if field in resource}
            package_dict['resources'].append(the_resource)
            log.info(f'Added resource {the_resource} to resources')
    return package_dict
# Added by Francesco Mangiacrapa
def add_as_tag(self, package_dict, tag_list):
    """Append each name of ``tag_list`` to ``package_dict['tags']``.

    An empty or missing list leaves the package untouched.

    :returns: the same ``package_dict``.
    """
    for tag_name in tag_list or ():
        package_dict['tags'].append({'name': tag_name})
        log.info(f'Added tag {tag_name}')
    return package_dict
# Added by Francesco Mangiacrapa
def validate_tags(self, package_dict):
    """Sanitize and de-duplicate the tags of ``package_dict``.

    In every tag name, each run of characters other than ASCII letters,
    digits, space, ``.``, ``_`` and ``-`` is replaced by one space and the
    result is stripped. Tags whose sanitized name is empty, or equal to one
    already kept, are skipped. Duplicates are detected on the sanitized
    name, so two raw names that collapse to the same text yield one tag.

    :param package_dict: package dictionary; a missing or empty ``tags``
        entry is left as it is.
    :returns: the same ``package_dict`` with ``tags`` replaced by the
        sanitized list.
    """
    tags = package_dict.get('tags')
    if not tags:
        return package_dict

    new_tags = []
    seen_names = set()
    for tag in tags:
        try:
            raw_name = tag['name']
            purged_tag_name = re.sub(r'[^A-Za-z0-9 ._-]+', ' ', raw_name).strip() if raw_name else ''
        except (KeyError, TypeError) as e:
            # Entry without a name, or with a name that is not text.
            log.error(f"error validating tag: {e}")
            continue
        if not purged_tag_name or purged_tag_name in seen_names:
            log.info(f"Skipping None or Duplicated Tag: {raw_name}")
            continue
        seen_names.add(purged_tag_name)
        new_tags.append({'name': purged_tag_name})

    # Assigned even when empty: keeping the unsanitized tags would hand
    # names that failed sanitization to the rest of the pipeline.
    package_dict['tags'] = new_tags
    return package_dict
def get_package_dict(self, iso_values, harvest_object):
package_dict = super(GeoNetworkHarvester, self).get_package_dict(iso_values, harvest_object)
@ -92,21 +646,81 @@ class GeoNetworkHarvester(CSWHarvester, SingletonPlugin):
harvest_job_id=str(harvest_object.job.id),
harvest_object_id=str(harvest_object.id),
guid=str(harvest_object.guid))
package_dict['extras'].append({'key': key, 'value': value})
#add check
if key != "contact-email":
package_dict['extras'].append({'key': key, 'value': value})
else:
log.debug('Skipping existing extra %s', key)
# Add GeoNetwork specific extras
gn_localized_url = harvest_object.job.source.url.strip('/')
if gn_localized_url[-3:] == 'csw':
gn_localized_url = gn_localized_url[:-3]
# this code is not needed(?)
#if gn_localized_url[-3:] == 'csw':
# gn_localized_url = gn_localized_url[:-3]
log.debug('GN localized URL %s', gn_localized_url)
#log.debug('Package dict is %r ', package_dict['extras'])
package_dict['extras'].append({'key': 'gn_view_metadata_url', 'value': gn_localized_url + '/metadata.show?uuid=' + harvest_object.guid})
package_dict['extras'].append({'key': 'gn_localized_url', 'value': gn_localized_url})
# not in d4science but in geonetwork 2.10
#package_dict['extras'].append({'key': 'gn_view_metadata_url', 'value': gn_localized_url + '/metadata.show?uuid=' + harvest_object.guid})
#package_dict['extras'].append({'key': 'gn_localized_url', 'value': gn_localized_url})
#d4science code
package_dict = self.add_geonetwork_informations_to_package(gn_localized_url, package_dict, harvest_object, self.harvest_session)
package_dict = self.add_item_url_to_package(gn_localized_url, package_dict)
authors = self.infer_authors(iso_values)
# Adding Authors
if authors:
package_dict['author'] = ",".join(authors)
log.debug(f'Author/s are: {package_dict["author"]}')
# Adding Logged User as Maintainer
user_logged = super(GeoNetworkHarvester, self)._get_user_name()
if user_logged:
package_dict['maintainer'] = user_logged
main_mail = config.get('ckan.admin_email')
if main_mail:
package_dict['maintainer_email'] = main_mail
# Adding Point of Contacts
point_of_contacts = self.infer_point_of_contacts(iso_values)
for idx, item in enumerate(point_of_contacts):
log.debug(f"point_of_contact: {item}")
poc_value = ''
if 'name' in item:
poc_value = item['name']
if 'email' in item:
if len(poc_value) > 1:
poc_value += f', {item["email"]}'
else:
poc_value += item['email']
poc_key = 'point_of_contact'
if len(point_of_contacts) > 1:
poc_key += f' {idx + 1}'
if poc_value and len(poc_value) > 1:
package_dict['extras'].append({'key': poc_key, 'value': poc_value})
log.debug(f'Added point of contact: {poc_key} {poc_value}')
package_dict = self.add_topic_category_to_group(iso_values, package_dict, user_logged)
package_dict = self.add_systemtype_to_package(package_dict)
package_dict = self.add_license_to_package(package_dict)
fc_descriptions = self.infer_featurecataloguedescription_iso110(iso_values)
# Adding List of Feature Catalogue Description as CKAN RESOURCES
package_dict = self.add_as_resources(package_dict, fc_descriptions)
# If the previous list is not empty, It adds the label 'Feature Catalogue Resource' as CKAN TAG
if len(fc_descriptions) > 0:
package_dict = self.add_as_tag(package_dict, ['Feature Catalog Resource'])
package_dict = self.validate_tags(package_dict)
# Add other elements from ISO metadata
time_extents = self.infer_timeinstants(iso_values)
@ -193,14 +807,15 @@ class GeoNetworkHarvester(CSWHarvester, SingletonPlugin):
def fix_resource_type(self, resources):
    """Mark the WMS resources and, when enabled, verify them.

    A resource whose ``resource_locator_protocol`` contains ``OGC:WMS`` gets
    ``format`` set to ``'wms'``. If ``ckanext.spatial.harvest.validate_wms``
    is enabled and ``self._is_wms`` accepts the URL without its query
    string, ``verified`` and ``verified_date`` are set as well. Resources
    without a protocol are skipped instead of raising ``KeyError``.

    :param resources: list of resource dictionaries, modified in place.
    """
    for resource in resources:
        # The protocol may be missing or empty.
        protocol = resource.get('resource_locator_protocol') or ''
        if 'OGC:WMS' not in protocol:
            continue
        resource['format'] = 'wms'

        if not config.get('ckanext.spatial.harvest.validate_wms', False):
            continue
        # Check if the service is a view service
        test_url = resource['url'].split('?', 1)[0]
        if self._is_wms(test_url):
            resource['verified'] = True
            resource['verified_date'] = datetime.now().isoformat()