diff --git a/ckanext/geonetwork/harvesters/geonetwork.py b/ckanext/geonetwork/harvesters/geonetwork.py index 3d2d050..5dd2b4a 100644 --- a/ckanext/geonetwork/harvesters/geonetwork.py +++ b/ckanext/geonetwork/harvesters/geonetwork.py @@ -21,6 +21,20 @@ from ckan.logic import ValidationError, NotFound, get_action from ckan.common import config from datetime import datetime +#add dependencies updated to python3 +import xml.etree.ElementTree as ElementTree +import urllib.request +import urllib.error +from ckanext.spatial.model import ISOResourceLocator +import re +from urllib.parse import urlparse + +GIS_GEONETWORK_METADATA_SOURCE = 'gis_geonetwork:GN_Metadata_Source' + +GIS_GEONETWORK_METADATA_SHOW = 'gis_geonetwork:GN_Metadata_Show' + +GIS_GEONETWORK_GN_URL = 'gis_geonetwork:GN_URL' + log = logging.getLogger(__name__) # Extend the ISODocument definitions by adding some more useful elements @@ -35,6 +49,72 @@ ISODocument.elements.append( multiplicity="*", )) +# D4S MAPPING FOR AUTHOR IS THE ORGANIZATION NAME +log.info('GeoNetwork harvester: extending ISODocument with organisation-name-responsible-party') +ISODocument.elements.append( + ISOElement( + name="organisation-name-responsible-party", + search_paths=[ + "gmd:contact/gmd:CI_ResponsibleParty/gmd:organisationName/gco:CharacterString/text()", + ], + multiplicity="*", + )) + +# D4S MAPPING FOR MAINTAINERS +log.info('GeoNetwork harvester: extending ISODocument with identification-info-responsible-party') +ISODocument.elements.append( + ISOElement( + name="identification-info-responsible-party", + search_paths=[ + "gmd:identificationInfo/gmd:MD_DataIdentification/gmd:pointOfContact/gmd:CI_ResponsibleParty", + ], + multiplicity="*", + elements=[ + ISOElement( + name="individualName", + search_paths=[ + "gmd:individualName/gco:CharacterString/text()", + ], + multiplicity="*", + ), + ISOElement( + name="email", + search_paths=[ + 
class D4S_HTTP_Request_Util:
    """Static helpers for the plain HTTP calls issued by the harvester."""

    @staticmethod
    def get_response_body(uri, data=None, headers=None, timeout=2):
        """Request ``uri`` and return the response body decoded as UTF-8.

        Args:
            uri: URL to contact.
            data: optional request body. A ``str`` is encoded as UTF-8
                because urllib only accepts bytes as an HTTP body.
            headers: optional mapping of request headers.
            timeout: seconds to wait for the server; defaults to 2, the
                value that was previously hard-coded.

        Returns:
            The body as ``str`` if the request succeeded, ``None`` if the
            URI is malformed, unreachable, or answered with an HTTP error.

        Raises:
            UnicodeDecodeError: if the body is not valid UTF-8.
        """
        # A fresh dict per call: a ``{}`` default would be shared by all calls
        headers = dict(headers) if headers else {}
        log.debug("Performing request to uri: %s", uri)
        log.debug("headers are: %s", headers)
        log.debug("data passed as body are: %s", data)
        if isinstance(data, str):
            data = data.encode("utf-8")
        try:
            # Request() itself raises ValueError on a malformed URI, so it
            # has to live inside the try block as well
            req = urllib.request.Request(uri, data=data, headers=headers)
            with urllib.request.urlopen(req, timeout=timeout) as resp:
                body = resp.read()
        except urllib.error.HTTPError as e:
            log.error("Error on contacting URI: %s", uri)
            log.error("HTTPError: %d", e.code)
            return None
        except (urllib.error.URLError, ValueError):
            log.error("URLError - Input URI: %s is not valid!!", uri)
            return None
        except OSError:
            # e.g. a socket timeout while reading the body
            log.error("Error on contacting URI: %s", uri)
            return None
        # Decode to obtain a string (UTF-8)
        return body.decode("utf-8")

    @staticmethod
    def check_url(uri, timeout=10):
        """Return True if ``uri`` answers with a successful (2xx) status.

        Args:
            uri: URL to probe.
            timeout: seconds to wait before giving up, so that a silent
                server cannot block the caller forever.

        Returns:
            ``True`` for a 2xx answer, ``False`` for any other outcome.
        """
        try:
            with urllib.request.urlopen(uri, timeout=timeout) as resp:
                status_code = resp.getcode()
        except urllib.error.HTTPError as e:
            log.error("Error on contacting URI: %s", uri)
            log.error("HTTPError: %d", e.code)
            return False
        except Exception:
            # Best-effort probe: any failure simply means "not reachable"
            log.error("check_url fail on: %s ", uri)
            return False

        # Non-HTTP handlers (e.g. file:) report no status code at all
        if status_code is not None and 200 <= status_code < 300:
            return True

        log.error("Error on contacting URI: %s", uri)
        return False
"the_geonetwork_url") + + if harvest_session.get(gn_url_session_key): + log.info('Harvest session for current job, the_geonetwork_url: %s, is_geonetwork_3: %s', + harvest_session.get(gn_url_session_key), + harvest_session.get(self._get_session_key(harvest_object, "is_geonetwork_3"))) + return self._add_geoentworks_links(harvest_session.get(gn_url_session_key), package_dict, harvest_object, + harvest_session) + + if gn_localized_url: + get_capabilities_request = "request=GetCapabilities&service=CSW&acceptVersions=2.0.2&acceptFormats=application%2Fxml" + resp = D4S_HTTP_Request_Util.get_response_body(gn_localized_url + "?" + get_capabilities_request) + + if resp: + try: + # Decodifica il contenuto se necessario + if isinstance(resp, bytes): + resp = resp.decode("utf-8") + + root = ElementTree.fromstring(resp) + + geonetwork_url = None + for provideSites in root.findall('.//ows:ProviderSite', namespaces): + geonetwork_url = provideSites.get('{http://www.w3.org/1999/xlink}href') + log.info("Read geonetwork_url from GetCapabilities: %s", geonetwork_url) + if geonetwork_url: + parsedUrl = urlparse(geonetwork_url) + # Rimuovi le porte 80 o 8080 + if parsedUrl.port in {80, 8080}: + geonetwork_url = f"{parsedUrl.scheme}://{parsedUrl.hostname}{parsedUrl.path}" + log.info("Removed port 80 or 8080 from geonetwork_url: %s", geonetwork_url) + + if geonetwork_url: + harvest_session[gn_url_session_key] = geonetwork_url + is_gn3_session_key = self._get_session_key(harvest_object, "is_geonetwork_3") + harvest_session[is_gn3_session_key] = D4S_HTTP_Request_Util.check_url( + geonetwork_url + "/srv/eng/catalog.search#/home") + log.info('Updated Harvest session for current job %s', harvest_session) + package_dict = self._add_geoentworks_links(geonetwork_url, package_dict, harvest_object, harvest_session) + + except Exception as err: + log.warning("No Geonetwork informations added!! 
# Added by Francesco Mangiacrapa
def add_item_url_to_package(self, gn_localized_url, package_dict):
    """Add the catalogue "Item URL" extra to ``package_dict`` when resolvable.

    The catalogue resolver endpoint is discovered once through the IC proxy
    and cached on ``self.catalogue_resolver``. The gCube scope is taken from
    the fifth "/"-separated segment of ``gn_localized_url``; plain CSW
    endpoints (a ``csw`` segment in the URL) carry no scope and are skipped.

    Args:
        gn_localized_url: URL of the harvest source.
        package_dict: package being built; must contain ``name`` and
            ``extras``.

    Returns:
        ``package_dict``, with an ``Item URL`` extra appended only if the
        resolved link answered with HTTP 200.
    """
    import json  # local import: only needed to build the resolver query

    log.debug("add_item_url_to_package")

    set_item_url = config.get('ckan.d4science_theme.harvesting_set_item_url')
    # Values read from an .ini file are strings, so "false" must disable too
    if set_item_url is not None and (
            not set_item_url
            or str(set_item_url).strip().lower() in ('false', '0', 'no', 'off')):
        log.info("set item url configuration is False, returning")
        return package_dict

    if not self.catalogue_resolver:
        # e.g. "https://registry.d4science.org/icproxy/gcube/service"
        urlICProxy = config.get('ckan.d4science_theme.ic_proxy_url')
        # CkanDataCatalogue GR
        resourceID = config.get('ckan.d4science_theme.ckandatacatalogue_resourceid')
        if not resourceID:
            # PROD CkanDataCatalogue GR
            # (the DEV one is "56ec4876-999f-4afc-a9e3-efbda5f5c8bc")
            resourceID = "2e067010-3d97-11e8-bcb7-f39deee66c72"
            log.warning("'ckan.d4science_theme.ckandatacatalogue_resourceid' not found into configuration. Hard-cabling CkanDataCatalogue GR resourceID: %s", resourceID)

        # The gCube Token
        gcubeToken = config.get('ckan.d4science_theme.application_token')

        log.debug("urlICProxy: %s", urlICProxy)
        log.debug("resourceID: %s", resourceID)
        # Never write the secret itself to the log
        log.debug("gcubeToken: %s", "<set>" if gcubeToken else "<missing>")

        response = None
        if urlICProxy and gcubeToken:
            disc = D4S_IS_Resource_Discovery(urlICProxy, resourceID, gcubeToken)
            response = disc.performRequest()
        else:
            log.warning("IC proxy url or application token missing, cannot discover the catalogue resolver")

        # performRequest() returns an xml.etree element, which has no
        # xpath(); an element without children is also falsy, hence the
        # explicit None check.
        # NOTE(review): assumes the returned root element is <Resource>.
        if response is not None:
            the_end_points = response.findall(
                "Profile/AccessPoint/Properties/Property[Name='URL_RESOLVER']/Value")

            if the_end_points:
                self.catalogue_resolver = the_end_points[0].text

            log.info("Found catalogue_resolver: %s", self.catalogue_resolver)

    if not self.catalogue_resolver:
        return package_dict

    url_split = gn_localized_url.split("/")

    the_scope = None
    # ONLY IF THE URL IS A GEONETWORK RESOLVER LINK, TRY TO GET THE (GCUBE) SCOPE
    # (index 4 is read, so at least five segments are required)
    if len(url_split) >= 5 and 'csw' not in url_split:
        the_scope = url_split[4].replace("|", "/").replace("%7C", "/")
        if not the_scope.startswith("/"):
            the_scope = "/" + the_scope

    log.debug("Found the scope: %s", the_scope)

    if not the_scope:
        return package_dict

    # json.dumps escapes quotes in the values; bytes are what urllib needs
    query = json.dumps({
        "gcube_scope": the_scope,
        "entity_context": "dataset",
        "entity_name": package_dict["name"],
    }).encode("utf-8")
    headers = {"Content-Type": "application/json"}
    the_item_url = D4S_HTTP_Request_Util.get_response_body(self.catalogue_resolver, query, headers)

    if not the_item_url:
        log.warning(u"No Item URL added!! Error on performing the request from uri: {}".format(the_item_url))
        return package_dict

    the_item_url = the_item_url.strip()
    try:
        # The link is stored only if it is actually reachable
        with urllib.request.urlopen(the_item_url, timeout=10) as response:
            if response.getcode() == 200:
                package_dict['extras'].append({'key': 'Item URL', 'value': the_item_url})
                log.info("Added Item URL: %s", the_item_url)
    except Exception as inst:
        # Best effort: a broken resolver must not abort the harvest
        log.warning(u"No Item URL added!! Error on performing the request from uri: {}".format(the_item_url))
        log.debug("Item URL check failed with: %s", inst)

    return package_dict
def add_systemtype_to_package(self, package_dict):
    """Append the "system type" extra to ``package_dict``.

    The extra's key is resolved from, in order: the harvest source option
    ``systemtypefield``, the CKAN option
    ``ckan.d4science_theme.systemtypefield``, the default ``system:type``.

    The extra's value is resolved from, in order: the harvest source option
    ``systemtypevalue``, the ``resource-type`` extra already present in the
    package, the CKAN option
    ``ckan.d4science_theme.harvestingsystemtypevalue``, the default
    ``Dataset``. Empty values are treated as missing at every step.

    Args:
        package_dict: package being built; an ``extras`` list is created if
            it is absent.

    Returns:
        ``package_dict`` with the extra appended; the value is capitalized.
    """
    # ADDED BY FRANCESCO MANGIACRAPRA
    # Task #8726
    k = self.source_config.get('systemtypefield')
    v = self.source_config.get('systemtypevalue')
    extras = package_dict.setdefault('extras', [])

    log.debug(f'Read systemtypefield {k} from input configuration parameter')
    if not k:
        k = config.get('ckan.d4science_theme.systemtypefield')
        log.debug(f'Read ckan.d4science_theme.systemtypefield {k} from production.ini')
        if not k:
            k = 'system:type'
            log.debug(f'Using default systemtypefield {k}')

    log.debug(f'Read systemtypevalue {v} from input configuration parameter')
    if not v:
        # Task #9281
        # Setting system:type based on the "gmd:hierarchyLevel/gmd:MD_ScopeCode"
        # codelist codes. ISO 19139 XML defines an extended list of scopes
        # (GMX codelist). Sticking with this codelist, "process" is not
        # included but the generic "service" should be used. There is also
        # "software" that may be applicable. Values of the codelist:
        # attribute, attributeType, collectionHardware, collectionSession,
        # dataset, series, nonGeographicDataset, dimensionGroup, feature,
        # featureType, propertyType, fieldSession, software, service, model,
        # tile, initiative, stereomate, sensor, platformSeries, sensorSeries,
        # productionSeries, transferAggregate, otherAggregate
        for e in extras:
            if e.get('key') == 'resource-type':
                v = e.get('value')
                log.debug(f'resource-type value in extra field has value {v}')
                break

    if not v:
        v = config.get('ckan.d4science_theme.harvestingsystemtypevalue')
        log.debug(f'Read ckan.d4science_theme.harvestingsystemtypevalue {v} from production.ini')
        if not v:
            v = 'Dataset'
            log.debug(f'Using hard-coded value for system:type {v}')

    # str() guards against non-string values coming from the JSON config
    v = str(v).capitalize()
    log.debug(f"adding key: {k} capitalized value: {v}")
    extras.append({'key': k, 'value': v})
    return package_dict
# Added by Francesco Mangiacrapa
def infer_featurecataloguedescription_iso110(self, values):
    """Collect the Feature Catalogue online resources from the ISO values.

    Reads ``values["featurecataloguedescription-responsible-party"]`` and
    returns one record per online resource that has a URL. Previously all
    the resources of a responsible party were written into a single dict,
    so only the last one survived, and parties without any resource
    produced an empty record.

    Args:
        values: mapping of parsed ISO values.

    Returns:
        A list of dicts with ``url``, ``name`` (``"Unnamed Resource"`` when
        missing or empty) and, when available, ``description``.
    """
    feature_catalogue_descrs = []
    parties = values.get("featurecataloguedescription-responsible-party") or []
    for feature_catalogue in parties:
        log.debug(f"Feature catalogue description: {feature_catalogue}")

        feature_resource_list = feature_catalogue.get('online-resource') or []
        log.debug(f"Feature resource list is: {feature_resource_list}")

        for feature_resource in feature_resource_list:
            log.debug(f"Feature resource is: {feature_resource}")
            url = feature_resource.get('url')
            if not url:
                # A resource without a URL cannot become a usable resource
                continue

            feature_catalogue_field = {
                'url': url,
                'name': feature_resource.get('name') or "Unnamed Resource",
            }
            description = feature_resource.get('description')
            if description:
                feature_catalogue_field['description'] = description

            log.debug(f"Adding feature catalogue description: {feature_catalogue_field}")
            feature_catalogue_descrs.append(feature_catalogue_field)

    log.info(f"{len(feature_catalogue_descrs)} Feature Catalogue description added")
    return feature_catalogue_descrs
# Added by Francesco Mangiacrapa
def validate_tags(self, package_dict):
    """Sanitize and de-duplicate ``package_dict['tags']``.

    Every run of characters other than ASCII letters, digits, space, ``.``,
    ``_`` and ``-`` is replaced by one space and the result is stripped.
    Duplicates are detected on the sanitized name, so ``a/b`` and ``a,b``
    collapse into a single ``a b`` tag. Names that are missing, are not
    strings, or are shorter than two characters after the clean-up are
    dropped; names longer than 100 characters are truncated.

    Args:
        package_dict: package being built; ``tags`` may be absent or empty.

    Returns:
        ``package_dict``. If it had a non-empty ``tags`` list, that list is
        replaced by the cleaned one, which may be empty.
    """
    # NOTE(review): 2 and 100 are meant to mirror CKAN's tag length limits;
    # confirm against the CKAN version in use.
    min_tag_length = 2
    max_tag_length = 100

    tags = package_dict.get('tags')
    if not tags:
        return package_dict

    new_tags = []
    seen_names = set()
    for tag in tags:
        raw_name = tag.get('name') if isinstance(tag, dict) else None
        if not raw_name or not isinstance(raw_name, str):
            log.info(f"Skipping None or Duplicated Tag: {raw_name}")
            continue

        purged_tag_name = re.sub('[^A-Za-z0-9 ._-]+', ' ', raw_name).strip()
        # Strip again: the cut may leave a trailing space
        purged_tag_name = purged_tag_name[:max_tag_length].strip()

        if len(purged_tag_name) < min_tag_length:
            log.info("Skipping tag too short after clean-up: %r", raw_name)
            continue

        if purged_tag_name in seen_names:
            log.info(f"Skipping None or Duplicated Tag: {raw_name}")
            continue

        seen_names.add(purged_tag_name)
        new_tags.append({'name': purged_tag_name})

    # Always replace the list: keeping the original when nothing survived
    # would put the invalid tags back into the package
    package_dict['tags'] = new_tags
    return package_dict
+ #if gn_localized_url[-3:] == 'csw': + # gn_localized_url = gn_localized_url[:-3] log.debug('GN localized URL %s', gn_localized_url) #log.debug('Package dict is %r ', package_dict['extras']) - package_dict['extras'].append({'key': 'gn_view_metadata_url', 'value': gn_localized_url + '/metadata.show?uuid=' + harvest_object.guid}) - package_dict['extras'].append({'key': 'gn_localized_url', 'value': gn_localized_url}) + # not in d4science but in geonetwork 2.10 + #package_dict['extras'].append({'key': 'gn_view_metadata_url', 'value': gn_localized_url + '/metadata.show?uuid=' + harvest_object.guid}) + #package_dict['extras'].append({'key': 'gn_localized_url', 'value': gn_localized_url}) + + #d4science code + package_dict = self.add_geonetwork_informations_to_package(gn_localized_url, package_dict, harvest_object, self.harvest_session) + package_dict = self.add_item_url_to_package(gn_localized_url, package_dict) + authors = self.infer_authors(iso_values) + + # Adding Authors + if authors: + package_dict['author'] = ",".join(authors) + log.debug(f'Author/s are: {package_dict["author"]}') + + # Adding Logged User as Maintainer + user_logged = super(GeoNetworkHarvester, self)._get_user_name() + if user_logged: + package_dict['maintainer'] = user_logged + main_mail = config.get('ckan.admin_email') + if main_mail: + package_dict['maintainer_email'] = main_mail + + # Adding Point of Contacts + point_of_contacts = self.infer_point_of_contacts(iso_values) + for idx, item in enumerate(point_of_contacts): + log.debug(f"point_of_contact: {item}") + + poc_value = '' + if 'name' in item: + poc_value = item['name'] + + if 'email' in item: + if len(poc_value) > 1: + poc_value += f', {item["email"]}' + else: + poc_value += item['email'] + + poc_key = 'point_of_contact' + + if len(point_of_contacts) > 1: + poc_key += f' {idx + 1}' + + if poc_value and len(poc_value) > 1: + package_dict['extras'].append({'key': poc_key, 'value': poc_value}) + log.debug(f'Added point of contact: {poc_key} 
def fix_resource_type(self, resources):
    """Mark WMS resources and, when enabled, verify them.

    A resource whose ``resource_locator_protocol`` contains ``OGC:WMS`` gets
    ``format = 'wms'``. If ``ckanext.spatial.harvest.validate_wms`` is
    enabled, the URL without its query string is checked with
    ``self._is_wms`` and, on success, ``verified`` and ``verified_date`` are
    set. Resources with a missing or empty protocol are left untouched.

    Args:
        resources: iterable of resource dicts, modified in place; ``None``
            is treated as empty.

    Returns:
        None.
    """
    for resource in resources or []:
        # The key may be absent, or present with a None value
        protocol = resource.get('resource_locator_protocol') or ''
        if 'OGC:WMS' not in protocol:
            continue

        resource['format'] = 'wms'

        validate_wms = config.get('ckanext.spatial.harvest.validate_wms', False)
        # Values read from an .ini file are strings: "false" must not enable
        if isinstance(validate_wms, str):
            validate_wms = validate_wms.strip().lower() in ('true', '1', 'yes', 'on', 't', 'y')
        if not validate_wms:
            continue

        # Check if the service is a view service
        url = resource.get('url')
        if not url:
            continue
        test_url = url.split('?')[0]
        if self._is_wms(test_url):
            resource['verified'] = True
            resource['verified_date'] = datetime.now().isoformat()