Compare commits
2 Commits
main
...
d4science_
Author | SHA1 | Date |
---|---|---|
Alessio Fabrizio | 925db5d411 | |
Alessio Fabrizio | 5c483d60a6 |
|
@ -13,14 +13,31 @@ from ckan.plugins.core import SingletonPlugin
|
|||
from ckanext.spatial.lib.csw_client import CswService
|
||||
from ckanext.spatial.harvesters.csw import CSWHarvester
|
||||
|
||||
from ckanext.spatial.harvested_metadata import ISODocument
|
||||
from ckanext.spatial.harvested_metadata import ISOElement
|
||||
#from ckanext.spatial.harvested_metadata import ISODocument
|
||||
#from ckanext.spatial.harvested_metadata import ISOElement
|
||||
#in spatial branch 2.10-support ISOObjects are located Here
|
||||
from ckanext.spatial.model.harvested_metadata import ISODocument
|
||||
from ckanext.spatial.model.harvested_metadata import ISOElement
|
||||
|
||||
from ckan.logic import ValidationError, NotFound, get_action
|
||||
|
||||
from ckan.common import config
|
||||
from datetime import datetime
|
||||
|
||||
#add dependencies updated to python3
|
||||
import xml.etree.ElementTree as ElementTree
|
||||
import urllib.request
|
||||
import urllib.error
|
||||
from ckanext.spatial.model.harvested_metadata import ISOResourceLocator
|
||||
import re
|
||||
from urllib.parse import urlparse
|
||||
|
||||
# Keys of the GeoNetwork-specific "extras" attached to each harvested package.
GIS_GEONETWORK_METADATA_SOURCE = 'gis_geonetwork:GN_Metadata_Source'

GIS_GEONETWORK_METADATA_SHOW = 'gis_geonetwork:GN_Metadata_Show'

GIS_GEONETWORK_GN_URL = 'gis_geonetwork:GN_URL'

# Module-level logger, shared by every class/function in this module.
log = logging.getLogger(__name__)

# Extend the ISODocument definitions by adding some more useful elements
|
@ -35,6 +52,72 @@ ISODocument.elements.append(
|
|||
multiplicity="*",
|
||||
))
|
||||
|
||||
# D4S MAPPING FOR AUTHOR IS THE ORGANIZATION NAME
log.info('GeoNetwork harvester: extending ISODocument with organisation-name-responsible-party')
ISODocument.elements.append(
    ISOElement(
        # Organisation names of the metadata contact; consumed by
        # infer_authors() to fill the package "author" field.
        name="organisation-name-responsible-party",
        search_paths=[
            "gmd:contact/gmd:CI_ResponsibleParty/gmd:organisationName/gco:CharacterString/text()",
        ],
        multiplicity="*",
    ))
|
||||
|
||||
# D4S MAPPING FOR MAINTAINERS
log.info('GeoNetwork harvester: extending ISODocument with identification-info-responsible-party')
ISODocument.elements.append(
    ISOElement(
        # Point-of-contact of the identification section; consumed by
        # infer_point_of_contacts() to build name/email extras.
        name="identification-info-responsible-party",
        search_paths=[
            "gmd:identificationInfo/gmd:MD_DataIdentification/gmd:pointOfContact/gmd:CI_ResponsibleParty",
        ],
        multiplicity="*",
        elements=[
            # Person name(s) of the responsible party.
            ISOElement(
                name="individualName",
                search_paths=[
                    "gmd:individualName/gco:CharacterString/text()",
                ],
                multiplicity="*",
            ),
            # Contact e-mail address(es).
            ISOElement(
                name="email",
                search_paths=[
                    "gmd:contactInfo/gmd:CI_Contact/gmd:address/gmd:CI_Address/gmd:electronicMailAddress/gco:CharacterString/text()",
                ],
                multiplicity="*",
            ),
            # Online resource locator(s) of the contact.
            ISOResourceLocator(
                name="online-resource",
                search_paths=[
                    "gmd:contactInfo/gmd:CI_Contact/gmd:onlineResource/gmd:CI_OnlineResource",
                ],
                multiplicity="*",
            ),
        ]
    ))
|
||||
|
||||
# D4S MAPPING FOR MD_FeatureCatalogueDescription
ISODocument.elements.append(
    ISOElement(
        # ISO 19110 feature catalogue citation parties; consumed by
        # infer_featurecataloguedescription_iso110() to build CKAN resources.
        name="featurecataloguedescription-responsible-party",
        search_paths=[
            "gmd:contentInfo/gmd:MD_FeatureCatalogueDescription/gmd:featureCatalogueCitation/gmd:CI_Citation/gmd:citedResponsibleParty/gmd:CI_ResponsibleParty",
        ],
        multiplicity="*",
        elements=[
            # Online resource locator(s) of the cited responsible party.
            ISOResourceLocator(
                name="online-resource",
                search_paths=[
                    "gmd:contactInfo/gmd:CI_Contact/gmd:onlineResource/gmd:CI_OnlineResource",
                ],
                multiplicity="*",
            ),
        ]
    ))
|
||||
|
||||
# Some old GN instances still uses the old GML URL
|
||||
# We'll add more xpath for addressing this issue
|
||||
log.info('GeoNetwork harvester: adding old GML URI')
|
||||
|
@ -52,9 +135,78 @@ for element in ISODocument.elements:
|
|||
element.search_paths.append(newpath)
|
||||
log.info("Added old URI for gml to %s", element.name)
|
||||
|
||||
# custom classes
class Harvest_Session(dict):
    """A dict that answers None for absent keys instead of raising KeyError.

    Used as a scratch cache shared across one harvest job run.
    """

    def __missing__(self, _key):
        # Invoked by dict.__getitem__ on a missing key: report "no value"
        # without storing anything.
        return None
|
||||
|
||||
class D4S_HTTP_Request_Util:
    """Small best-effort helpers around urllib for HTTP calls."""

    # Returns body response as string if the request is ok, None otherwise
    @staticmethod
    def get_response_body(uri, data=None, headers=None):
        """Perform an HTTP request and return the decoded body, or None on error.

        :param uri: target URL.
        :param data: optional request body. A str is encoded as UTF-8 because
            urllib.request requires a bytes body (the original code passed a
            str straight through, which raises TypeError on POST).
        :param headers: optional dict of request headers.
        :returns: response body decoded as UTF-8, or None on HTTP/URL errors.
        """
        if headers is None:
            # was a mutable default argument ({}), shared across calls
            headers = {}
        if isinstance(data, str):
            data = data.encode("utf-8")
        log.debug("Performing request to uri: %s" % uri)
        log.debug("headers are: %s" % headers)
        log.debug("data passed as body are: %s" % data)
        req = urllib.request.Request(uri, data=data, headers=headers)
        try:
            # context manager closes the socket (the original leaked it)
            with urllib.request.urlopen(req, timeout=2) as resp:
                body = resp.read()
        except urllib.error.HTTPError as e:
            log.error("Error on contacting URI: %s" % uri)
            log.error("HTTPError: %d" % e.code)
            return None
        except urllib.error.URLError:
            log.error("URLError - Input URI: %s is not valid!!" % uri)
            return None
        return body.decode("utf-8")  # decode to obtain a (UTF-8) string

    # Returns True if status of the http request is Successful 2xx, False otherwise
    @staticmethod
    def check_url(uri):
        """Return True when a GET on *uri* answers with a 2xx success status.

        Any network, URL or unexpected error is logged and reported as False
        (deliberately best-effort: callers only need a yes/no).
        """
        try:
            with urllib.request.urlopen(uri) as resp:
                status_code = resp.getcode()
            if 200 <= status_code <= 206:
                return True

            log.error("Error on contacting URI: %s" % uri)
            return False
        except urllib.error.HTTPError as e:
            log.error("Error on contacting URI: %s" % uri)
            log.error("HTTPError: %d" % e.code)
            return False
        except Exception:
            log.error("check_url fail on: %s " % uri)
            return False
|
||||
|
||||
class D4S_IS_Resource_Discovery:
    """Fetches a gCube IS resource (XML) through the IC proxy service."""

    # query-string parameter name carrying the gCube authorization token
    gcubeTokenParam = "gcube-token"

    def __init__(self, urlICProxy, resourceID, gcubeToken):
        """Store the IC proxy base URL, the resource id and the gCube token."""
        self.urlICProxy = urlICProxy
        self.resourceID = resourceID
        self.gcubeToken = gcubeToken

    def performRequest(self):
        """Fetch and parse the resource; return an Element, or None on failure."""
        uri = ""
        try:
            token_param = D4S_IS_Resource_Discovery.gcubeTokenParam
            uri = f"{self.urlICProxy}/{self.resourceID}?{token_param}={self.gcubeToken}"
            log.debug("Contacting URL: %s" % uri)
            theResource = D4S_HTTP_Request_Util.get_response_body(uri)
            log.debug("Resource returned %s " % theResource)
            return ElementTree.XML(theResource) if theResource else None
        except Exception:
            # best-effort: any failure (network, parse) degrades to None
            log.error("Error on performing the request from uri: {}".format(uri))
            log.debug("Returning None")
            return None
|
||||
|
||||
class GeoNetworkHarvester(CSWHarvester, SingletonPlugin):
|
||||
|
||||
catalogue_resolver = None
|
||||
harvest_session = Harvest_Session()
|
||||
|
||||
def info(self):
|
||||
return {
|
||||
'name': 'geonetwork',
|
||||
|
@ -63,6 +215,408 @@ class GeoNetworkHarvester(CSWHarvester, SingletonPlugin):
|
|||
'form_config_interface': 'Text'
|
||||
}
|
||||
|
||||
#custom class methods
|
||||
def add_geonetwork_informations_to_package(self, gn_localized_url, package_dict, harvest_object, harvest_session):
    """Add GeoNetwork-specific extras (GN URL, metadata show/source links) to
    the package, discovering the GeoNetwork base URL via a CSW GetCapabilities
    request and caching the result per harvest job in *harvest_session*.

    Returns *package_dict*, possibly unchanged when discovery fails.
    """
    namespaces = {'ows': "http://www.opengis.net/ows", "xlink": "http://www.w3.org/1999/xlink"}

    gn_url_session_key = self._get_session_key(harvest_object, "the_geonetwork_url")

    # Fast path: the GeoNetwork URL was already resolved earlier in this job.
    if harvest_session.get(gn_url_session_key):
        log.info('Harvest session for current job, the_geonetwork_url: %s, is_geonetwork_3: %s',
                 harvest_session.get(gn_url_session_key),
                 harvest_session.get(self._get_session_key(harvest_object, "is_geonetwork_3")))
        return self._add_geoentworks_links(harvest_session.get(gn_url_session_key), package_dict, harvest_object,
                                           harvest_session)

    if gn_localized_url:
        # CSW GetCapabilities: the ows:ProviderSite element advertises the
        # GeoNetwork base URL.
        get_capabilities_request = "request=GetCapabilities&service=CSW&acceptVersions=2.0.2&acceptFormats=application%2Fxml"
        resp = D4S_HTTP_Request_Util.get_response_body(gn_localized_url + "?" + get_capabilities_request)

        if resp:
            try:
                # Decode the content if necessary
                if isinstance(resp, bytes):
                    resp = resp.decode("utf-8")

                root = ElementTree.fromstring(resp)

                geonetwork_url = None
                # NOTE(review): with several ProviderSite elements the last
                # one wins — presumably there is only ever one; confirm.
                for provideSites in root.findall('.//ows:ProviderSite', namespaces):
                    geonetwork_url = provideSites.get('{http://www.w3.org/1999/xlink}href')
                    log.info("Read geonetwork_url from GetCapabilities: %s", geonetwork_url)
                    if geonetwork_url:
                        parsedUrl = urlparse(geonetwork_url)
                        # Drop explicit default-ish ports 80 or 8080
                        if parsedUrl.port in {80, 8080}:
                            geonetwork_url = f"{parsedUrl.scheme}://{parsedUrl.hostname}{parsedUrl.path}"
                            log.info("Removed port 80 or 8080 from geonetwork_url: %s", geonetwork_url)

                if geonetwork_url:
                    # Cache for the rest of the job and probe whether the
                    # instance answers at the GN3 UI path.
                    harvest_session[gn_url_session_key] = geonetwork_url
                    is_gn3_session_key = self._get_session_key(harvest_object, "is_geonetwork_3")
                    harvest_session[is_gn3_session_key] = D4S_HTTP_Request_Util.check_url(
                        geonetwork_url + "/srv/eng/catalog.search#/home")
                    log.info('Updated Harvest session for current job %s', harvest_session)
                    package_dict = self._add_geoentworks_links(geonetwork_url, package_dict, harvest_object, harvest_session)

            except Exception as err:
                # best-effort: a broken GetCapabilities must not abort harvesting
                log.warning("No Geonetwork informations added!! Error on parsing the get capabilities: %s", err)
                return package_dict

        return package_dict

    return package_dict
|
||||
|
||||
def _add_geoentworks_links(self, geonetwork_url, package_dict, harvest_object, harvest_session):
    """Append the three GeoNetwork link extras (GN URL, metadata show,
    metadata source) to the package, choosing the GN2 or GN3 "show" URL
    according to the cached is_geonetwork_3 flag.
    """
    geonetwork_service_url = geonetwork_url + '/srv/en'

    # TODO check if package_dict['extras'] is corret/works
    is_gn3 = harvest_session.get(self._get_session_key(harvest_object, "is_geonetwork_3"))
    if is_gn3:
        log.debug('GN3 Service URL is %s', geonetwork_service_url)
        show_url = geonetwork_service_url + '/catalog.search#/metadata/' + harvest_object.guid
    else:
        log.debug('GN2 Service URL is %s', geonetwork_service_url)
        show_url = geonetwork_service_url + '/metadata.show?uuid=' + harvest_object.guid

    extras = package_dict['extras']
    extras.append({'key': GIS_GEONETWORK_GN_URL, 'value': geonetwork_url})
    extras.append({'key': GIS_GEONETWORK_METADATA_SHOW, 'value': show_url})
    extras.append({'key': GIS_GEONETWORK_METADATA_SOURCE,
                   'value': geonetwork_service_url + '/xml.metadata.get?uuid=' + harvest_object.guid})

    return package_dict
|
||||
|
||||
def _get_session_key(self, harvest_object, key):
|
||||
'''Returns a session key for the harvest job running'''
|
||||
return harvest_object.job.id + "_key_" + key
|
||||
|
||||
# Added by Francesco Mangiacrapa
|
||||
def add_item_url_to_package(self, gn_localized_url, package_dict):
    """Add the D4Science 'Item URL' extra, resolving it through the catalogue
    resolver service discovered on the gCube IS (via the IC proxy).

    :param gn_localized_url: harvest source URL (may embed the gCube scope).
    :param package_dict: package dict being built; returned, possibly extended.
    """
    log.debug("add_item_url_to_package")

    set_item_url = config.get('ckan.d4science_theme.harvesting_set_item_url')

    # explicit False in configuration disables the feature entirely
    if set_item_url is not None and not set_item_url:
        log.info("set item url configuration is False, returning")
        return package_dict

    if not self.catalogue_resolver:
        urlICProxy = config.get('ckan.d4science_theme.ic_proxy_url')  # e.g. "https://registry.d4science.org/icproxy/gcube/service"
        resourceID = config.get('ckan.d4science_theme.ckandatacatalogue_resourceid')  # CkanDataCatalogue GR
        if not resourceID:
            # NOTE: the original code assigned a DEV id and immediately
            # overwrote it; the PROD id was the effective fallback, kept here.
            resourceID = "2e067010-3d97-11e8-bcb7-f39deee66c72"  # PROD CkanDataCatalogue GR
            log.warning("'ckan.d4science_theme.ckandatacatalogue_resourceid' not found into configuration. Hard-cabling CkanDataCatalogue GR resourceID: " + resourceID)

        gcubeToken = config.get('ckan.d4science_theme.application_token')  # The gCube Token

        log.debug("urlICProxy: " + urlICProxy)
        log.debug("resourceID: " + resourceID)
        log.debug("gcubeToken: " + gcubeToken)

        disc = D4S_IS_Resource_Discovery(urlICProxy, resourceID, gcubeToken)
        response = disc.performRequest()
        # print ElementTree.tostring(response)

        # BUG FIX: an Element with no children is falsy, so `if response:`
        # would wrongly skip valid answers — test against None instead.
        if response is not None:
            # BUG FIX: xml.etree Elements have no .xpath() (lxml-only).
            # Emulate the original XPath
            # '/Resource/Profile/AccessPoint/Properties/Property/Name[text()="URL_RESOLVER"]/../Value'
            # with ElementTree-compatible find calls (response is the
            # <Resource> root, so the path is relative).
            the_end_points = [
                prop.find('Value')
                for prop in response.findall('Profile/AccessPoint/Properties/Property')
                if prop.findtext('Name') == 'URL_RESOLVER' and prop.find('Value') is not None
            ]

            if the_end_points:
                self.catalogue_resolver = the_end_points[0].text
                log.info("Found catalogue_resolver: %s" % self.catalogue_resolver)

    if self.catalogue_resolver:
        url_split = gn_localized_url.split("/")

        the_scope = None
        # ONLY IF THE URL IS A GEONETWORK RESOLVER LINK, I TRY TO GET THE (GCUBE) SCOPE
        # BUG FIX: index 4 needs at least 5 parts (was `>= 4`: IndexError)
        if len(url_split) >= 5 and 'csw' not in url_split:
            the_scope = url_split[4]
            the_scope = the_scope.replace("|", "/")
            the_scope = the_scope.replace("%7C", "/")
            the_scope = the_scope if the_scope.startswith("/") else "/" + the_scope

        log.debug("Found the scope: %s" % the_scope)

        the_item_url = None

        if the_scope:
            query = '{"gcube_scope" : "' + the_scope + '","entity_context" : "dataset", "entity_name" : "' + package_dict["name"] + '"}'
            headers = {"Content-Type": "application/json"}
            # encode: urllib.request requires a bytes body for POST data
            the_item_url = D4S_HTTP_Request_Util.get_response_body(self.catalogue_resolver, query.encode("utf-8"), headers)

            try:
                # Python 3 change: use urllib.request.urlopen instead of urllib2.urlopen, check extras
                with urllib.request.urlopen(the_item_url) as response:
                    if response.getcode() == 200:
                        package_dict['extras'].append({'key': 'Item URL', 'value': the_item_url})
                        log.info("Added Item URL: %s" % the_item_url)
            except Exception:
                log.warning(u"No Item URL added!! Error on performing the request from uri: {}".format(the_item_url))
                return package_dict

    return package_dict
|
||||
|
||||
def add_license_to_package(self, package_dict):
    # Added by Francesco Mangiacrapa
    """Fill license_id/license_title when the harvested 'licence' extra is empty.

    Resolution order for both fields: harvester source config, then CKAN
    config (production.ini), then hard-coded CC-BY-SA-4.0 defaults.
    """
    k_license_id = 'license_id'
    k_license_title = 'license_title'
    k_harvest_license_id = 'ckan.d4science_theme.harvest_license_id'
    k_harvest_license_title = 'ckan.d4science_theme.harvest_license_title'

    try:
        # --- license id: source config -> production.ini -> default
        v_license_id = self.source_config.get(k_license_id)
        log.debug(f'Read {k_license_id} as {v_license_id} from input configuration parameter')
        if v_license_id is None:
            v_license_id = config.get(k_harvest_license_id)
            log.debug(f'Read {k_harvest_license_id} as {v_license_id} from production.ini')
        if v_license_id is None:
            v_license_id = 'CC-BY-SA-4.0'
            log.debug(f'Using default {k_license_id} {v_license_id}')

        # --- license title: source config -> production.ini -> default
        v_license_title = self.source_config.get(k_license_title)
        log.debug(f'Read {k_license_title} as {v_license_title} from input configuration parameter')
        if v_license_title is None:
            v_license_title = config.get(k_harvest_license_title)
            log.debug(f'Read {k_harvest_license_title} as {v_license_title} from production.ini')
        if v_license_title is None:
            v_license_title = ('Creative Commons Attribution Share-Alike 4.0'
                               if v_license_id == 'CC-BY-SA-4.0'
                               else 'Unknown License Title')
            log.debug(f'Using default {k_license_title} {v_license_title}')

        # pick the harvested 'licence' extra, if any
        licence_v = None
        for extra in package_dict['extras']:
            if extra.get('key') == 'licence':
                licence_v = extra['value']
                log.debug(f'licence value in extra field has value {licence_v}')
                break

        # only override when the harvested licence is empty/absent
        if not licence_v or licence_v == '[]':
            package_dict[k_license_id] = v_license_id
            package_dict[k_license_title] = v_license_title
            log.debug(f'license_id has value: {package_dict[k_license_id]}')
            log.debug(f'license_title has value: {package_dict[k_license_title]}')

    except Exception as inst:
        log.warning(f"Impossible to add the license_id: {str(inst)}")
        return package_dict

    return package_dict
|
||||
|
||||
def add_systemtype_to_package(self, package_dict):
    # ADDED BY FRANCESCO MANGIACRAPRA
    # Task #8726
    """Append the system-type extra to the package.

    The field name falls back: source config -> CKAN config -> 'system:type'.
    The value falls back: source config -> harvested 'resource-type' extra ->
    CKAN config -> 'Dataset'; it is always capitalized before being stored.
    """
    type_key = self.source_config.get('systemtypefield')
    type_value = self.source_config.get('systemtypevalue')

    log.debug(f'Read systemtypefield {type_key} from input configuration parameter')
    if type_key is None:
        type_key = config.get('ckan.d4science_theme.systemtypefield')
        log.debug(f'Read ckan.d4science_theme.systemtypefield {type_key} from production.ini')
    if type_key is None:
        type_key = 'system:type'
        log.debug(f'Using default systemtypefield {type_key}')

    log.debug(f'Read systemtypevalue {type_value} from input configuration parameter')
    if type_value is None:
        # Task #9281
        # Fall back to the harvested "resource-type" extra, which carries the
        # "gmd:hierarchyLevel/gmd:MD_ScopeCode" codelist code. ISO 19139 (GMX
        # codelist) values include: attribute, attributeType,
        # collectionHardware, collectionSession, dataset, series,
        # nonGeographicDataset, dimensionGroup, feature, featureType,
        # propertyType, fieldSession, software, service, model, tile,
        # initiative, stereomate, sensor, platformSeries, sensorSeries,
        # productionSeries, transferAggregate, otherAggregate.
        for extra in package_dict['extras']:
            if extra.get('key') == 'resource-type':
                type_value = extra['value']
                log.debug(f'resource-type value in extra field has value {type_value}')
                break

        if not type_value:
            type_value = config.get('ckan.d4science_theme.harvestingsystemtypevalue')
            log.debug(f'Read ckan.d4science_theme.harvestingsystemtypevalue {type_value} from production.ini')
        if not type_value:
            type_value = 'Dataset'
            log.debug(f'Using hard-coded value for system:type {type_value}')

    type_value = type_value.capitalize()
    log.debug(f"adding key: {type_key} capitalized value: {type_value}")
    package_dict['extras'].append({'key': type_key, 'value': type_value})
    return package_dict
|
||||
|
||||
# Added by Francesco Mangiacrapa
|
||||
def infer_authors(self, values):
    """Return the unique organisation names found as responsible parties.

    :param values: ISO values dict; may lack the
        'organisation-name-responsible-party' key entirely (robustness fix:
        the original raised KeyError in that case).
    :returns: list of author names, first-occurrence order preserved.
    """
    names = values.get("organisation-name-responsible-party") or []
    # dict.fromkeys de-duplicates while preserving insertion order
    authors = list(dict.fromkeys(names))

    log.info(f"{len(authors)} AUTHOR found as OrganisationName in ResponsibleParty")
    return authors
|
||||
def infer_point_of_contacts(self, values):
    """Collect a name/email dict for every identification point of contact."""
    contacts = []
    for party in values["identification-info-responsible-party"]:
        contact = {}
        log.debug(f"responsible_party: {party}")

        # both sub-elements are lists (multiplicity "*"); join them when present
        names = party.get('individualName')
        if names:
            contact['name'] = ', '.join(names)

        emails = party.get('email')
        if emails:
            contact['email'] = ', '.join(emails)

        log.debug(f"Adding point_of_contact: {contact}")
        contacts.append(contact)

    log.info(f"{len(contacts)} Point of Contact/s found as Contact-Info in ResponsibleParty")
    return contacts
|
||||
|
||||
# Added by Francesco Mangiacrapa
|
||||
def infer_featurecataloguedescription_iso110(self, values):
    """Build one url/description/name dict per ISO 19110 feature catalogue entry."""
    descriptions = []
    for catalogue in values["featurecataloguedescription-responsible-party"]:
        entry = {}
        log.debug(f"Feature catalogue description: {catalogue}")

        if 'online-resource' in catalogue:
            locators = catalogue['online-resource']
            log.debug(f"Feature resource list is: {locators}")

            for locator in locators:
                log.debug(f"Feature resource is: {locator}")
                if 'url' in locator:
                    entry['url'] = locator['url']
                if 'description' in locator:
                    entry['description'] = locator['description']
                # NOTE: a later locator without a name resets a previous name
                entry['name'] = locator['name'] if 'name' in locator else "Unnamed Resource"

        log.debug(f"Adding feature catalogue description: {entry}")
        descriptions.append(entry)

    log.info(f"{len(descriptions)} Feature Catalogue description added")
    return descriptions
|
||||
|
||||
def add_topic_category_to_group(self, iso_values, package_dict, the_user='harvest'):
    """Store ISO topic categories as a 'topic_category' extra and, when a CKAN
    group with the same (lowercased) name already exists, add the dataset to it.

    Disabled when the source config sets add_topic_category_to_group to "False".
    Returns *package_dict*.
    """
    add_iso_categories = self.source_config.get('add_topic_category_to_group')
    if add_iso_categories == "False":
        return package_dict

    if 'topic-category' in iso_values:
        log.debug('iso_values contains topic category')
        topic_category_values = ""
        if len(iso_values['topic-category']) > 0:
            topic_category_values = ", ".join(iso_values['topic-category'])

        package_dict['extras'].append({'key': 'topic_category', 'value': topic_category_values})
        log.debug(f'topic_category: {topic_category_values}')

        # Adding to groups if the group as iso-category already exists
        for iso_cat in iso_values['topic-category']:
            log.debug(f"Adding topic_category: {iso_cat} as group")
            isocat_name_lower = iso_cat.lower()
            if 'groups' in package_dict:
                # case-insensitive membership check against current groups
                group_found = False
                for group in package_dict['groups']:
                    if group['name'].lower() == isocat_name_lower:
                        group_found = True
                        break

                if not group_found:
                    # NOTE(review): 'model' and 'Session' are assumed to be
                    # imported elsewhere in this module — confirm.
                    context = {'model': model, 'session': Session, 'user': the_user}
                    try:
                        # group_show raises NotFound when no such group exists;
                        # only then is the dataset NOT added to it.
                        data_dict = {'id': isocat_name_lower}
                        get_action('group_show')(context, data_dict)
                        package_dict['groups'].append({'name': isocat_name_lower})
                        log.info(f'Dataset added to group: {isocat_name_lower}')
                    except NotFound as e:
                        log.warning(f'Group {iso_cat} from category {iso_cat} is not available: {e}')
            else:
                log.debug(f'creating groups into dictionary and assigning to group: {iso_cat}')
                package_dict['groups'] = [{'name': isocat_name_lower}]
    else:
        log.info('iso_values does not contain topic category')

    if 'groups' in package_dict:
        log.info(f'groups are: {package_dict["groups"]}')

    return package_dict
|
||||
|
||||
def add_as_resources(self, package_dict, resource_list):
|
||||
if not resource_list:
|
||||
return package_dict
|
||||
|
||||
for resource in resource_list:
|
||||
the_resource = {}
|
||||
if 'name' in resource:
|
||||
the_resource['name'] = resource['name']
|
||||
if 'description' in resource:
|
||||
the_resource['description'] = resource['description']
|
||||
if 'url' in resource:
|
||||
the_resource['url'] = resource['url']
|
||||
if 'format' in resource:
|
||||
the_resource['format'] = resource['format']
|
||||
|
||||
package_dict['resources'].append(the_resource)
|
||||
log.info(f'Added resource {the_resource} to resources')
|
||||
|
||||
return package_dict
|
||||
|
||||
# Added by Francesco Mangiacrapa
|
||||
def add_as_tag(self, package_dict, tag_list):
|
||||
if not tag_list:
|
||||
return package_dict
|
||||
|
||||
for tag_name in tag_list:
|
||||
package_dict['tags'].append({'name': tag_name})
|
||||
log.info(f'Added tag {tag_name}')
|
||||
|
||||
return package_dict
|
||||
|
||||
# Added by Francesco Mangiacrapa
|
||||
def validate_tags(self, package_dict):
|
||||
new_tags = []
|
||||
# log.info("New Tags working")
|
||||
if package_dict['tags']:
|
||||
# log.info("Tags are: %s"%package_dict['tags'])
|
||||
check_duplicated_keys = {}
|
||||
for tag in package_dict['tags']:
|
||||
try:
|
||||
if tag['name'] and tag['name'] not in check_duplicated_keys:
|
||||
check_duplicated_keys[tag['name']] = 1
|
||||
purged_tag_name = re.sub('[^A-Za-z0-9 ._-]+', ' ', tag['name'])
|
||||
purged_tag_name = purged_tag_name.strip()
|
||||
new_tags.append({'name': purged_tag_name})
|
||||
# log.info(f"New Tag: {purged_tag_name}")
|
||||
else:
|
||||
log.info(f"Skipping None or Duplicated Tag: {tag['name']}")
|
||||
|
||||
except Exception as e: # Use 'as' for exception handling in Python 3
|
||||
log.error(f"error validating tag: {e}")
|
||||
|
||||
if len(new_tags) > 0:
|
||||
package_dict['tags'] = new_tags
|
||||
# log.info(f"New Tags are: {package_dict['tags']}")
|
||||
|
||||
return package_dict
|
||||
|
||||
def get_package_dict(self, iso_values, harvest_object):
|
||||
|
||||
package_dict = super(GeoNetworkHarvester, self).get_package_dict(iso_values, harvest_object)
|
||||
|
@ -92,21 +646,81 @@ class GeoNetworkHarvester(CSWHarvester, SingletonPlugin):
|
|||
harvest_job_id=str(harvest_object.job.id),
|
||||
harvest_object_id=str(harvest_object.id),
|
||||
guid=str(harvest_object.guid))
|
||||
package_dict['extras'].append({'key': key, 'value': value})
|
||||
#add check
|
||||
if key != "contact-email":
|
||||
package_dict['extras'].append({'key': key, 'value': value})
|
||||
else:
|
||||
log.debug('Skipping existing extra %s', key)
|
||||
|
||||
# Add GeoNetwork specific extras
|
||||
gn_localized_url = harvest_object.job.source.url.strip('/')
|
||||
|
||||
if gn_localized_url[-3:] == 'csw':
|
||||
gn_localized_url = gn_localized_url[:-3]
|
||||
# this code is not needed(?)
|
||||
#if gn_localized_url[-3:] == 'csw':
|
||||
# gn_localized_url = gn_localized_url[:-3]
|
||||
|
||||
log.debug('GN localized URL %s', gn_localized_url)
|
||||
#log.debug('Package dict is %r ', package_dict['extras'])
|
||||
|
||||
package_dict['extras'].append({'key': 'gn_view_metadata_url', 'value': gn_localized_url + '/metadata.show?uuid=' + harvest_object.guid})
|
||||
package_dict['extras'].append({'key': 'gn_localized_url', 'value': gn_localized_url})
|
||||
# not in d4science but in geonetwork 2.10
|
||||
#package_dict['extras'].append({'key': 'gn_view_metadata_url', 'value': gn_localized_url + '/metadata.show?uuid=' + harvest_object.guid})
|
||||
#package_dict['extras'].append({'key': 'gn_localized_url', 'value': gn_localized_url})
|
||||
|
||||
#d4science code
|
||||
package_dict = self.add_geonetwork_informations_to_package(gn_localized_url, package_dict, harvest_object, self.harvest_session)
|
||||
package_dict = self.add_item_url_to_package(gn_localized_url, package_dict)
|
||||
authors = self.infer_authors(iso_values)
|
||||
|
||||
# Adding Authors
|
||||
if authors:
|
||||
package_dict['author'] = ",".join(authors)
|
||||
log.debug(f'Author/s are: {package_dict["author"]}')
|
||||
|
||||
# Adding Logged User as Maintainer
|
||||
user_logged = super(GeoNetworkHarvester, self)._get_user_name()
|
||||
if user_logged:
|
||||
package_dict['maintainer'] = user_logged
|
||||
main_mail = config.get('ckan.admin_email')
|
||||
if main_mail:
|
||||
package_dict['maintainer_email'] = main_mail
|
||||
|
||||
# Adding Point of Contacts
|
||||
point_of_contacts = self.infer_point_of_contacts(iso_values)
|
||||
for idx, item in enumerate(point_of_contacts):
|
||||
log.debug(f"point_of_contact: {item}")
|
||||
|
||||
poc_value = ''
|
||||
if 'name' in item:
|
||||
poc_value = item['name']
|
||||
|
||||
if 'email' in item:
|
||||
if len(poc_value) > 1:
|
||||
poc_value += f', {item["email"]}'
|
||||
else:
|
||||
poc_value += item['email']
|
||||
|
||||
poc_key = 'point_of_contact'
|
||||
|
||||
if len(point_of_contacts) > 1:
|
||||
poc_key += f' {idx + 1}'
|
||||
|
||||
if poc_value and len(poc_value) > 1:
|
||||
package_dict['extras'].append({'key': poc_key, 'value': poc_value})
|
||||
log.debug(f'Added point of contact: {poc_key} {poc_value}')
|
||||
|
||||
package_dict = self.add_topic_category_to_group(iso_values, package_dict, user_logged)
|
||||
package_dict = self.add_systemtype_to_package(package_dict)
|
||||
package_dict = self.add_license_to_package(package_dict)
|
||||
fc_descriptions = self.infer_featurecataloguedescription_iso110(iso_values)
|
||||
|
||||
# Adding List of Feature Catalogue Description as CKAN RESOURCES
|
||||
package_dict = self.add_as_resources(package_dict, fc_descriptions)
|
||||
|
||||
# If the previous list is not empty, It adds the label 'Feature Catalogue Resource' as CKAN TAG
|
||||
if len(fc_descriptions) > 0:
|
||||
package_dict = self.add_as_tag(package_dict, ['Feature Catalog Resource'])
|
||||
|
||||
package_dict = self.validate_tags(package_dict)
|
||||
|
||||
# Add other elements from ISO metadata
|
||||
time_extents = self.infer_timeinstants(iso_values)
|
||||
|
@ -193,14 +807,15 @@ class GeoNetworkHarvester(CSWHarvester, SingletonPlugin):
|
|||
|
||||
def fix_resource_type(self, resources):
|
||||
for resource in resources:
|
||||
if 'OGC:WMS' in resource['resource_locator_protocol']:
|
||||
resource['format'] = 'wms'
|
||||
if 'resource_locator_protocol' in resource: #added check
|
||||
if 'OGC:WMS' in resource['resource_locator_protocol']:
|
||||
resource['format'] = 'wms'
|
||||
|
||||
if config.get('ckanext.spatial.harvest.validate_wms', False):
|
||||
# Check if the service is a view service
|
||||
url = resource['url']
|
||||
test_url = url.split('?')[0] if '?' in url else url
|
||||
if self._is_wms(test_url):
|
||||
resource['verified'] = True
|
||||
resource['verified_date'] = datetime.now().isoformat()
|
||||
if config.get('ckanext.spatial.harvest.validate_wms', False):
|
||||
# Check if the service is a view service
|
||||
url = resource['url']
|
||||
test_url = url.split('?')[0] if '?' in url else url
|
||||
if self._is_wms(test_url):
|
||||
resource['verified'] = True
|
||||
resource['verified_date'] = datetime.now().isoformat()
|
||||
|
||||
|
|
Loading…
Reference in New Issue