diff --git a/DataMinerAlgorithmsCrawler/CHANGELOG.md b/DataMinerAlgorithmsCrawler/CHANGELOG.md index 761cb85..a17b1a1 100644 --- a/DataMinerAlgorithmsCrawler/CHANGELOG.md +++ b/DataMinerAlgorithmsCrawler/CHANGELOG.md @@ -2,6 +2,9 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm # Changelog for org.gcube.data-publishing.gFeed.DataMinerAlgorithmsCrawler +## [v1.0.5] - 2020-12-15 +- Fixes [#22344](https://support.d4science.org/issues/22344#change-128440) : publish DM algorithms as Methods + ## [v1.0.4] - 2020-12-15 - Dependency management - Naming Convention \ No newline at end of file diff --git a/DataMinerAlgorithmsCrawler/pom.xml b/DataMinerAlgorithmsCrawler/pom.xml index c6784ed..f6b2a9a 100644 --- a/DataMinerAlgorithmsCrawler/pom.xml +++ b/DataMinerAlgorithmsCrawler/pom.xml @@ -8,7 +8,7 @@ DataMinerAlgorithmsCrawler DataMinerAlgorithmsCrawler - 1.0.4 + 1.0.5 Plugin for gCat-Feeder for DataMiner Algorithms publishing @@ -56,6 +56,18 @@ jersey-media-json-jackson + + org.gcube.data-catalogue + gcat-client + [2.0.0,3.0.0) + + + + org.gcube.portlets.user + uri-resolver-manager + [1.5.0-SNAPSHOT,2.0.0) + + org.projectlombok lombok diff --git a/DataMinerAlgorithmsCrawler/src/main/java/org/gcube/data/publishing/gCatFeeder/collectors/dm/Constants.java b/DataMinerAlgorithmsCrawler/src/main/java/org/gcube/data/publishing/gCatFeeder/collectors/dm/Constants.java index 4c5be96..00ca13e 100644 --- a/DataMinerAlgorithmsCrawler/src/main/java/org/gcube/data/publishing/gCatFeeder/collectors/dm/Constants.java +++ b/DataMinerAlgorithmsCrawler/src/main/java/org/gcube/data/publishing/gCatFeeder/collectors/dm/Constants.java @@ -11,7 +11,8 @@ public class Constants { // ENVIRONMENT EXPECTED PARAMETERS public static final String GUI_BASE_URL=ENVIRONMENT_PROPERTIES_BASE+"GUI_BASE_URL"; - + public static final String GATEWAY_NAME=ENVIRONMENT_PROPERTIES_BASE+"GATEWAY_NAME"; + public static final String 
DEFAULT_AUTHOR=ENVIRONMENT_PROPERTIES_BASE+"DEFAULT_AUTHOR"; public static final String DEFAULT_MAINTAINER=ENVIRONMENT_PROPERTIES_BASE+"DEFAULT_MAINTAINER"; diff --git a/DataMinerAlgorithmsCrawler/src/main/java/org/gcube/data/publishing/gCatFeeder/collectors/dm/DMAlgorithmsInfoCollector.java b/DataMinerAlgorithmsCrawler/src/main/java/org/gcube/data/publishing/gCatFeeder/collectors/dm/DMAlgorithmsInfoCollector.java index fd4894b..1003173 100644 --- a/DataMinerAlgorithmsCrawler/src/main/java/org/gcube/data/publishing/gCatFeeder/collectors/dm/DMAlgorithmsInfoCollector.java +++ b/DataMinerAlgorithmsCrawler/src/main/java/org/gcube/data/publishing/gCatFeeder/collectors/dm/DMAlgorithmsInfoCollector.java @@ -1,14 +1,18 @@ package org.gcube.data.publishing.gCatFeeder.collectors.dm; -import java.util.Collections; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; +import java.time.LocalDateTime; +import java.time.Month; +import java.time.ZoneId; +import java.time.chrono.IsoChronology; +import java.time.format.DateTimeFormatter; +import java.time.format.DateTimeFormatterBuilder; +import java.time.format.ResolverStyle; +import java.time.temporal.ChronoField; +import java.util.*; import java.util.regex.Matcher; import java.util.regex.Pattern; -import org.gcube.common.resources.gcore.ServiceEndpoint; + import org.gcube.data.analysis.dataminermanagercl.server.DataMinerService; import org.gcube.data.analysis.dataminermanagercl.server.dmservice.SClient; import org.gcube.data.analysis.dataminermanagercl.shared.process.Operator; @@ -23,16 +27,49 @@ import org.gcube.data.publishing.gCatfeeder.collectors.DataCollector; import org.gcube.data.publishing.gCatfeeder.collectors.model.faults.CollectorFault; import lombok.extern.slf4j.Slf4j; +import org.gcube.portlets.user.uriresolvermanager.exception.IllegalArgumentException; +import org.gcube.portlets.user.uriresolvermanager.exception.UriResolverMapException; + @Slf4j public class 
DMAlgorithmsInfoCollector implements DataCollector { - private static final Pattern p = Pattern.compile("\\{Published by (.*)\\((.*(\\.)?.*\\)).*\\}$"); + /* + * i.e. "Basic statistic max min average {Published by Giancarlo Panichi (giancarlo.panichi) on 2018/07/20 10:24 GMT}" + */ + + private static final Pattern descriptionPattern = Pattern.compile("\\{Published by (.*)\\((.*(\\.)?.*\\)) on (.*)\\}$"); + + + static final DateTimeFormatter versionDateParser= DateTimeFormatter.ofPattern("uuuu/MM/dd HH:mm 'GMT'[Z]") + .withResolverStyle(ResolverStyle.STRICT) + .withLocale(Locale.getDefault()) + .withZone(ZoneId.systemDefault()); + +/* static final DateTimeFormatter versionDateParser=new DateTimeFormatterBuilder() + .parseCaseInsensitive().parseLenient() + .appendValue(ChronoField.YEAR,4) + .appendLiteral('/') + .appendValue(ChronoField.MONTH_OF_YEAR,2) + .appendLiteral('/') + .appendValue(ChronoField.DAY_OF_MONTH,2) + .appendLiteral(' ') + .appendValue(ChronoField.HOUR_OF_DAY,2) + .appendLiteral(':') + .appendValue(ChronoField.MINUTE_OF_HOUR,2) + .appendLiteral(' ') + .appendOffset("+HHMM", "GMT") + .toFormatter().withChronology(IsoChronology.INSTANCE) + .withResolverStyle(ResolverStyle.SMART); */ + + - private Map env=null; - - + + + + + public void setEnvironmentConfiguration(EnvironmentConfiguration envConfig) { if(envConfig!=null) { log.debug("Current Environment Configuration is : "+envConfig.getCurrentConfiguration()); @@ -74,7 +111,7 @@ public class DMAlgorithmsInfoCollector implements DataCollector transform(Collection collectedData) { HashSet toReturn=new HashSet<>(); - boolean specifyProfile=true; + boolean specifyProfile=false; //never for(InternalAlgorithmDescriptor desc:collectedData) { toReturn.add(desc.asCKANModel(specifyProfile)); specifyProfile=false; //only once diff --git a/DataMinerAlgorithmsCrawler/src/main/java/org/gcube/data/publishing/gCatFeeder/collectors/dm/URIResolver.java 
b/DataMinerAlgorithmsCrawler/src/main/java/org/gcube/data/publishing/gCatFeeder/collectors/dm/URIResolver.java new file mode 100644 index 0000000..b570793 --- /dev/null +++ b/DataMinerAlgorithmsCrawler/src/main/java/org/gcube/data/publishing/gCatFeeder/collectors/dm/URIResolver.java @@ -0,0 +1,123 @@ +package org.gcube.data.publishing.gCatFeeder.collectors.dm; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.util.HashMap; +import java.util.Map; + + +import org.gcube.com.fasterxml.jackson.databind.ObjectMapper; +import org.gcube.data.publishing.gCatFeeder.utils.ContextUtils; +import org.gcube.portlets.user.uriresolvermanager.UriResolverManager; +import org.gcube.portlets.user.uriresolvermanager.exception.IllegalArgumentException; +import org.gcube.portlets.user.uriresolvermanager.exception.UriResolverMapException; + +public class URIResolver { + + + private static final String CTLG_RESOLVER_NAME="CTLG"; + + // private static final String CATALOGUE_CONTEXT = "gcube_scope"; + private static final String ENTITY_CONTEXT = "entity_context"; + private static final String ENTITY_NAME = "entity_name"; + + private static final String DATASET = "dataset"; + + protected ObjectMapper mapper; + + public URIResolver() { + this.mapper = new ObjectMapper(); + } + + protected StringBuilder getStringBuilder(InputStream inputStream) throws IOException { + StringBuilder result = new StringBuilder(); + try(BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream))) { + String line; + while((line = reader.readLine()) != null) { + result.append(line); + } + } + return result; + } + + + /* + { + "gcube_scope" : "/gcube/devsec/devVRE", + "entity_context" : "dataset", + "entity_name" : "sarda-sarda" + } + */ + public String getCatalogueItemURL(String name) throws UriResolverMapException, IllegalArgumentException { + UriResolverManager resolver = new 
UriResolverManager(CTLG_RESOLVER_NAME); + Map params = new HashMap(); + params.put(ENTITY_NAME, name); + params.put(ENTITY_CONTEXT,DATASET); + params.put("gcube_scope", ContextUtils.getCurrentScope()); + String shortLink = resolver.getLink(params, false); + return shortLink; + } + + /*public String getCatalogueItemURL(String name) { + try { + String uriResolverURL = getConfigurationFromIS(); + + ObjectNode requestContent = mapper.createObjectNode(); + requestContent.put(CATALOGUE_CONTEXT, ContextUtils.getCurrentScope()); + + requestContent.put(ENTITY_TYPE, DATASET); + requestContent.put(ENTITY_NAME, name); + + GXHTTPStringRequest gxhttpStringRequest = GXHTTPStringRequest.newRequest(uriResolverURL); + gxhttpStringRequest.from(CTLG_RESOLVER_NAME); + gxhttpStringRequest.header(HttpHeaders.CONTENT_TYPE, MediaType.APPLICATION_JSON); + gxhttpStringRequest.isExternalCall(true); + String body = mapper.writeValueAsString(requestContent); + HttpURLConnection httpURLConnection = gxhttpStringRequest.post(body); + + if(httpURLConnection.getResponseCode() != 200) { + try{ + IOUtils.copy(httpURLConnection.getInputStream(),System.out); + }catch(Throwable t){ + System.out.println("No message"); + } + throw new InternalServerErrorException("Unable to get Item URL via URI Resolver. Code : "+ + httpURLConnection.getResponseCode()); + } + + String url = getStringBuilder(httpURLConnection.getInputStream()).toString(); + + return url; + } catch(WebApplicationException e) { + throw e; + } catch(Exception e) { + throw new WebApplicationException(e); + } + }*/ + + + /* + static String getConfigurationFromIS() { + try { + + StringBuilder toReturn=new StringBuilder(); + ServiceEndpoint serviceEndpoint = ISUtils.queryForServiceEndpointsByName("Service","HTTP-URI-Resolver").get(0); + serviceEndpoint.profile().accessPoints(). 
+ forEach(a->{ + if(a.name().equals(CTLG_RESOLVER_NAME)) + toReturn.append(a.address()); + }); + if(toReturn.length()>0) return toReturn.toString(); + else throw new Exception("Access point for "+CTLG_RESOLVER_NAME+" not found "); + } catch(WebApplicationException e) { + throw e; + } catch(Exception e) { + throw new InternalServerErrorException("Error while getting configuration on IS", e); + } + + } + + */ +} diff --git a/DataMinerAlgorithmsCrawler/src/main/java/org/gcube/data/publishing/gCatFeeder/collectors/dm/model/InternalAlgorithmDescriptor.java b/DataMinerAlgorithmsCrawler/src/main/java/org/gcube/data/publishing/gCatFeeder/collectors/dm/model/InternalAlgorithmDescriptor.java index f7cfd6f..188ddcd 100644 --- a/DataMinerAlgorithmsCrawler/src/main/java/org/gcube/data/publishing/gCatFeeder/collectors/dm/model/InternalAlgorithmDescriptor.java +++ b/DataMinerAlgorithmsCrawler/src/main/java/org/gcube/data/publishing/gCatFeeder/collectors/dm/model/InternalAlgorithmDescriptor.java @@ -1,5 +1,6 @@ package org.gcube.data.publishing.gCatFeeder.collectors.dm.model; +import java.time.LocalDateTime; import java.util.ArrayList; import java.util.HashSet; import java.util.List; @@ -25,7 +26,10 @@ public class InternalAlgorithmDescriptor implements CustomData { private UserIdentity author; private UserIdentity maintainer; - + + private String version; + private LocalDateTime creationDate; + //category info private String categoryBriefDescription; @@ -38,8 +42,10 @@ public class InternalAlgorithmDescriptor implements CustomData { private Set outputParameters=new HashSet<>(); private String guiLink; + private String gatewayName; private String wpsLink; - + + private Boolean privateFlag; diff --git a/DataMinerAlgorithmsCrawler/src/main/java/org/gcube/data/publishing/gCatFeeder/collectors/dm/model/ckan/GCatModel.java b/DataMinerAlgorithmsCrawler/src/main/java/org/gcube/data/publishing/gCatFeeder/collectors/dm/model/ckan/GCatModel.java index b81cf2a..f306d2a 100644 --- 
a/DataMinerAlgorithmsCrawler/src/main/java/org/gcube/data/publishing/gCatFeeder/collectors/dm/model/ckan/GCatModel.java +++ b/DataMinerAlgorithmsCrawler/src/main/java/org/gcube/data/publishing/gCatFeeder/collectors/dm/model/ckan/GCatModel.java @@ -2,9 +2,14 @@ package org.gcube.data.publishing.gCatFeeder.collectors.dm.model.ckan; import java.io.ByteArrayOutputStream; import java.net.URL; +import java.text.SimpleDateFormat; +import java.time.LocalDateTime; +import java.time.format.DateTimeFormatter; +import java.time.format.DateTimeFormatterBuilder; import java.util.ArrayList; import org.gcube.data.publishing.gCatFeeder.collectors.dm.DataMinerCollectorProperties; +import org.gcube.data.publishing.gCatFeeder.collectors.dm.URIResolver; import org.gcube.data.publishing.gCatFeeder.collectors.dm.model.InternalAlgorithmDescriptor; import org.gcube.data.publishing.gCatFeeder.collectors.dm.model.Parameter; import org.gcube.data.publishing.gCatFeeder.collectors.dm.model.UserIdentity; @@ -25,6 +30,9 @@ import lombok.extern.slf4j.Slf4j; @Slf4j public class GCatModel implements CatalogueFormatData { + private static final DateTimeFormatter dateFormatter=DateTimeFormatter.ISO_DATE; + + private static ObjectMapper mapper=new ObjectMapper(); private static String profileXML=null; @@ -35,13 +43,29 @@ public class GCatModel implements CatalogueFormatData { profileXML=toSet; } + + + public static String getItemUrl(String name) { + try{ + URIResolver uriResolver = new URIResolver(); + log.debug("Evaluating item url for {}",name); + String catalogueItemURL = uriResolver.getCatalogueItemURL(name); + log.info("Item URL for {} is {}",name,catalogueItemURL); + return catalogueItemURL; + }catch(Exception e){ + log.warn("Unable to evaluate item URL for "+name,e); + return null; + } + } + public GCatModel(InternalAlgorithmDescriptor desc) { item=new CkanItem(); - // item.setAuthor(desc.getAuthor()); + item.setAuthor(desc.getAuthor().getFirstName()+" "+desc.getAuthor().getLastName()); 
item.setTitle(desc.getName()+" in "+ContextUtils.getCurrentScopeName()); item.setLicense_id("CC-BY-NC-SA-4.0"); - // item.setMaintainer(desc.getMaintainer()); + item.setMaintainer(desc.getMaintainer().getFirstName()+" "+desc.getMaintainer().getLastName()); + item.setName(item.getTitle().toLowerCase().toLowerCase().replaceAll(" ", "_")); for(String tag:desc.getTags()) { item.getTags().add(new CkanItem.Tag(fixTag(tag))); @@ -54,21 +78,53 @@ public class GCatModel implements CatalogueFormatData { item.setPrivateFlag(desc.getPrivateFlag()); for(Parameter param: desc.getInputParameters()) - item.getExtras().add(new CKanExtraField(profileID+":Input Parameter", - String.format("%1$s [%2$s] %3$s : %4$s", + item.getExtras().add(new CKanExtraField("TechnicalDetails:input", + String.format("%1$s [%2$s] %3$s : %4$s", param.getName(),param.getType(), ((param.getValue()!=null&&!param.getValue().isEmpty())?"default : "+param.getValue():""), param.getDescription()))); for(Parameter param: desc.getOutputParameters()) - item.getExtras().add(new CKanExtraField(profileID+":Output Parameter", - String.format("%1$s [%2$s] %3$s : %4$s", + item.getExtras().add(new CKanExtraField("TechnicalDetails:output", + String.format("%1$s [%2$s] %3$s : %4$s", param.getName(),param.getType(), ((param.getValue()!=null&&!param.getValue().isEmpty())?"default : "+param.getValue():""), param.getDescription()))); + item.getExtras().add(new CKanExtraField("Identity:Creator", desc.getAuthor().asStringValue())); + + + item.getExtras().add(new CKanExtraField("Identity:CreationDate", dateFormatter.format(desc.getCreationDate()))); + + item.getExtras().add(new CKanExtraField("AccessMode:UsageMode", "as-a-Service via Blue-Cloud Infrastructure")); + item.getExtras().add(new CKanExtraField("AccessMode:Availability", "On-Line")); + + item.getExtras().add(new CKanExtraField("TechnicalDetails:Hosting Environment", "gCube SmartGear")); + item.getExtras().add(new CKanExtraField("TechnicalDetails:Dependencies on Other 
SW", "gCube DataMiner")); + + item.getExtras().add(new CKanExtraField("Rights:Field/Scope of use", "Any use")); + item.getExtras().add(new CKanExtraField("Rights:Basic rights", "Communication")); + item.getExtras().add(new CKanExtraField("Rights:Basic rights", "Making available to the public")); + item.getExtras().add(new CKanExtraField("Rights:Basic rights", "Distribution")); + + item.getExtras().add(new CKanExtraField("Attribution:Attribution requirements", + String.format("Cite as: %1$s (%2$d): %3$s. %4$s. %5$s. %6$s. %7$s. Retrieved from the %8$s (%9$s) operated by D4Science.org www.d4science.org", + desc.getAuthor().asStringValue(), + LocalDateTime.now().getYear(), + desc.getName(), + desc.getVersion(), + "Blue-Cloud", + "DataMiner Process", + getItemUrl(item.getName()), + desc.getGatewayName(), + desc.getGuiLink()))); + + + + + //Algorithm Description // item.getExtras().add(new CKanExtraField(profileID+":Process Description", desc.getDescription())); @@ -77,8 +133,8 @@ public class GCatModel implements CatalogueFormatData { // Algorithm Users - item.getExtras().add(new CKanExtraField(profileID+":Process Author",desc.getAuthor().asStringValue())); - item.getExtras().add(new CKanExtraField(profileID+":Process Maintainer",desc.getAuthor().asStringValue())); + //item.getExtras().add(new CKanExtraField(profileID+":Process Author",desc.getAuthor().asStringValue())); + //item.getExtras().add(new CKanExtraField(profileID+":Process Maintainer",desc.getAuthor().asStringValue())); if(desc.getGuiLink()!=null) { try { @@ -112,23 +168,6 @@ public class GCatModel implements CatalogueFormatData { } - /** - * (Common) Title - * (Common) Description - * (Common) Tags: free list of keywords - * (Common) License - * (Common) Visibility: either public or private - * (Common) Version - * (Common) Author: the creator of metadata. Only one occurrence is supported; - * (Common) Maintainer: - * (Method specific) Creator: the author of the method (with email and ORCID). 
Repeatable field; - * (Method specific) Creation date: when the method has been released; - * (Method specific) Input: Repeatable field; - * (Method specific) Output: Repeatable field; - * (Method specific) RelatedPaper: a reference to an associated paper; - * (Method specific) Restrictions On Use: an optional text - * (Method specific) Attribution requirements: the text to use to acknowledge method usage; - */ static final String fixTag(String toFix) { diff --git a/DataMinerAlgorithmsCrawler/src/main/resources/org/gcube/data/publishing/gCatFeeder/collectors/dm/config.properties b/DataMinerAlgorithmsCrawler/src/main/resources/org/gcube/data/publishing/gCatFeeder/collectors/dm/config.properties index 6213182..6a6f419 100644 --- a/DataMinerAlgorithmsCrawler/src/main/resources/org/gcube/data/publishing/gCatFeeder/collectors/dm/config.properties +++ b/DataMinerAlgorithmsCrawler/src/main/resources/org/gcube/data/publishing/gCatFeeder/collectors/dm/config.properties @@ -1,3 +1,3 @@ -default_author=Gianpaolo Coro +default_author=Gianpaolo.Coro gui_param_name=OperatorId -ckan_resource_type=DataMiner Process \ No newline at end of file +ckan_resource_type=Method \ No newline at end of file diff --git a/DataMinerAlgorithmsCrawler/src/main/resources/org/gcube/data/publishing/gCatFeeder/collectors/dm/profile.xml b/DataMinerAlgorithmsCrawler/src/main/resources/org/gcube/data/publishing/gCatFeeder/collectors/dm/profile.xml index d32a8e3..417b979 100644 --- a/DataMinerAlgorithmsCrawler/src/main/resources/org/gcube/data/publishing/gCatFeeder/collectors/dm/profile.xml +++ b/DataMinerAlgorithmsCrawler/src/main/resources/org/gcube/data/publishing/gCatFeeder/collectors/dm/profile.xml @@ -1,41 +1,196 @@ - - - Input Parameter - false - String - Input parameter expected for the execution of the process - - - Output Parameter - false - String - Output parameter expected from the execution of the process - - - - Process Author + + + External Identifier false String * - - The name of the 
author, with email and ORCID. The format should - be: family, given[, email][, ORCID]. Example: Smith, John, - js@acme.org, orcid.org/0000-0002-1825-0097 + This applies only to methods that have been already published. Insert here a DOI, a handle, and any other Identifier assigned when publishing the dataset elsewhere. + + + Creator + true + String + * + The name of the creator, with email and ORCID. The format should be: family, given[, email][, ORCID]. Example: Smith, John, js@acme.org, orcid.org/0000-0002-1825-0097 - ^[a-zA-ZÀ-ÿ .'-]+, [a-zA-ZÀ-ÿ .'-]+[,]*([a-zA-Z0-9_!#$%’*+=?`{|}~^.-]+@[a-zA-Z0-9À-ÿ.-]+)?[,]*(orcid.org\/0000-000(1-[5-9]|2-[0-9]|3-[0-4])\d\d\d-\d\d\d[\dX])?$ + ^[a-zA-ZÀ-ÿ .'-]+, [a-zA-ZÀ-ÿ .'-]+[, ]*([a-zA-Z0-9_!#$%’*+=?`{|}~^.-]+@[a-zA-Z0-9À-ÿ.-]+)?[, ]*(orcid.org\/0000-000(1-[5-9]|2-[0-9]|3-[0-4])\d\d\d-\d\d\d[\dX])?$ - - Process Maintainer + + CreationDate + true + Time + 1 + The date of creation of the dataset (different from the date of registration of the dataset automatically added by the system). Use ISO 8601 Date Format: YYYY-MM-DD[ HH:MM] Ex. 1998-11-10 or 2015-05-29 11:55 + + ^(\d{4}\-(0?[1-9]|1[012])\-(0?[1-9]|[12][0-9]|3[01]))+([ ]+(\d{2}(:?\d{2})?)?)?$ + + + + Owner false String * - - The name of the author, with email and ORCID. The format should - be: family, given[, email][, ORCID]. Example: Smith, John, - js@acme.org, orcid.org/0000-0002-1825-0097 + The owner of the method (free text). + + + RelatedPaper + false + String + 1 + Insert a complete reference to an associated work. + + + Semantic Coverage + false + String + * + Tagging e.g. Functional analysis, Environment analysis and visualisation, ... + + + UsageMode + true + String + 1 + How the method is expected to be exploited. 
+ + Download + as-a-Application via Blue-Cloud Infrastructure + as-a-Application via third-party Infrastructure + as-a-Service via Blue-Cloud Infrastructure + as-a-Service via third-party Infrastructure + + + + Availability + false + String + 1 + How the availability to the resource is offered. On-line means that the method can be executed through the Virtual Laboratory Gateway. On-site means that the method can only be executed by visiting the hosting provider. + + On-Line + On-Site + + + + Hosting Environment + false + String + 1 + E.g. Linux, Microsoft Azure, Amazon EC2 + + + ProgrammingLanguage + false + String + 1 + The primary language used to implement the method. + + + Dependencies on Other SW + false + String + * + E.g. this software requires a Hadoop cluster to run + + + input + false + String + * + inputParametersType. See WPS specifications + + + output + false + String + * + outputType. See WPS specifications + + + IP/Copyrights + false + String + 1 + Whether software is covered by any rights: copyright, related rights, know how, proprietary, etc. + + + Field/Scope of use + true + String + 1 + + Any use + Non-commercial only + Research only + Non-commercial research only + Private use + Use for developing and providing a service + + + + Basic rights + true + String + 1 + + Temporary download of a single copy only + Download + Copying + Distribution + Modification + Communication + Making available to the public + Other rights + + + + Restrictions on use + false + String + 1 + Any restrictions on how or where the dataset may be used + + + Sublicense rights + false + String + 1 + Any restrictions on how or where the dataset may be used + + No + Yes + + + + Requirement of non-disclosure (confidentiality mark) + false + String + 1 + Requirement of non-disclosure (confidentiality mark). 
Whether the dataset bears confidentiality mark/may be used and shared subject to the obligation of non-disclosure + + + Embargo period + false + Time_Interval + 1 + Period of time during which the resource may be used. Use ISO 8601 Date Format: YYYY-MM-DD[ HH:MM] Ex. 2016-07-31 or 2015-05-10 12:00 - ^[a-zA-ZÀ-ÿ .'-]+, [a-zA-ZÀ-ÿ .'-]+[,]*([a-zA-Z0-9_!#$%’*+=?`{|}~^.-]+@[a-zA-Z0-9À-ÿ.-]+)?[,]*(orcid.org\/0000-000(1-[5-9]|2-[0-9]|3-[0-4])\d\d\d-\d\d\d[\dX])?$ + ^(\d{4}\-(0?[1-9]|1[012])\-(0?[1-9]|[12][0-9]|3[01]))+([ ]+(\d{2}(:?\d{2})?)?)?$ + + Attribution requirements + false + String + 1 + The text to acknowledge the resource when using it + + + Distribution requirements + false + String + 1 + The text to acknowledge the resource when distributing it + \ No newline at end of file diff --git a/DataMinerAlgorithmsCrawler/src/test/java/org/gcube/data/publishing/gCatFeeder/collectors/dm/ItemUrlTests.java b/DataMinerAlgorithmsCrawler/src/test/java/org/gcube/data/publishing/gCatFeeder/collectors/dm/ItemUrlTests.java new file mode 100644 index 0000000..4028495 --- /dev/null +++ b/DataMinerAlgorithmsCrawler/src/test/java/org/gcube/data/publishing/gCatFeeder/collectors/dm/ItemUrlTests.java @@ -0,0 +1,19 @@ +package org.gcube.data.publishing.gCatFeeder.collectors.dm; + +import org.gcube.common.authorization.library.provider.SecurityTokenProvider; +import org.gcube.data.publishing.gCatFeeder.tests.BaseCollectorTest; +import org.gcube.data.publishing.gCatFeeder.tests.TokenSetter; +import org.gcube.portlets.user.uriresolvermanager.exception.IllegalArgumentException; +import org.gcube.portlets.user.uriresolvermanager.exception.UriResolverMapException; +import org.junit.Test; + +public class ItemUrlTests extends BaseCollectorTest { + + @Test + public void getItemURL() throws UriResolverMapException, IllegalArgumentException { + //SecurityTokenProvider.instance.set("***REMOVED***"); + TokenSetter.set("/gcube/devsec/devVRE"); + String name = "fake"; + System.out.println(new 
URIResolver().getCatalogueItemURL(name)); + } +} diff --git a/DataMinerAlgorithmsCrawler/src/test/java/org/gcube/data/publishing/gCatFeeder/collectors/dm/TranslationTest.java b/DataMinerAlgorithmsCrawler/src/test/java/org/gcube/data/publishing/gCatFeeder/collectors/dm/TranslationTest.java index bccf28d..0baba31 100644 --- a/DataMinerAlgorithmsCrawler/src/test/java/org/gcube/data/publishing/gCatFeeder/collectors/dm/TranslationTest.java +++ b/DataMinerAlgorithmsCrawler/src/test/java/org/gcube/data/publishing/gCatFeeder/collectors/dm/TranslationTest.java @@ -1,13 +1,18 @@ package org.gcube.data.publishing.gCatFeeder.collectors.dm; import java.io.IOException; +import java.time.LocalDateTime; +import java.time.ZonedDateTime; import java.util.Collection; import java.util.Set; +import org.gcube.common.authorization.library.provider.SecurityTokenProvider; import org.gcube.data.publishing.gCatFeeder.collectors.dm.model.InternalAlgorithmDescriptor; +import org.gcube.data.publishing.gCatFeeder.collectors.dm.model.ckan.GCatModel; import org.gcube.data.publishing.gCatFeeder.model.CatalogueFormatData; import org.gcube.data.publishing.gCatFeeder.model.InternalConversionException; import org.gcube.data.publishing.gCatFeeder.tests.BaseCollectorTest; +import org.gcube.data.publishing.gCatFeeder.tests.TokenSetter; import org.gcube.data.publishing.gCatfeeder.collectors.CollectorPlugin; import org.gcube.data.publishing.gCatfeeder.collectors.DataCollector; import org.gcube.data.publishing.gCatfeeder.collectors.DataTransformer; @@ -20,6 +25,8 @@ import com.fasterxml.jackson.core.JsonGenerationException; import com.fasterxml.jackson.databind.JsonMappingException; import com.fasterxml.jackson.databind.ObjectMapper; +import static org.junit.Assert.*; + public class TranslationTest extends BaseCollectorTest{ @@ -38,12 +45,38 @@ public class TranslationTest extends BaseCollectorTest{ System.out.println(mapper.writeValueAsString(obj)+"\n"); for(String destinationcatalogue : 
(Set)plugin.getSupportedCatalogueTypes()) { - DataTransformer transformer=plugin.getTransformerByCatalogueType(destinationcatalogue); + DataTransformer transformer= + plugin.getTransformerByCatalogueType(destinationcatalogue); + System.out.println("Starting Transformation "+transformer.toString()); + for(Object data:transformer.transform(collected)) System.out.println(((CatalogueFormatData)data).toCatalogueFormat()); } } - + + + @Test + public void testParseDescription(){ + String s="balablabalba"; + assertEquals("n/a",DMAlgorithmsInfoCollector.parseDescriptionForDate(s)); + assertEquals(null, DMAlgorithmsInfoCollector.parseDescriptionForUser(s)); + s="Basic statistic max min average {Published by Giancarlo Panichi (giancarlo.panichi) on 2018/07/20 10:24 GMT}"; + assertEquals("2018/07/20 10:24 GMT",DMAlgorithmsInfoCollector.parseDescriptionForDate(s)); + assertEquals("Giancarlo Panichi ",DMAlgorithmsInfoCollector.parseDescriptionForUser(s)); + + + + System.out.println(DMAlgorithmsInfoCollector.versionDateParser.format(ZonedDateTime.now())); + System.out.println(DMAlgorithmsInfoCollector.versionDateParser.parse(DMAlgorithmsInfoCollector.parseDescriptionForDate(s))); + } + + @Test + public void testEnvironment(){ + Assume.assumeTrue(isTestInfrastructureEnabled()); + //assertNotNull(DMAlgorithmsInfoCollector.getWPSBasePath()); + //assertNotNull(GCatModel.getItemUrl("fake")); + System.out.println(getEnvironmentConfiguration().getCurrentConfiguration()); + } } diff --git a/gCat-Controller/src/test/java/org/gcube/data/publishing/gCatFeeder/catalogues/gCat/Interactions.java b/gCat-Controller/src/test/java/org/gcube/data/publishing/gCatFeeder/catalogues/gCat/Interactions.java index 5055d6b..eecea6a 100644 --- a/gCat-Controller/src/test/java/org/gcube/data/publishing/gCatFeeder/catalogues/gCat/Interactions.java +++ b/gCat-Controller/src/test/java/org/gcube/data/publishing/gCatFeeder/catalogues/gCat/Interactions.java @@ -10,6 +10,7 @@ import 
org.gcube.data.publishing.gCatFeeder.model.CatalogueFormatData; import org.gcube.data.publishing.gCatFeeder.model.CatalogueInstanceDescriptor; import org.gcube.data.publishing.gCatFeeder.model.InternalConversionException; import org.gcube.data.publishing.gCatFeeder.tests.BaseCataloguePluginTest; +import org.gcube.data.publishing.gCatFeeder.tests.TokenSetter; import org.gcube.data.publishing.gCataFeeder.catalogues.gCat.GCatPlugin; import org.junit.Assert; import org.junit.Assume; diff --git a/gCat-Controller/src/test/resources/org/gcube/data/publishing/gCatFeeder/catalogues/gCat/full-algorithm.json b/gCat-Controller/src/test/resources/org/gcube/data/publishing/gCatFeeder/catalogues/gCat/full-algorithm.json index 85bf340..0d6b645 100644 --- a/gCat-Controller/src/test/resources/org/gcube/data/publishing/gCatFeeder/catalogues/gCat/full-algorithm.json +++ b/gCat-Controller/src/test/resources/org/gcube/data/publishing/gCatFeeder/catalogues/gCat/full-algorithm.json @@ -1,57 +1,128 @@ { - "profileID": "DataMiner Process", - "profile": "\r\n\t\r\n\t\tInput Parameter<\/fieldName>\r\n\t\tfalse<\/mandatory>\r\n\t\tString<\/dataType>\r\n\t\tInput parameter expected for the execution of the process<\/note>\r\n\t<\/metadatafield>\t\r\n\t\r\n\t\tOutput Parameter<\/fieldName>\r\n\t\tfalse<\/mandatory>\r\n\t\tString<\/dataType>\r\n\t\tOutput parameter expected from the execution of the process\r\n\t\t<\/note>\r\n\t<\/metadatafield>\r\n\t\r\n\t\tProcess Author<\/fieldName>\r\n\t\tfalse<\/mandatory>\r\n\t\tString<\/dataType>\r\n\t\t*<\/maxOccurs>\r\n\t\t\r\n\t\tThe name of the author, with email and ORCID. The format should\r\n\t\t\tbe: family, given[, email][, ORCID]. 
Example: Smith, John,\r\n\t\t\tjs@acme.org, orcid.org\/0000-0002-1825-0097<\/note>\r\n\t\t\r\n\t\t\t^[a-zA-Z\u00C0-\u00FF .'-]+, [a-zA-Z\u00C0-\u00FF .'-]+[,\r\n\t\t\t\t]*([a-zA-Z0-9_!#$%\u2019*+=?`{|}~^.-]+@[a-zA-Z0-9\u00C0-\u00FF.-]+)?[,\r\n\t\t\t\t]*(orcid.org\\\/0000-000(1-[5-9]|2-[0-9]|3-[0-4])\\d\\d\\d-\\d\\d\\d[\\dX])?$<\/regularExpression>\r\n\t\t<\/validator>\r\n\t<\/metadatafield>\r\n\t\r\n\t\tProcess Maintainer<\/fieldName>\r\n\t\tfalse<\/mandatory>\r\n\t\tString<\/dataType>\r\n\t\t*<\/maxOccurs>\r\n\t\t\r\n\t\tThe name of the author, with email and ORCID. The format should\r\n\t\t\tbe: family, given[, email][, ORCID]. Example: Smith, John,\r\n\t\t\tjs@acme.org, orcid.org\/0000-0002-1825-0097<\/note>\r\n\t\t\r\n\t\t\t^[a-zA-Z\u00C0-\u00FF .'-]+, [a-zA-Z\u00C0-\u00FF .'-]+[,\r\n\t\t\t\t]*([a-zA-Z0-9_!#$%\u2019*+=?`{|}~^.-]+@[a-zA-Z0-9\u00C0-\u00FF.-]+)?[,\r\n\t\t\t\t]*(orcid.org\\\/0000-000(1-[5-9]|2-[0-9]|3-[0-4])\\d\\d\\d-\\d\\d\\d[\\dX])?$<\/regularExpression>\r\n\t\t<\/validator>\r\n\t<\/metadatafield>\r\n<\/metadataformat>", - "item": - { - "name": "timeextraction_in_prevre", - "title": "Timeextraction in preVRE", - "version": null, - "license_id": "CC-BY-NC-SA-4.0", - "author": null, - "maintainer": null, - "tags": [ - { - "name": "Geo Processing" - }, - { - "name": "preVRE" - }, - { - "name": "WPS" - }, - { - "name": "Analytics" - }], + "item": { + "author": "Gianpaolo Coro", "extras": [ - { - "key": "system:type", - "value": "DataMiner Process" - }, - { - "key": "DataMiner Process:Process Description", - "value": "An algorithm to extract a time series of values associated to a geospatial features repository (e.g. NETCDF, ASC, GeoTiff files etc. ). The algorithm analyses the time series and automatically searches for hidden periodicities. It produces one chart of the time series, one table containing the time series values and possibly the spectrogram." 
- }, - { - "key": "DataMiner Process:Process Author", - "value": "Sinibaldi,Fabio" - }, - { - "key": "DataMiner Process:Process Maintainer", - "value": "Sinibaldi,Fabio" - }], - "private": true + { + "key": "system:type", + "value": "Method" + }, + { + "key": "TechnicalDetails:input", + "value": "FeaturesColumnNames [COLUMN_LIST] : column Names for the features [a sequence of names of columns from OccurrencePointsTable separated by | ]" + }, + { + "key": "TechnicalDetails:input", + "value": "maxIterations [OBJECT] : XMeans max number of overall iterations of the clustering learning" + }, + { + "key": "TechnicalDetails:input", + "value": "minClusters [OBJECT] : minimum number of expected clusters" + }, + { + "key": "TechnicalDetails:input", + "value": "maxClusters [OBJECT] : maximum number of clusters to produce" + }, + { + "key": "TechnicalDetails:input", + "value": "OccurrencePointsClusterLabel [OBJECT] : table name of the resulting distribution" + }, + { + "key": "TechnicalDetails:input", + "value": "OccurrencePointsTable [TABULAR] default : : Occurrence Points Table. 
Max 4000 points [a http link to a table in UTF-8 encoding following this template: (GENERIC) A generic comma separated csv file in UTF-8 encoding]" + }, + { + "key": "TechnicalDetails:input", + "value": "min_points [OBJECT] : number of points which define an outlier set" + }, + { + "key": "TechnicalDetails:output", + "value": "non_deterministic_output [FILE] : NonDeterministicOutput" + }, + { + "key": "TechnicalDetails:output", + "value": "OutputTable [TABULAR] default : : Output cluster table [a http link to a table in UTF-8 ecoding following this template: (CLUSTER) http://goo.gl/PnKhhb]" + }, + { + "key": "Identity:Creator", + "value": "Coro, Gianpaolo" + }, + { + "key": "Identity:CreationDate", + "value": "1900-01-01" + }, + { + "key": "AccessMode:UsageMode", + "value": "as-a-Service via Blue-Cloud Infrastructure" + }, + { + "key": "AccessMode:Availability", + "value": "On-Line" + }, + { + "key": "TechnicalDetails:Hosting Environment", + "value": "gCube SmartGear" + }, + { + "key": "TechnicalDetails:Dependencies on Other SW", + "value": "gCube DataMiner" + }, + { + "key": "Rights:Field/Scope of use", + "value": "Any use" + }, + { + "key": "Rights:Basic rights", + "value": "Communication" + }, + { + "key": "Rights:Basic rights", + "value": "Making available to the public" + }, + { + "key": "Rights:Basic rights", + "value": "Distribution" + }, + { + "key": "Attribution:Attribution requirements", + "value": "Cite as: Coro, Gianpaolo (2021): Xmeans. n.d.. Blue-Cloud. DataMiner Process. https://data.dev.d4science.org/ctlg/devVRE/xmeans_in_devvre. 
Retrieved from the DEVGateway (https://next.d4science.org/group/devvre/dataminer?OperatorId=org.gcube.dataanalysis.wps.statisticalmanager.synchserver.mappedclasses.clusterers.XMEANS) operated by D4Science.org www.d4science.org" + } + ], + "license_id": "CC-BY-NC-SA-4.0", + "maintainer": "Gianpaolo Coro", + "name": "xmeans_in_devvre", + "notes": "A clustering algorithm for occurrence points that relies on the X-Means algorithm, i.e. an extended version of the K-Means algorithm improved by an Improve-Structure part. A Maximum of 4000 points is allowed.", + "private": false, + "tags": [ + { + "name": "Data Clustering" + }, + { + "name": "devVRE" + }, + { + "name": "WPS" + }, + { + "name": "Analytics" + } + ], + "title": "Xmeans in devVRE", + "version": null }, + "profile": "\n\t\n\t\tExternal Identifier\n\t\tfalse\n\t\tString\n\t\t*\n\t\tThis applies only to methods that have been already published. Insert here a DOI, an handle, and any other Identifier assigned when publishing the dataset alsewhere.\n\t\n\t\n\t\tCreator\n\t\ttrue\n\t\tString\n\t\t*\n\t\tThe name of the creator, with email and ORCID. The format should be: family, given[, email][, ORCID]. Example: Smith, John, js@acme.org, orcid.org/0000-0002-1825-0097\n\t\t\n\t\t\t^[a-zA-ZÀ-ÿ .'-]+, [a-zA-ZÀ-ÿ .'-]+[, ]*([a-zA-Z0-9_!#$%’*+=?`{|}~^.-]+@[a-zA-Z0-9À-ÿ.-]+)?[, ]*(orcid.org\\/0000-000(1-[5-9]|2-[0-9]|3-[0-4])\\d\\d\\d-\\d\\d\\d[\\dX])?$\n\t\t\n\t\n\t\n\t\tCreationDate\n\t\ttrue\n\t\tTime\n\t\t1\n\t\tThe date of creation of the dataset (different from the date of registration of the dataset automatically added by the system). Use ISO 8601 Date Format: YYYY-MM-DD[ HH:MM] Ex. 
1998-11-10 or 2015-05-29 11:55\n\t\t\n\t\t\t^(\\d{4}\\-(0?[1-9]|1[012])\\-(0?[1-9]|[12][0-9]|3[01]))+([ ]+(\\d{2}(:?\\d{2})?)?)?$\n\t\t\n\t\n\t\n\t\tOwner\n\t\tfalse\n\t\tString\n\t\t*\n\t\tThe owner of the method (free text).\n\t\n\t\n\t\tRelatedPaper\n\t\tfalse\n\t\tString\n\t\t1\n\t\tInsert a complete reference to an associated work.\n\t\n\t\n\t\tSemantic Coverage\n\t\tfalse\n\t\tString\n\t\t*\n\t\tTagging e.g. Functional analysis, Environment analysis and visualisation, ...\n\t\n\t\n\t\tUsageMode\n\t\ttrue\n\t\tString\n\t\t1\n\t\tHow the method is expected to be exploited.\n\t\t\n\t\t\tDownload\n\t\t\tas-a-Application via Blue-Cloud Infrastructure\n\t\t\tas-a-Application via third-party Infrastructure\n\t\t\tas-a-Service via Blue-Cloud Infrastructure\n\t\t\tas-a-Service via third-party Infrastructure\n\t\t\n\t\n\t\n\t\tAvailability\n\t\tfalse\n\t\tString\n\t\t1\n\t\tHow the availability to the resource is offered. On-line means that the method can be executed through the Virtual Laboratory Gateway. On-site means that the method can only be executed by visiting the hosting provider.\n\t\t\n\t\t\tOn-Line\n\t\t\tOn-Site\n\t\t\n\t\n\t\n\t\tHosting Environment\n\t\tfalse\n\t\tString\n\t\t1\n\t\tE.g. Linux, Microsoft Azure, Amazon EC2\n\t\n\t\n\t\tProgrammingLanguage\n\t\tfalse\n\t\tString\n\t\t1\n\t\tThe primary language used to implement the method. \n\t\n\t\n\t\tDependencies on Other SW\n\t\tfalse\n\t\tString\n\t\t*\n\t\tE.g. this sowftware requires an Hadoop cluster to run\n\t\n\t\n\t\tinput\n\t\tfalse\n\t\tString\n\t\t*\n\t\tinputParametersType. See WPS specifications\n\t\n\t\n\t\toutput\n\t\tfalse\n\t\tString\n\t\t*\n\t\toutputType. 
See WPS specifications \n\t\n\t\n\t\tIP/Copyrights\n\t\tfalse\n\t\tString\n\t\t1\n\t\tWhether software is covered by any rights: copyright, related rights, know how, proprietary, etc.\n\t\n\t\n\t\tField/Scope of use\n\t\ttrue\n\t\tString\n\t\t1\n\t\t\n\t\t\tAny use\n\t\t\tNon-commercial only\n\t\t\tResearch only\n\t\t\tNon-commercial research only\n\t\t\tPrivate use\n\t\t\tUse for developing and providing a service\n\t\t\n\t\n\t\n\t\tBasic rights\n\t\ttrue\n\t\tString\n\t\t1\n\t\t\n\t\t\tTemporary download of a single copy only\n\t\t\tDownload\n\t\t\tCopying\n\t\t\tDistribution\n\t\t\tModification\n\t\t\tCommunication\n\t\t\tMaking available to the public\n\t\t\tOther rights\n\t\t\n\t\n\t\n\t\tRestrictions on use\n\t\tfalse\n\t\tString\n\t\t1\n\t\tAny restrictions on how where the dataset may be used \n\t\n\t\n\t\tSublicense rights\n\t\tfalse\n\t\tString\n\t\t1\n\t\tAny restrictions on how where the dataset may be used\n\t\t\n\t\t\tNo\n\t\t\tYes\n\t\t\n\t\n\t\n\t\tRequirement of non-disclosure (confidentiality mark)\n\t\tfalse\n\t\tString\n\t\t1\n\t\tRequirement of non-disclosure (confidentiality mark). Whether the dataset bears confidentiality mark/may be used and shared subject to the obligation of non-disclosure\n\t\n\t\n\t\tEmbargo period\n\t\tfalse\n\t\tTime_Interval\n\t\t1\n\t\tPeriod of time during which the resource may be used. Use ISO 8601 Date Format: YYYY-MM-DD[ HH:MM] Ex. 
2016-07-31 or 2015-05-10 12:00\n\t\t\n\t\t\t^(\\d{4}\\-(0?[1-9]|1[012])\\-(0?[1-9]|[12][0-9]|3[01]))+([ ]+(\\d{2}(:?\\d{2})?)?)?$\n\t\t\n\t\n\t\n\t\tAttribution requirements\n\t\tfalse\n\t\tString\n\t\t1\n\t\tThe text to acknowledge the resource when using it\n\t\n\t\n\t\tDistribution requirements\n\t\tfalse\n\t\tString\n\t\t1\n\t\tThe text to acknowledge the resource when distributing it\n\t\n", + "profileID": "Method", "resources": [ - { - "name": "Gateway Link", - "url": "https://pre.d4science.org/group/prevre/dataminer-manager?OperatorId=org.gcube.dataanalysis.wps.statisticalmanager.synchserver.mappedclasses.transducerers.TIMEEXTRACTION", - "format": "HTTP", - "description": "Link to the GUI designed to operate with DataMiner" - }, - { - "name": "WPS Link", - "url": "https://dataminer1-pre.d4science.org/wps/WebProcessingService?Request=DescribeProcess&Version=1.0.0&Service=WPS&Identifier=org.gcube.dataanalysis.wps.statisticalmanager.synchserver.mappedclasses.transducerers.TIMEEXTRACTION", - "format": "WPS", - "description": "WPS Link to the DataMiner Process" - }] + { + "description": "Link to the GUI designed to operate with DataMiner", + "format": "https", + "name": "Gateway Link", + "url": "https://next.d4science.org/group/devvre/dataminer?OperatorId=org.gcube.dataanalysis.wps.statisticalmanager.synchserver.mappedclasses.clusterers.XMEANS" + }, + { + "description": "WPS Link to the Method", + "format": "WPS", + "name": "WPS Link", + "url": "https://dataminer1.dev.d4science.org:443/wps/WebProcessingService?Request=DescribeProcess&Version=1.0.0&Service=WPS&Identifier=org.gcube.dataanalysis.wps.statisticalmanager.synchserver.mappedclasses.clusterers.XMEANS" + } + ] } \ No newline at end of file diff --git a/gCat-Feeder/src/test/resources/logback.xml b/gCat-Feeder/src/test/resources/logback.xml index 6ea2f23..cdeec9f 100644 --- a/gCat-Feeder/src/test/resources/logback.xml +++ b/gCat-Feeder/src/test/resources/logback.xml @@ -8,7 +8,7 @@ - +