From 847046a06d5e05642c786fda31792ca51df24649 Mon Sep 17 00:00:00 2001 From: Fabio Sinibaldi Date: Wed, 24 Jun 2020 16:47:01 +0200 Subject: [PATCH] Added split operation --- CHANGELOG.md | 2 +- .../publishing/ckan2zenodo/Translator.java | 108 ++--- .../ckan2zenodo/model/parsing/Mapping.java | 4 + .../model/zenodo/DateInterval.java | 5 +- .../org/gcube/tests/TransformationTests.java | 21 +- src/test/resources/blue_cloud_dataset.json | 239 ++++++++++ src/test/resources/blue_cloud_dataset.xml | 410 ++++++++++++++++++ 7 files changed, 736 insertions(+), 53 deletions(-) create mode 100644 src/test/resources/blue_cloud_dataset.json create mode 100644 src/test/resources/blue_cloud_dataset.xml diff --git a/CHANGELOG.md b/CHANGELOG.md index 27b4181..49c69e7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,7 +4,7 @@ ### Enhancements - Default Ckan2Zenodo translation to comply with mandatory Zenodo fields (https://support.d4science.org/issues/19489) - Ckan2Zenodo library to provide means to apply default translation only (https://support.d4science.org/issues/19490) - +- Support for "split" on source values This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
\ No newline at end of file diff --git a/src/main/java/org/gcube/data/publishing/ckan2zenodo/Translator.java b/src/main/java/org/gcube/data/publishing/ckan2zenodo/Translator.java index e7d1982..4fcdd02 100644 --- a/src/main/java/org/gcube/data/publishing/ckan2zenodo/Translator.java +++ b/src/main/java/org/gcube/data/publishing/ckan2zenodo/Translator.java @@ -44,14 +44,14 @@ public class Translator { @NonNull private List mappings; - + @NonNull private ResourceFilter resourceFilter; public Translator() { this(Collections.EMPTY_LIST,new ResourceFilter(Collections.EMPTY_LIST)); } - + public Translator(List mappings) { this(mappings,new ResourceFilter(Collections.EMPTY_LIST)); } @@ -68,7 +68,7 @@ public class Translator { // UPLOAD TYPE meta.setUpload_type(UploadType.other); - + // TITLE meta.setTitle(toTransform.getTitle()); // DESCRIPTION @@ -95,22 +95,22 @@ public class Translator { meta.setRelated_identifiers(relatedIdentifiers); - - + + //CONTRIBUTORS & CREATORS ArrayList contributors=new ArrayList<>(); ArrayList creators=new ArrayList<>(); - - + + String authorName=toTransform.getAuthor(); if(authorName!=null) { Contributor author=new Contributor(Contributor.Type.Producer); author.setName(authorName); contributors.add(author); - - + + creators.add(new Creator(authorName)); - + } String maintainerName=toTransform.getAuthor(); @@ -118,20 +118,20 @@ public class Translator { Contributor maintainer=new Contributor(Contributor.Type.DataCurator); maintainer.setName(maintainerName); contributors.add(maintainer); - - + + creators.add(new Creator(authorName)); } - + // D4Science as contributor Contributor d4Science=new Contributor(Contributor.Type.HostingInstitution); d4Science.setName("D4Science"); contributors.add(d4Science); - + meta.setContributors(contributors); meta.setCreators(creators); - - + + // VERSION meta.setVersion(toTransform.getVersion()); @@ -139,8 +139,8 @@ public class Translator { // DATES // PUBLICATION DATE = now 
meta.setPublication_date(Date.from(Instant.now())); - - + + deposition.setMetadata(meta); // profile specific mappings @@ -158,34 +158,44 @@ public class Translator { try { // extract source List sourceValues=new ArrayList<>(); - + for(Value v: mapping.getSource().getValues()) { - + + String actualValue=null; switch(v.getType()) { case constant : { - sourceValues.add(v.getValue()); + actualValue=v.getValue(); break; } case jsonPath : { for(String s: ((Collection) sourceCtx.read(v.getValue()))){ if(s!=null) { s=s.trim(); - if(!s.isEmpty())sourceValues.add(s); + if(!s.isEmpty())actualValue=s; } - + } break; } } - + + + // Adding to actual values + if(actualValue!=null) { + if(v.getSplit()!=null) + for(String toAdd:actualValue.split(v.getSplit())) + sourceValues.add(toAdd.trim()); + else sourceValues.add(actualValue); + } + if(!sourceValues.isEmpty()) break; - + } - - - - - + + + + + for(String sourceValue:sourceValues) { String resultingValue=sourceValue; @@ -214,16 +224,16 @@ public class Translator { // apply value mappings resultingValue =mapping.getValueMapping().getOrDefault(sourceValue, resultingValue); - + // check if targetPath exists List targetElementFound=targetCtx.read(mapping.getTargetPath()); if(targetElementFound==null || targetElementFound.size()==0 || targetElementFound.get(0)==null) { -// targetCtx=targetCtx.add(mapping.getTargetPath(),Collections.singletonList("nothing")); - Parsing.addElement(targetCtx, mapping.getTargetPath()); + // targetCtx=targetCtx.add(mapping.getTargetPath(),Collections.singletonList("nothing")); + targetCtx=Parsing.addElement(targetCtx, mapping.getTargetPath()); } - + // apply resulting value - + targetCtx=targetCtx.put(mapping.getTargetPath(),mapping.getTargetElement(),resultingValue); } }catch(Throwable t) { @@ -239,22 +249,22 @@ public class Translator { } } - + public List filterResources(CkanItemDescriptor source) throws TransformationException{ try { - ObjectMapper mapper=Parsing.getMapper(); - DocumentContext 
sourceCtx=JsonPath.using(Parsing.JSON_PATH_ALWAYS_LIST_CONFIG).parse(source.getContent()); - HashSet toReturn=new HashSet(); - for(Filter f:resourceFilter.getFilters()) { - JSONArray filtered=sourceCtx.read(f.getConditions().get(0)); - for(Object obj:filtered) { - Map map=(Map) obj; - - toReturn.add(mapper.readValue((new JSONObject(map)).toJSONString(), CkanResource.class)); - } - } - - return new ArrayList(toReturn); + ObjectMapper mapper=Parsing.getMapper(); + DocumentContext sourceCtx=JsonPath.using(Parsing.JSON_PATH_ALWAYS_LIST_CONFIG).parse(source.getContent()); + HashSet toReturn=new HashSet(); + for(Filter f:resourceFilter.getFilters()) { + JSONArray filtered=sourceCtx.read(f.getConditions().get(0)); + for(Object obj:filtered) { + Map map=(Map) obj; + + toReturn.add(mapper.readValue((new JSONObject(map)).toJSONString(), CkanResource.class)); + } + } + + return new ArrayList(toReturn); }catch(Throwable t) { log.error("Unable to filter resources. ",t); throw new TransformationException("Unable to filter "+source.getName()+" resources",t); diff --git a/src/main/java/org/gcube/data/publishing/ckan2zenodo/model/parsing/Mapping.java b/src/main/java/org/gcube/data/publishing/ckan2zenodo/model/parsing/Mapping.java index d1c5302..aa84c30 100644 --- a/src/main/java/org/gcube/data/publishing/ckan2zenodo/model/parsing/Mapping.java +++ b/src/main/java/org/gcube/data/publishing/ckan2zenodo/model/parsing/Mapping.java @@ -41,14 +41,18 @@ public class Mapping { public static enum Type{ jsonPath,constant } + + private String split; @NonNull private Type type; @NonNull private String value; } + private List values=new ArrayList<>(); } + @NonNull private Source source; @NonNull diff --git a/src/main/java/org/gcube/data/publishing/ckan2zenodo/model/zenodo/DateInterval.java b/src/main/java/org/gcube/data/publishing/ckan2zenodo/model/zenodo/DateInterval.java index 6e4564b..037a5cf 100644 --- a/src/main/java/org/gcube/data/publishing/ckan2zenodo/model/zenodo/DateInterval.java +++ 
b/src/main/java/org/gcube/data/publishing/ckan2zenodo/model/zenodo/DateInterval.java @@ -4,7 +4,6 @@ import java.util.Date; import lombok.Data; import lombok.NoArgsConstructor; -import lombok.NonNull; @Data @NoArgsConstructor @@ -12,8 +11,10 @@ public class DateInterval { public static enum Type{ Collected, Valid, Withdrawn } - + //TODO multiple patterns +// @JsonFormat(pattern = Commons.ISO_DATE_PATTERN) private Date start; +// @JsonFormat(pattern = Commons.ISO_DATE_PATTERN) private Date end; // @NonNull private Type type; diff --git a/src/test/java/org/gcube/tests/TransformationTests.java b/src/test/java/org/gcube/tests/TransformationTests.java index 702c871..92a75a9 100644 --- a/src/test/java/org/gcube/tests/TransformationTests.java +++ b/src/test/java/org/gcube/tests/TransformationTests.java @@ -1,5 +1,9 @@ package org.gcube.tests; +import java.util.HashMap; +import java.util.Map; +import java.util.Map.Entry; + import org.gcube.common.resources.gcore.GenericResource; import org.gcube.common.resources.gcore.Resources; import org.gcube.data.publishing.ckan2zenodo.Translator; @@ -14,9 +18,14 @@ import com.fasterxml.jackson.databind.ObjectMapper; public class TransformationTests { static ObjectMapper mapper=null; + + static Map mappings=new HashMap(); + @BeforeClass public static void init () { mapper=TestCommons.getMapper(); + + mappings.put("/blue_cloud_dataset.json", "/blue_cloud_dataset.xml"); } @@ -36,7 +45,17 @@ public class TransformationTests { GenericResource res=Resources.unmarshal(GenericResource.class, TransformationTests.class.getResourceAsStream("/ResearchObject.xml")); Translator t=new Translator(IS.readMappings(res)); // TestCommons.readAndTransform("/ResearchObject.json", t); - TestCommons.readAndTransform("/crop_parameters.json",t); +// TestCommons.readAndTransform("/crop_parameters.json",t); + + + + for(Entry entry:mappings.entrySet()) { + GenericResource resource=Resources.unmarshal(GenericResource.class, 
TransformationTests.class.getResourceAsStream(entry.getValue())); + Translator translator=new Translator(IS.readMappings(resource)); + TestCommons.readAndTransform(entry.getKey(), translator); + } + + } @Test diff --git a/src/test/resources/blue_cloud_dataset.json b/src/test/resources/blue_cloud_dataset.json new file mode 100644 index 0000000..769618c --- /dev/null +++ b/src/test/resources/blue_cloud_dataset.json @@ -0,0 +1,239 @@ +{ + "author": "Pagano Pasquale", + "author_email": "pasquale.pagano@isti.cnr.it", + "creator_user_id": "a8e26a67-352b-4188-9b14-5e142d484266", + "extras": [ + { + "key": "AccessMode:Accessibility", + "value": "Both" + }, + { + "key": "AccessMode:AccessibilityMode", + "value": "OnLine Access" + }, + { + "key": "AccessMode:AccessibilityMode", + "value": "API Access" + }, + { + "key": "AccessMode:AccessibilityMode", + "value": "Download" + }, + { + "key": "AccessMode:Availability", + "value": "On-Line" + }, + { + "key": "Attribution:Attribution requirements", + "value": "put the following text" + }, + { + "key": "Attribution:Display requirements", + "value": "visualize the following text" + }, + { + "key": "Attribution:Distribution requirements", + "value": "add the following text" + }, + { + "key": "Coverage:Semantic Coverage", + "value": "Ocean analysis; Ocean colour" + }, + { + "key": "Coverage:SpatialCoverage", + "value": "" + }, + { + "key": "Coverage:TimeCoverage", + "value": "2020-01-01 /2020-06-15 " + }, + { + "key": "DataProtection:Consent obtained also covers the envisaged transfer of the personal data outside the EU", + "value": "Yes" + }, + { + "key": "DataProtection:Consent of the data subject", + "value": "N/A (Not appliable)" + }, + { + "key": "DataProtection:DataProtectionDirective", + "value": "" + }, + { + "key": "DataProtection:Personal data was manifestly made public by the data subject", + "value": "N/A (Not appliable)" + }, + { + "key": "DataProtection:PersonalData", + "value": "No" + }, + { + "key": 
"DataProtection:PersonalSensitiveData", + "value": "N/A (Not appliable)" + }, + { + "key": "Identity:CreationDate", + "value": "2020-06-15 " + }, + { + "key": "Identity:Creator", + "value": "Pagano, Pasquale" + }, + { + "key": "Identity:External Identifier", + "value": "" + }, + { + "key": "Identity:Owner", + "value": "Pagano, Pasquale" + }, + { + "key": "Identity:RelatedPaper", + "value": "" + }, + { + "key": "Item URL", + "value": "http://data.d4science.org/ctlg/MarineEnvironmentalIndicators/my_title" + }, + { + "key": "Rights:Basic rights", + "value": "Download" + }, + { + "key": "Rights:Basic rights", + "value": "Copying" + }, + { + "key": "Rights:Basic rights", + "value": "Distribution" + }, + { + "key": "Rights:Basic rights", + "value": "Modification" + }, + { + "key": "Rights:Basic rights", + "value": "Communication" + }, + { + "key": "Rights:Field/Scope of use", + "value": "Any use" + }, + { + "key": "Rights:Field/Scope of use", + "value": "Non-commercial only" + }, + { + "key": "Rights:IP/Copyrights", + "value": "" + }, + { + "key": "Rights:License term", + "value": "2020-06-15 /2020-12-01 " + }, + { + "key": "Rights:Requirement of non-disclosure (confidentiality mark) ", + "value": "" + }, + { + "key": "Rights:Restrictions on use", + "value": "" + }, + { + "key": "Rights:Sublicense rights", + "value": "Yes" + }, + { + "key": "Rights:Territory of use", + "value": "World Wide" + }, + { + "key": "TechnicalDetails:DiskSize", + "value": "1000000000" + }, + { + "key": "TechnicalDetails:Format", + "value": "NetCDF" + }, + { + "key": "TechnicalDetails:FormatSchema", + "value": "" + }, + { + "key": "TechnicalDetails:Language", + "value": "Select Language" + }, + { + "key": "TechnicalDetails:ManifestationType", + "value": "Virtual" + }, + { + "key": "TechnicalDetails:ProcessingDegree", + "value": "Primary" + }, + { + "key": "TechnicalDetails:Size", + "value": "100 milion grid cells" + }, + { + "key": "system:type", + "value": "Dataset" + } + ], + "groups": [], + 
"id": "7b01241e-bdec-477a-9a0f-c606c78528c8", + "isopen": true, + "license_id": "EUDatagrid", + "license_title": "EU DataGrid Software License", + "license_url": "http://www.opensource.org/licenses/EUDatagrid", + "maintainer": "Pagano Pasquale", + "maintainer_email": "pasquale.pagano@isti.cnr.it", + "metadata_created": "2020-06-15T09:25:13.052848", + "metadata_modified": "2020-06-15T09:25:14.149494", + "name": "my_title", + "notes": "-- Marine-ID is managed and operated by IFREMER. Best is to contact Gilbert \r\nMaudire as BC contact for more details about Marine-ID. Marine-ID is AAI in \r\nSeaDataNet and also for IFREMER CORIOLIS data service. As part of \r\nSeaDataCloud we have established interoperability from Marine-ID to \r\nB2ACCESS. Further work is ongoing in SeaDataCloud for interoperability from \r\nsocial IDs (OpenID, FaceBook, ..) and EduGAIN to Marine-ID.", + "num_resources": 0, + "num_tags": 2, + "organization": { + "approval_status": "approved", + "created": "2020-05-27T14:12:30.843210", + "description": "A Virtual Lab to implement the Blue-Cloud Demonstrator 3. 
It will provide a web service for Environmental Agencies and research users.", + "id": "e9e46183-2fcd-4386-899b-18fdaa97d228", + "image_url": "https://blue-cloud.d4science.org/image/layout_set_logo?img_id=238284217", + "is_organization": true, + "name": "marineenvironmentalindicators", + "revision_id": "955a9655-893c-4f18-bdf5-42806d668f00", + "state": "active", + "title": "Marine Environmental Indicators", + "type": "organization" + }, + "owner_org": "e9e46183-2fcd-4386-899b-18fdaa97d228", + "private": false, + "rating": 0.0, + "ratings_count": 0, + "relationships_as_object": [], + "relationships_as_subject": [], + "resources": [], + "revision_id": "0f06ac29-53f0-4f47-9019-4e5ab578cbd8", + "state": "active", + "tags": [ + { + "display_name": "MarineID", + "id": "48e6484c-6ab6-4114-9a82-3d278f1c5077", + "name": "MarineID", + "state": "active", + "vocabulary_id": null + }, + { + "display_name": "Ocean", + "id": "10084e09-9640-4dd6-95af-bc4279144216", + "name": "Ocean", + "state": "active", + "vocabulary_id": null + } + ], + "title": "My title", + "type": "dataset", + "url": null, + "version": "1" +} \ No newline at end of file diff --git a/src/test/resources/blue_cloud_dataset.xml b/src/test/resources/blue_cloud_dataset.xml new file mode 100644 index 0000000..26eb82d --- /dev/null +++ b/src/test/resources/blue_cloud_dataset.xml @@ -0,0 +1,410 @@ + + 4adeaca2-8e32-4507-8937-d891629998e2 + GenericResource + + + Ckan-Zenodo-Mappings + Dataset + Simple mappings tests + + + + $.resources[?(@.format)] + + + + + + + + + dataset + + $.metadata + upload_type + + + + + + $.extras[?(@.key=='Identity:External Identifier')].value + + $.metadata.relatedIdentifier[1] + identifier + + + + + isAlternateIdentifier + + $.metadata.relatedIdentifier[1] + type + + + + + + + $.extras[?(@.key=='Identity:Creator')].value + + $.metadata.creators[2] + name + + + + + + + + $.extras[?(@.key=='Identity:Owner')].value + + $.metadata.contributors[3] + name + + + + + ContactPerson + + 
$.metadata.contributors[3] + type + + + + + + + + $.extras[?(@.key=='Identity:RelatedPaper')].value + + $.metadata.relatedIdentifier[2] + identifier + + + + + isCitedBy + + $.metadata.relatedIdentifier[2] + type + + + + + + + $.extras[?(@.key=='Coverage:Semantic Coverage')].value + + $.metadata + keywords[*] + + + + + + $.extras[?(@.key=='Coverage:TimeCoverage')].value + + $.metadata.dates[0] + start + + .* + + + [/ ] + + + + + + $.extras[?(@.key=='Coverage:TimeCoverage')].value + + $.metadata.dates[0] + end + + .* + + + [/ ] + + + + + + Valid + + $.metadata.dates[0] + type + + + + + + + + + + $.extras[?(@.key=='AccessMode:Accessibility')].value + + $.metadata + access_conditions + + ^ + AccessMode.Accessibility : + + + $ + ; + + + + + + + $.extras[?(@.key=='AccessMode:Availability')].value + + $.metadata + access_conditions + + + + + + + $.extras[?(@.key=='AccessMode:AccessibilityMode')].value + + $.metadata + access_conditions + + + + + + + $.extras[?(@.key=='TechnicalDetails:ProcessingDegree')].value + + $.metadata + notes + + + + + + $.extras[?(@.key=='TechnicalDetails:ManifestationType')].value + + $.metadata + notes + + + + + + $.extras[?(@.key=='TechnicalDetails:Language')].value + + $.metadata + language + + + + + + $.extras[?(@.key=='TechnicalDetails:Size')].value + + $.metadata + notes + + + + + + $.extras[?(@.key=='TechnicalDetails:DiskSize')].value + + $.metadata + notes + + + + + + $.extras[?(@.key=='TechnicalDetails:Format')].value + + $.metadata + notes + + + + + + $.extras[?(@.key=='TechnicalDetails:FormatSchema')].value + + $.metadata + notes + + + + + + + $.extras[?(@.key=='DataProtection:PersonalData')].value + + $.metadata + notes + + + + + + $.extras[?(@.key=='DataProtection:PersonalSensitiveData')].value + + $.metadata + notes + + + + + + + $.extras[?(@.key=='DataProtection:Consent of the data subject')].value + + $.metadata + notes + + + + + + $.extras[?(@.key=='DataProtection:Consent obtained also covers the envisaged transfer of the 
personal data outside the EU')].value + + $.metadata + notes + + + + + + $.extras[?(@.key=='DataProtection:Personal data was manifestly made public by the data subject')].value + + $.metadata + notes + + + + + + $.extras[?(@.key=='DataProtection:PersonalSensitiveData')].value + + $.metadata + notes + + + + + + + $.extras[?(@.key=='Rights:IP/Copyrights')].value + + $.metadata + notes + + + + + + $.extras[?(@.key=='Rights:Field/Scope of use')].value + + $.metadata + notes + + + + + + + $.extras[?(@.key=='Rights:Basic rights')].value + + $.metadata + notes + + + + + + + $.extras[?(@.key=='Rights:Restrictions on use')].value + + $.metadata + notes + + + + + + + $.extras[?(@.key=='Rights:Sublicense rights')].value + + $.metadata + notes + + + + + + $.extras[?(@.key=='Rights:Territory of use')].value + + $.metadata + notes + + + + + + $.extras[?(@.key=='Rights:License term')].value + + $.metadata + notes + + + + + + + $.extras[?(@.key=='Rights:Requirement of non-disclosure (confidentiality mark)')].value + + $.metadata + notes + + + + + + + $.extras[?(@.key=='Attribution:Attribution requirements')].value + + $.metadata + notes + + + + + + + $.extras[?(@.key=='Attribution:Attribution requirements')].value + + $.metadata + notes + + + + + + + $.extras[?(@.key=='Attribution:Distribution requirements')].value + + $.metadata + notes + + + + + + + \ No newline at end of file