From 681f82e7c6c646b9be9226d99468da3b19adf637 Mon Sep 17 00:00:00 2001 From: "fabio.sinibaldi" Date: Tue, 3 Dec 2019 17:20:12 +0100 Subject: [PATCH] Added regexp management --- .../publishing/ckan2zenodo/Transformer.java | 32 ++++++++++-- .../ckan2zenodo/TransformerManager.java | 51 ++++++++++++------- .../publishing/ckan2zenodo/model/Mapping.java | 21 ++++++-- .../org/gcube/tests/TransformationTests.java | 2 - .../src/test/resources/ResearchObject.xml | 38 ++++++++++++++ 5 files changed, 119 insertions(+), 25 deletions(-) create mode 100644 ckan2zenodo-library/src/test/resources/ResearchObject.xml diff --git a/ckan2zenodo-library/src/main/java/org/gcube/data/publishing/ckan2zenodo/Transformer.java b/ckan2zenodo-library/src/main/java/org/gcube/data/publishing/ckan2zenodo/Transformer.java index bbc5ef7..8c2c1ec 100644 --- a/ckan2zenodo-library/src/main/java/org/gcube/data/publishing/ckan2zenodo/Transformer.java +++ b/ckan2zenodo-library/src/main/java/org/gcube/data/publishing/ckan2zenodo/Transformer.java @@ -3,17 +3,19 @@ package org.gcube.data.publishing.ckan2zenodo; import java.io.IOException; import java.util.ArrayList; import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; import org.gcube.data.publishing.ckan2zenodo.commons.Parsing; import org.gcube.data.publishing.ckan2zenodo.model.CkanItemDescriptor; import org.gcube.data.publishing.ckan2zenodo.model.Mapping; +import org.gcube.data.publishing.ckan2zenodo.model.Mapping.Regexp; import org.gcube.data.publishing.ckan2zenodo.model.zenodo.Contributor; import org.gcube.data.publishing.ckan2zenodo.model.zenodo.DepositionMetadata; import org.gcube.data.publishing.ckan2zenodo.model.zenodo.DepositionMetadata.AccessRights; import org.gcube.data.publishing.ckan2zenodo.model.zenodo.RelatedIdentifier; import org.gcube.data.publishing.ckan2zenodo.model.zenodo.ZenodoDeposition; -import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.ObjectMapper; import com.jayway.jsonpath.DocumentContext; import com.jayway.jsonpath.JsonPath; @@ -102,8 +104,32 @@ public class Transformer { List sourceValues=sourceCtx.read(mapping.getSource()); if(sourceValues!=null) for(String sourceValue:sourceValues) { - String targetValue =mapping.getValueMapping().getOrDefault(sourceValue, sourceValue); - targetCtx.add(mapping.getTarget(),targetValue); + String resultingValue=sourceValue; + + // apply regexps + for(Regexp regexp:mapping.getRegexp()) { + switch(regexp.getType()) { + case extract : { + Pattern p=Pattern.compile(regexp.getTarget()); + Matcher m = p.matcher(resultingValue); + if(m.find()) + resultingValue=m.group(); + else resultingValue=null; + break; + } + case replace : { + resultingValue=resultingValue.replaceAll(regexp.getTarget(), regexp.getReplacement()); + break; + } + } + + + } + + // apply value mapping + + resultingValue =mapping.getValueMapping().getOrDefault(sourceValue, sourceValue); + targetCtx.add(mapping.getTarget(),resultingValue); } } diff --git a/ckan2zenodo-library/src/main/java/org/gcube/data/publishing/ckan2zenodo/TransformerManager.java b/ckan2zenodo-library/src/main/java/org/gcube/data/publishing/ckan2zenodo/TransformerManager.java index 6d558aa..3f57558 100644 --- a/ckan2zenodo-library/src/main/java/org/gcube/data/publishing/ckan2zenodo/TransformerManager.java +++ b/ckan2zenodo-library/src/main/java/org/gcube/data/publishing/ckan2zenodo/TransformerManager.java @@ -10,6 +10,7 @@ import javax.xml.parsers.ParserConfigurationException; import org.gcube.common.resources.gcore.GenericResource; import org.gcube.data.publishing.ckan2zenodo.commons.IS; import org.gcube.data.publishing.ckan2zenodo.model.Mapping; +import org.gcube.data.publishing.ckan2zenodo.model.Mapping.Regexp; import org.w3c.dom.Element; import org.w3c.dom.NodeList; @@ -42,36 +43,52 @@ public class TransformerManager { private static ArrayList readMappings(GenericResource res){ -// ByteArrayInputStream input = new ByteArrayInputStream( -// res.profile()..toString().getBytes("UTF-8")); -// Document doc = builder.parse(input); -// XPath xPath = XPathFactory.newInstance().newXPath(); -// String expression = "/class/student"; -// NodeList nodeList = (NodeList) xPath.compile(expression).evaluate( -// doc, XPathConstants.NODESET); - + // ByteArrayInputStream input = new ByteArrayInputStream( + // res.profile()..toString().getBytes("UTF-8")); + // Document doc = builder.parse(input); + // XPath xPath = XPathFactory.newInstance().newXPath(); + // String expression = "/class/student"; + // NodeList nodeList = (NodeList) xPath.compile(expression).evaluate( + // doc, XPathConstants.NODESET); + ArrayList toReturn=new ArrayList(); - + Element root=res.profile().body(); NodeList mappings=root.getElementsByTagName("mapping"); - + for(int i = 0; i values=new HashMap<>(); NodeList valueMappings=mapping.getElementsByTagName("valueMapping"); for(int j = 0; i regularExpressions=new ArrayList<>(); + NodeList regexpDeclarations=mapping.getElementsByTagName("regexp"); + for(int j = 0; i valueMapping; - - private String regexp; + @NonNull + private List regexp; } diff --git a/ckan2zenodo-library/src/test/java/org/gcube/tests/TransformationTests.java b/ckan2zenodo-library/src/test/java/org/gcube/tests/TransformationTests.java index d1ffa05..6aa17ca 100644 --- a/ckan2zenodo-library/src/test/java/org/gcube/tests/TransformationTests.java +++ b/ckan2zenodo-library/src/test/java/org/gcube/tests/TransformationTests.java @@ -2,12 +2,10 @@ package org.gcube.tests; import java.io.IOException; import java.util.Collections; -import java.util.List; import org.gcube.data.publishing.ckan2zenodo.Fixer; import org.gcube.data.publishing.ckan2zenodo.Transformer; import org.gcube.data.publishing.ckan2zenodo.model.CkanItemDescriptor; -import org.gcube.data.publishing.ckan2zenodo.model.Mapping; import org.gcube.data.publishing.ckan2zenodo.model.zenodo.ZenodoDeposition; import org.junit.BeforeClass; import org.junit.Test; diff --git a/ckan2zenodo-library/src/test/resources/ResearchObject.xml b/ckan2zenodo-library/src/test/resources/ResearchObject.xml new file mode 100644 index 0000000..8f58471 --- /dev/null +++ b/ckan2zenodo-library/src/test/resources/ResearchObject.xml @@ -0,0 +1,38 @@ + + 4adeaca2-8e32-4507-8937-d891629998e2 + GenericResource + + + Ckan-Zenodo-Mappings + ResearchObject + Simple mappings tests + + + + $.extras[?(@.key=='Author')].value + $.metadata.contributors[0].name + + ([A-Za-z]*, [A-Za-z]*)(?=,) + + + + $.extras[?(@.key=='Author')].value + $.metadata.contributors[0].type + + .* + Producer + + + + $.extras[?(@.key=='Author')].value + $.metadata.contributors[0].orcid + + orcid.org/.* + + + + + + + + \ No newline at end of file