Added regexp management
This commit is contained in:
parent
7a8e79eff8
commit
681f82e7c6
|
@ -3,17 +3,19 @@ package org.gcube.data.publishing.ckan2zenodo;
|
|||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.gcube.data.publishing.ckan2zenodo.commons.Parsing;
|
||||
import org.gcube.data.publishing.ckan2zenodo.model.CkanItemDescriptor;
|
||||
import org.gcube.data.publishing.ckan2zenodo.model.Mapping;
|
||||
import org.gcube.data.publishing.ckan2zenodo.model.Mapping.Regexp;
|
||||
import org.gcube.data.publishing.ckan2zenodo.model.zenodo.Contributor;
|
||||
import org.gcube.data.publishing.ckan2zenodo.model.zenodo.DepositionMetadata;
|
||||
import org.gcube.data.publishing.ckan2zenodo.model.zenodo.DepositionMetadata.AccessRights;
|
||||
import org.gcube.data.publishing.ckan2zenodo.model.zenodo.RelatedIdentifier;
|
||||
import org.gcube.data.publishing.ckan2zenodo.model.zenodo.ZenodoDeposition;
|
||||
|
||||
import com.fasterxml.jackson.core.JsonProcessingException;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import com.jayway.jsonpath.DocumentContext;
|
||||
import com.jayway.jsonpath.JsonPath;
|
||||
|
@ -102,8 +104,32 @@ public class Transformer {
|
|||
// Read every value selected in the source document by the mapping's source path.
List<String> sourceValues=sourceCtx.read(mapping.getSource());
if(sourceValues!=null) {
	for(String sourceValue:sourceValues) {
		String resultingValue=sourceValue;

		// apply regexps: they are chained, each one works on the output of the previous one
		for(Regexp regexp:mapping.getRegexp()) {
			// A failed extract leaves nothing for the following regexps to work on;
			// stop here instead of failing with a NullPointerException.
			if(resultingValue==null) break;

			switch(regexp.getType()) {
			case extract : {
				// Keep only the first match of the pattern, or nothing when it does not match.
				Matcher m=Pattern.compile(regexp.getTarget()).matcher(resultingValue);
				resultingValue=m.find()?m.group():null;
				break;
			}
			case replace : {
				resultingValue=resultingValue.replaceAll(regexp.getTarget(), regexp.getReplacement());
				break;
			}
			}
		}

		// Nothing could be extracted from this source value: do not write an empty entry.
		if(resultingValue==null) continue;

		// apply value mapping on the regexp output (previously the lookup used the raw
		// source value and overwrote the result, so the regexps had no effect at all);
		// values without an entry are written unchanged
		resultingValue=mapping.getValueMapping().getOrDefault(resultingValue, resultingValue);
		targetCtx.add(mapping.getTarget(),resultingValue);
	}
}
|
||||
}
|
||||
|
||||
|
|
|
@ -10,6 +10,7 @@ import javax.xml.parsers.ParserConfigurationException;
|
|||
import org.gcube.common.resources.gcore.GenericResource;
|
||||
import org.gcube.data.publishing.ckan2zenodo.commons.IS;
|
||||
import org.gcube.data.publishing.ckan2zenodo.model.Mapping;
|
||||
import org.gcube.data.publishing.ckan2zenodo.model.Mapping.Regexp;
|
||||
import org.w3c.dom.Element;
|
||||
import org.w3c.dom.NodeList;
|
||||
|
||||
|
@ -42,13 +43,13 @@ public class TransformerManager {
|
|||
|
||||
|
||||
private static ArrayList<Mapping> readMappings(GenericResource res){
|
||||
// ByteArrayInputStream input = new ByteArrayInputStream(
|
||||
// res.profile()..toString().getBytes("UTF-8"));
|
||||
// Document doc = builder.parse(input);
|
||||
// XPath xPath = XPathFactory.newInstance().newXPath();
|
||||
// String expression = "/class/student";
|
||||
// NodeList nodeList = (NodeList) xPath.compile(expression).evaluate(
|
||||
// doc, XPathConstants.NODESET);
|
||||
// ByteArrayInputStream input = new ByteArrayInputStream(
|
||||
// res.profile()..toString().getBytes("UTF-8"));
|
||||
// Document doc = builder.parse(input);
|
||||
// XPath xPath = XPathFactory.newInstance().newXPath();
|
||||
// String expression = "/class/student";
|
||||
// NodeList nodeList = (NodeList) xPath.compile(expression).evaluate(
|
||||
// doc, XPathConstants.NODESET);
|
||||
|
||||
ArrayList<Mapping> toReturn=new ArrayList<Mapping>();
|
||||
|
||||
|
@ -63,12 +64,28 @@ public class TransformerManager {
|
|||
// Collect the sourceValue -> targetValue pairs declared by the <valueMapping> elements.
HashMap<String,String> values=new HashMap<>();
NodeList valueMappings=mapping.getElementsByTagName("valueMapping");
// The loop must be bounded by its own index j (the condition previously tested the outer index i).
for(int j = 0; j<valueMappings.getLength();j++) {
	Element codelistMapping=(Element) valueMappings.item(j);
	String sourceValue=codelistMapping.getElementsByTagName("sourceValue").item(0).getTextContent();
	String targetValue=codelistMapping.getElementsByTagName("targetValue").item(0).getTextContent();
	values.put(sourceValue, targetValue);
}

// Collect the <regexp> declarations, preserving their document order.
ArrayList<Regexp> regularExpressions=new ArrayList<>();
NodeList regexpDeclarations=mapping.getElementsByTagName("regexp");
for(int j = 0; j<regexpDeclarations.getLength();j++) {
	Element regexpElement=(Element) regexpDeclarations.item(j);
	String regexpTarget=regexpElement.getElementsByTagName("target").item(0).getTextContent();
	// The "type" attribute must be the name of a Regexp.Type constant; valueOf throws
	// IllegalArgumentException otherwise.
	String typeName=regexpElement.getAttribute("type");
	Regexp regexp=new Regexp(Regexp.Type.valueOf(typeName),regexpTarget);
	// Only replace regexps read a <replacement> child.
	if(regexp.getType().equals(Regexp.Type.replace))
		regexp.setReplacement(regexpElement.getElementsByTagName("replacement").item(0).getTextContent());
	regularExpressions.add(regexp);
}

// Register the mapping exactly once, with both its value translations and its regexps.
toReturn.add(new Mapping(source,target,values,regularExpressions));
|
||||
|
||||
}
|
||||
|
||||
return toReturn;
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
package org.gcube.data.publishing.ckan2zenodo.model;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
|
||||
import lombok.Getter;
|
||||
import lombok.NonNull;
|
||||
|
@ -9,16 +10,30 @@ import lombok.Setter;
|
|||
|
||||
@RequiredArgsConstructor
|
||||
@Getter
|
||||
@Setter
|
||||
public class Mapping {
|
||||
|
||||
@RequiredArgsConstructor
|
||||
@Getter
|
||||
public static class Regexp{
|
||||
public static enum Type{
|
||||
replace,extract
|
||||
}
|
||||
@NonNull
|
||||
private Type type;
|
||||
@NonNull
|
||||
private String target;
|
||||
@Setter
|
||||
private String replacement;
|
||||
}
|
||||
|
||||
|
||||
@NonNull
|
||||
private String source;
|
||||
@NonNull
|
||||
private String target;
|
||||
@NonNull
|
||||
private HashMap<String,String> valueMapping;
|
||||
|
||||
private String regexp;
|
||||
@NonNull
|
||||
private List<Regexp> regexp;
|
||||
|
||||
}
|
||||
|
|
|
@ -2,12 +2,10 @@ package org.gcube.tests;
|
|||
|
||||
import java.io.IOException;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
||||
import org.gcube.data.publishing.ckan2zenodo.Fixer;
|
||||
import org.gcube.data.publishing.ckan2zenodo.Transformer;
|
||||
import org.gcube.data.publishing.ckan2zenodo.model.CkanItemDescriptor;
|
||||
import org.gcube.data.publishing.ckan2zenodo.model.Mapping;
|
||||
import org.gcube.data.publishing.ckan2zenodo.model.zenodo.ZenodoDeposition;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
|
|
|
@ -0,0 +1,38 @@
|
|||
<Resource version="0.4.x">
|
||||
<ID>4adeaca2-8e32-4507-8937-d891629998e2</ID>
|
||||
<Type>GenericResource</Type>
|
||||
<Scopes></Scopes>
|
||||
<Profile>
|
||||
<SecondaryType>Ckan-Zenodo-Mappings</SecondaryType>
|
||||
<Name>ResearchObject</Name>
|
||||
<Description>Simple mappings tests</Description>
|
||||
<Body>
|
||||
<mappings>
|
||||
<mapping>
|
||||
<source>$.extras[?(@.key=='Author')].value</source>
|
||||
<target>$.metadata.contributors[0].name</target>
|
||||
<regexp type="extract">
|
||||
<target>([A-Za-z]*, [A-Za-z]*)(?=,)</target>
|
||||
</regexp>
|
||||
</mapping>
|
||||
<mapping>
|
||||
<source>$.extras[?(@.key=='Author')].value</source>
|
||||
<target>$.metadata.contributors[0].type</target>
|
||||
<regexp type="replace">
<!-- Anchored on purpose: a bare ".*" given to Java's String.replaceAll matches the whole
     value and then the empty string at its end, yielding "ProducerProducer". -->
<target>^.*$</target>
<replacement>Producer</replacement>
</regexp>
|
||||
</mapping>
|
||||
<mapping>
|
||||
<source>$.extras[?(@.key=='Author')].value</source>
|
||||
<target>$.metadata.contributors[0].orcid</target>
|
||||
<regexp type="extract">
|
||||
<target>orcid.org/.*</target>
|
||||
</regexp>
|
||||
</mapping>
|
||||
</mappings>
|
||||
|
||||
|
||||
</Body>
|
||||
</Profile>
|
||||
</Resource>
|
Loading…
Reference in New Issue