Added regexp management
This commit is contained in:
parent
7a8e79eff8
commit
681f82e7c6
|
@ -3,17 +3,19 @@ package org.gcube.data.publishing.ckan2zenodo;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.regex.Matcher;
|
||||||
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
import org.gcube.data.publishing.ckan2zenodo.commons.Parsing;
|
import org.gcube.data.publishing.ckan2zenodo.commons.Parsing;
|
||||||
import org.gcube.data.publishing.ckan2zenodo.model.CkanItemDescriptor;
|
import org.gcube.data.publishing.ckan2zenodo.model.CkanItemDescriptor;
|
||||||
import org.gcube.data.publishing.ckan2zenodo.model.Mapping;
|
import org.gcube.data.publishing.ckan2zenodo.model.Mapping;
|
||||||
|
import org.gcube.data.publishing.ckan2zenodo.model.Mapping.Regexp;
|
||||||
import org.gcube.data.publishing.ckan2zenodo.model.zenodo.Contributor;
|
import org.gcube.data.publishing.ckan2zenodo.model.zenodo.Contributor;
|
||||||
import org.gcube.data.publishing.ckan2zenodo.model.zenodo.DepositionMetadata;
|
import org.gcube.data.publishing.ckan2zenodo.model.zenodo.DepositionMetadata;
|
||||||
import org.gcube.data.publishing.ckan2zenodo.model.zenodo.DepositionMetadata.AccessRights;
|
import org.gcube.data.publishing.ckan2zenodo.model.zenodo.DepositionMetadata.AccessRights;
|
||||||
import org.gcube.data.publishing.ckan2zenodo.model.zenodo.RelatedIdentifier;
|
import org.gcube.data.publishing.ckan2zenodo.model.zenodo.RelatedIdentifier;
|
||||||
import org.gcube.data.publishing.ckan2zenodo.model.zenodo.ZenodoDeposition;
|
import org.gcube.data.publishing.ckan2zenodo.model.zenodo.ZenodoDeposition;
|
||||||
|
|
||||||
import com.fasterxml.jackson.core.JsonProcessingException;
|
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
import com.jayway.jsonpath.DocumentContext;
|
import com.jayway.jsonpath.DocumentContext;
|
||||||
import com.jayway.jsonpath.JsonPath;
|
import com.jayway.jsonpath.JsonPath;
|
||||||
|
@ -102,8 +104,32 @@ public class Transformer {
|
||||||
List<String> sourceValues=sourceCtx.read(mapping.getSource());
|
List<String> sourceValues=sourceCtx.read(mapping.getSource());
|
||||||
if(sourceValues!=null)
|
if(sourceValues!=null)
|
||||||
for(String sourceValue:sourceValues) {
|
for(String sourceValue:sourceValues) {
|
||||||
String targetValue =mapping.getValueMapping().getOrDefault(sourceValue, sourceValue);
|
String resultingValue=sourceValue;
|
||||||
targetCtx.add(mapping.getTarget(),targetValue);
|
|
||||||
|
// Apply the configured regular expressions in declaration order; each step
// works on the value produced by the previous one.
for(Regexp regexp:mapping.getRegexp()) {
	// A failed extract leaves nothing to work on: stop here instead of
	// dereferencing null in a following extract/replace step.
	if(resultingValue==null) {
		break;
	}
	switch(regexp.getType()) {
	case extract : {
		Matcher m = Pattern.compile(regexp.getTarget()).matcher(resultingValue);
		// Keep only the first matching region, or null when nothing matches.
		resultingValue=m.find()?m.group():null;
		break;
	}
	case replace : {
		resultingValue=resultingValue.replaceAll(regexp.getTarget(), regexp.getReplacement());
		break;
	}
	}
}

// Apply value mapping to the regexp result. Looking up the raw sourceValue
// here would silently discard everything the regexps produced.
// NOTE(review): values for which an extract found no match are skipped
// rather than written to the target as null - confirm this is the desired handling.
if(resultingValue!=null) {
	resultingValue=mapping.getValueMapping().getOrDefault(resultingValue, resultingValue);
	targetCtx.add(mapping.getTarget(),resultingValue);
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -10,6 +10,7 @@ import javax.xml.parsers.ParserConfigurationException;
|
||||||
import org.gcube.common.resources.gcore.GenericResource;
|
import org.gcube.common.resources.gcore.GenericResource;
|
||||||
import org.gcube.data.publishing.ckan2zenodo.commons.IS;
|
import org.gcube.data.publishing.ckan2zenodo.commons.IS;
|
||||||
import org.gcube.data.publishing.ckan2zenodo.model.Mapping;
|
import org.gcube.data.publishing.ckan2zenodo.model.Mapping;
|
||||||
|
import org.gcube.data.publishing.ckan2zenodo.model.Mapping.Regexp;
|
||||||
import org.w3c.dom.Element;
|
import org.w3c.dom.Element;
|
||||||
import org.w3c.dom.NodeList;
|
import org.w3c.dom.NodeList;
|
||||||
|
|
||||||
|
@ -63,12 +64,28 @@ public class TransformerManager {
|
||||||
HashMap<String,String> values=new HashMap<>();
|
HashMap<String,String> values=new HashMap<>();
|
||||||
NodeList valueMappings=mapping.getElementsByTagName("valueMapping");
|
NodeList valueMappings=mapping.getElementsByTagName("valueMapping");
|
||||||
for(int j = 0; i<valueMappings.getLength();j++) {
|
for(int j = 0; i<valueMappings.getLength();j++) {
|
||||||
String sourceValue=mapping.getElementsByTagName("sourceValue").item(0).getTextContent();
|
Element codelistMapping=(Element) valueMappings.item(j);
|
||||||
String targetValue=mapping.getElementsByTagName("targetValue").item(0).getTextContent();
|
String sourceValue=codelistMapping.getElementsByTagName("sourceValue").item(0).getTextContent();
|
||||||
|
String targetValue=codelistMapping.getElementsByTagName("targetValue").item(0).getTextContent();
|
||||||
values.put(sourceValue, targetValue);
|
values.put(sourceValue, targetValue);
|
||||||
}
|
}
|
||||||
|
|
||||||
toReturn.add(new Mapping(source,target,values));
|
// Collect the <regexp> declarations of this mapping in document order.
ArrayList<Regexp> regularExpressions=new ArrayList<>();
NodeList regexpDeclarations=mapping.getElementsByTagName("regexp");
// The bound must be checked against this loop's own index j: the previous
// condition tested i, a variable declared outside this loop that the loop
// never modifies, so the loop either never ran or ran past the last node.
for(int j = 0; j<regexpDeclarations.getLength();j++) {
	Element regexpElement=(Element) regexpDeclarations.item(j);
	String regexpTarget=regexpElement.getElementsByTagName("target").item(0).getTextContent();
	// The "type" attribute must name a Regexp.Type constant exactly;
	// valueOf throws IllegalArgumentException otherwise.
	String typeName=regexpElement.getAttribute("type");
	Regexp regexp=new Regexp(Regexp.Type.valueOf(typeName),regexpTarget);
	// Only replace declarations carry a <replacement> child.
	if(regexp.getType()==Regexp.Type.replace) {
		regexp.setReplacement(regexpElement.getElementsByTagName("replacement").item(0).getTextContent());
	}
	regularExpressions.add(regexp);
}
|
||||||
|
|
||||||
|
|
||||||
|
toReturn.add(new Mapping(source,target,values,regularExpressions));
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return toReturn;
|
return toReturn;
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
package org.gcube.data.publishing.ckan2zenodo.model;
|
package org.gcube.data.publishing.ckan2zenodo.model;
|
||||||
|
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
import lombok.Getter;
|
import lombok.Getter;
|
||||||
import lombok.NonNull;
|
import lombok.NonNull;
|
||||||
|
@ -9,16 +10,30 @@ import lombok.Setter;
|
||||||
|
|
||||||
@RequiredArgsConstructor
|
@RequiredArgsConstructor
|
||||||
@Getter
|
@Getter
|
||||||
@Setter
|
|
||||||
public class Mapping {
|
public class Mapping {
|
||||||
|
|
||||||
|
@RequiredArgsConstructor
|
||||||
|
@Getter
|
||||||
|
public static class Regexp{
|
||||||
|
public static enum Type{
|
||||||
|
replace,extract
|
||||||
|
}
|
||||||
|
@NonNull
|
||||||
|
private Type type;
|
||||||
|
@NonNull
|
||||||
|
private String target;
|
||||||
|
@Setter
|
||||||
|
private String replacement;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
@NonNull
|
@NonNull
|
||||||
private String source;
|
private String source;
|
||||||
@NonNull
|
@NonNull
|
||||||
private String target;
|
private String target;
|
||||||
@NonNull
|
@NonNull
|
||||||
private HashMap<String,String> valueMapping;
|
private HashMap<String,String> valueMapping;
|
||||||
|
@NonNull
|
||||||
private String regexp;
|
private List<Regexp> regexp;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -2,12 +2,10 @@ package org.gcube.tests;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
import org.gcube.data.publishing.ckan2zenodo.Fixer;
|
import org.gcube.data.publishing.ckan2zenodo.Fixer;
|
||||||
import org.gcube.data.publishing.ckan2zenodo.Transformer;
|
import org.gcube.data.publishing.ckan2zenodo.Transformer;
|
||||||
import org.gcube.data.publishing.ckan2zenodo.model.CkanItemDescriptor;
|
import org.gcube.data.publishing.ckan2zenodo.model.CkanItemDescriptor;
|
||||||
import org.gcube.data.publishing.ckan2zenodo.model.Mapping;
|
|
||||||
import org.gcube.data.publishing.ckan2zenodo.model.zenodo.ZenodoDeposition;
|
import org.gcube.data.publishing.ckan2zenodo.model.zenodo.ZenodoDeposition;
|
||||||
import org.junit.BeforeClass;
|
import org.junit.BeforeClass;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
|
|
@ -0,0 +1,38 @@
|
||||||
|
<Resource version="0.4.x">
|
||||||
|
<ID>4adeaca2-8e32-4507-8937-d891629998e2</ID>
|
||||||
|
<Type>GenericResource</Type>
|
||||||
|
<Scopes></Scopes>
|
||||||
|
<Profile>
|
||||||
|
<SecondaryType>Ckan-Zenodo-Mappings</SecondaryType>
|
||||||
|
<Name>ResearchObject</Name>
|
||||||
|
<Description>Simple mappings tests</Description>
|
||||||
|
<Body>
|
||||||
|
<mappings>
|
||||||
|
<mapping>
|
||||||
|
<source>$.extras[?(@.key=='Author')].value</source>
|
||||||
|
<target>$.metadata.contributors[0].name</target>
|
||||||
|
<regexp type="extract">
|
||||||
|
<target>([A-Za-z]*, [A-Za-z]*)(?=,)</target>
|
||||||
|
</regexp>
|
||||||
|
</mapping>
|
||||||
|
<mapping>
|
||||||
|
<source>$.extras[?(@.key=='Author')].value</source>
|
||||||
|
<target>$.metadata.contributors[0].type</target>
|
||||||
|
<regexp type="replace">
	<!-- Anchored on purpose: with a bare ".*" Java's replaceAll also matches the
	     empty string left at the end of the input, so the replacement would be
	     emitted twice ("ProducerProducer"). -->
	<target>^.*$</target>
	<replacement>Producer</replacement>
</regexp>
|
||||||
|
</mapping>
|
||||||
|
<mapping>
|
||||||
|
<source>$.extras[?(@.key=='Author')].value</source>
|
||||||
|
<target>$.metadata.contributors[0].orcid</target>
|
||||||
|
<regexp type="extract">
|
||||||
|
<target>orcid.org/.*</target>
|
||||||
|
</regexp>
|
||||||
|
</mapping>
|
||||||
|
</mappings>
|
||||||
|
|
||||||
|
|
||||||
|
</Body>
|
||||||
|
</Profile>
|
||||||
|
</Resource>
|
Loading…
Reference in New Issue