Mapping.Source as complex object

This commit is contained in:
Fabio Sinibaldi 2019-12-05 11:56:51 +01:00
parent 681f82e7c6
commit 3d0e0c84aa
10 changed files with 328 additions and 185 deletions

View File

@ -1,6 +1,5 @@
package org.gcube.data.publishing.ckan2zenodo;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
@ -10,6 +9,7 @@ import org.gcube.data.publishing.ckan2zenodo.commons.Parsing;
import org.gcube.data.publishing.ckan2zenodo.model.CkanItemDescriptor;
import org.gcube.data.publishing.ckan2zenodo.model.Mapping;
import org.gcube.data.publishing.ckan2zenodo.model.Mapping.Regexp;
import org.gcube.data.publishing.ckan2zenodo.model.faults.TransformationException;
import org.gcube.data.publishing.ckan2zenodo.model.zenodo.Contributor;
import org.gcube.data.publishing.ckan2zenodo.model.zenodo.DepositionMetadata;
import org.gcube.data.publishing.ckan2zenodo.model.zenodo.DepositionMetadata.AccessRights;
@ -30,22 +30,22 @@ public class Transformer {
@NonNull
private List<Mapping> mappings;
public ZenodoDeposition transform(CkanItemDescriptor toTransform, ZenodoDeposition deposition) throws IOException {
public ZenodoDeposition transform(CkanItemDescriptor toTransform, ZenodoDeposition deposition) throws TransformationException {
log.debug("Transforming "+toTransform+". Existing Deposition is : "+deposition);
if(deposition==null) deposition=new ZenodoDeposition();
// default mappings
DepositionMetadata meta=deposition.getMetadata();
if(meta==null)
meta=new DepositionMetadata();
meta.setTitle(toTransform.getTitle());
meta.setDescription(toTransform.getNotes());
// Access Right
if(toTransform.isOpen())
meta.setAccess_right(AccessRights.open);
@ -53,18 +53,18 @@ public class Transformer {
meta.setAccess_right(AccessRights.restricted);
meta.setAccess_conditions("Visit the VRE "+toTransform.getVRE()+" to access it.");
}
meta.setLicense(toTransform.getLicenseId());
meta.setKeywords(new ArrayList<String>(toTransform.getTags()));
//Related Identifiers
String itemUrl=toTransform.getItemUrl();
ArrayList<RelatedIdentifier> relatedIdentifiers=new ArrayList<>();
if(itemUrl!=null) relatedIdentifiers.add(new RelatedIdentifier(itemUrl,RelatedIdentifier.Relation.compiles));
meta.setRelated_identifiers(relatedIdentifiers);
//Contributors
ArrayList<Contributor> contributors=new ArrayList<>();
String authorName=toTransform.getAuthor();
@ -73,7 +73,7 @@ public class Transformer {
author.setName(authorName);
contributors.add(author);
}
String maintainerName=toTransform.getAuthor();
if(maintainerName!=null) {
Contributor maintainer=new Contributor(Contributor.Type.DataCurator);
@ -81,59 +81,83 @@ public class Transformer {
contributors.add(maintainer);
}
meta.setContributors(contributors);
meta.setVersion(toTransform.getVersion());
// Dates
deposition.setMetadata(meta);
// profile specific mappings
return applyMappings(toTransform, deposition);
}
private ZenodoDeposition applyMappings(CkanItemDescriptor source, ZenodoDeposition target) throws IOException {
ObjectMapper mapper=Parsing.getMapper();
DocumentContext sourceCtx=JsonPath.using(Parsing.JSON_PATH_ALWAYS_LIST_CONFIG).parse(source.getContent());
DocumentContext targetCtx=JsonPath.using(Parsing.JSON_PATH_ALWAYS_LIST_CONFIG).parse(mapper.writeValueAsString(target));
for(Mapping mapping:mappings) {
List<String> sourceValues=sourceCtx.read(mapping.getSource());
if(sourceValues!=null)
for(String sourceValue:sourceValues) {
String resultingValue=sourceValue;
// apply regexps
for(Regexp regexp:mapping.getRegexp()) {
switch(regexp.getType()) {
case extract : {
Pattern p=Pattern.compile(regexp.getTarget());
Matcher m = p.matcher(resultingValue);
if(m.find())
resultingValue=m.group();
else resultingValue=null;
break;
}
case replace : {
resultingValue=resultingValue.replaceAll(regexp.getTarget(), regexp.getReplacement());
break;
}
}
private ZenodoDeposition applyMappings(CkanItemDescriptor source, ZenodoDeposition target) throws TransformationException {
try{
ObjectMapper mapper=Parsing.getMapper();
DocumentContext sourceCtx=JsonPath.using(Parsing.JSON_PATH_ALWAYS_LIST_CONFIG).parse(source.getContent());
DocumentContext targetCtx=JsonPath.using(Parsing.JSON_PATH_ALWAYS_LIST_CONFIG).parse(mapper.writeValueAsString(target));
for(Mapping mapping:mappings) {
try {
// extract source
List<String> sourceValues=new ArrayList<>();
switch(mapping.getSource().getType()) {
case constant : {
sourceValues.add(mapping.getSource().getValue());
break;
}
// apply value mapping
resultingValue =mapping.getValueMapping().getOrDefault(sourceValue, sourceValue);
targetCtx.add(mapping.getTarget(),resultingValue);
case jsonPath : {
sourceValues.addAll(sourceCtx.read(mapping.getSource().getValue()));
break;
}
}
for(String sourceValue:sourceValues) {
String resultingValue=sourceValue;
// apply regexps
for(Regexp regexp:mapping.getRegexp()) {
switch(regexp.getType()) {
case extract : {
Pattern p=Pattern.compile(regexp.getTarget());
Matcher m = p.matcher(resultingValue);
if(m.find())
resultingValue=m.group();
else resultingValue=null;
break;
}
case replace : {
if(resultingValue!=null) {
String replacement=regexp.getReplacement()!=null?regexp.getReplacement():"";
resultingValue=resultingValue.replaceAll(regexp.getTarget(), replacement);
break;
}
}
}
}
// apply value mappings
resultingValue =mapping.getValueMapping().getOrDefault(sourceValue, resultingValue);
// apply resulting value
targetCtx.put(mapping.getTargetPath(),mapping.getTargetElement(),resultingValue);
}
}catch(Throwable t) {
throw new TransformationException("Exception while applying "+mapping,t);
}
}
return mapper.readValue(targetCtx.jsonString(), ZenodoDeposition.class);
}catch(Throwable t) {
log.error("Unable to transform "+source+" using previous "+target,t);
throw new TransformationException("Unable to translate "+source.getName(),t);
}
return mapper.readValue(targetCtx.jsonString(), ZenodoDeposition.class);
}
}

View File

@ -35,60 +35,12 @@ public class TransformerManager {
public Transformer getByProfile(String profile) throws Exception {
for(GenericResource r: IS.queryForGenericResources("Ckan-Zenodo-Mappings")){
if (r.profile().name().equals(profile))
return new Transformer(readMappings(r));
return new Transformer(IS.readMappings(r));
}
throw new Exception("No transformer found for profile "+profile);
}
/**
 * Parses the {@code mapping} elements found in the body of the given generic resource.
 *
 * For each {@code mapping} the first {@code source} and {@code target} descendants are read,
 * together with every {@code valueMapping} (sourceValue / targetValue pair) and every
 * {@code regexp} declaration (a {@code type} attribute, a {@code target} and, for
 * {@code replace} regexps only, a {@code replacement}).
 *
 * @param res the resource whose profile body declares the mappings
 * @return the parsed mappings, in document order
 */
private static ArrayList<Mapping> readMappings(GenericResource res){
    ArrayList<Mapping> toReturn=new ArrayList<Mapping>();
    Element root=res.profile().body();
    NodeList mappings=root.getElementsByTagName("mapping");
    for(int i = 0; i<mappings.getLength();i++) {
        Element mapping=(Element) mappings.item(i);
        String source=mapping.getElementsByTagName("source").item(0).getTextContent();
        String target=mapping.getElementsByTagName("target").item(0).getTextContent();

        HashMap<String,String> values=new HashMap<>();
        NodeList valueMappings=mapping.getElementsByTagName("valueMapping");
        // The bound must be checked on j: the previous condition tested the outer index i,
        // so the loop either never ran or ran past the end of the node list.
        for(int j = 0; j<valueMappings.getLength();j++) {
            Element codelistMapping=(Element) valueMappings.item(j);
            String sourceValue=codelistMapping.getElementsByTagName("sourceValue").item(0).getTextContent();
            String targetValue=codelistMapping.getElementsByTagName("targetValue").item(0).getTextContent();
            values.put(sourceValue, targetValue);
        }

        ArrayList<Regexp> regularExpressions=new ArrayList<>();
        NodeList regexpDeclarations=mapping.getElementsByTagName("regexp");
        // Same fix as above: iterate on j, not on the outer index i.
        for(int j = 0; j<regexpDeclarations.getLength();j++) {
            Element regexpElement=(Element) regexpDeclarations.item(j);
            String regexpTarget=regexpElement.getElementsByTagName("target").item(0).getTextContent();
            String typeName=regexpElement.getAttribute("type");
            Regexp regexp=new Regexp(Regexp.Type.valueOf(typeName),regexpTarget);
            // only replace regexps carry a replacement
            if(regexp.getType().equals(Regexp.Type.replace))
                regexp.setReplacement(regexpElement.getElementsByTagName("replacement").item(0).getTextContent());
            regularExpressions.add(regexp);
        }

        toReturn.add(new Mapping(source,target,values,regularExpressions));
    }
    return toReturn;
}
}

View File

@ -3,22 +3,30 @@ package org.gcube.data.publishing.ckan2zenodo.commons;
import static org.gcube.resources.discovery.icclient.ICFactory.clientFor;
import static org.gcube.resources.discovery.icclient.ICFactory.queryFor;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import org.gcube.common.resources.gcore.GenericResource;
import org.gcube.common.resources.gcore.ServiceEndpoint;
import org.gcube.data.publishing.ckan2zenodo.model.Mapping;
import org.gcube.data.publishing.ckan2zenodo.model.Mapping.Regexp;
import org.gcube.data.publishing.ckan2zenodo.model.Mapping.Source;
import org.gcube.data.publishing.ckan2zenodo.model.faults.ConfigurationException;
import org.gcube.resources.discovery.client.api.DiscoveryClient;
import org.gcube.resources.discovery.client.queries.api.SimpleQuery;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
import lombok.extern.slf4j.Slf4j;
@Slf4j
public class IS {
public static List<GenericResource> queryForGenericResources(String secondaryType){
log.debug("Querying for Service Endpoints [secondary type : {} ]",secondaryType);
SimpleQuery query = queryFor(GenericResource.class);
query.addCondition("$resource/Profile/SecondaryType/text() eq '"+secondaryType+"'");
@ -30,16 +38,68 @@ public class IS {
/**
 * Queries for the Service Endpoints matching both the given category and platform name.
 *
 * @param category value compared against the resource's {@code Profile/Category} text
 * @param platformName value compared against the resource's {@code Profile/Platform/Name} text
 * @return the result of submitting the query through the discovery client
 */
public static List<ServiceEndpoint> queryForServiceEndpoints(String category, String platformName){
    log.debug("Querying for Service Endpoints [category : {} , platformName : {}]",category,platformName);

    final String categoryCondition="$resource/Profile/Category/text() eq '"+category+"'";
    final String platformCondition="$resource/Profile/Platform/Name/text() eq '"+platformName+"'";

    SimpleQuery endpointQuery = queryFor(ServiceEndpoint.class);
    endpointQuery.addCondition(categoryCondition)
            .addCondition(platformCondition);

    return clientFor(ServiceEndpoint.class).submit(endpointQuery);
}
/**
 * Parses the {@code mapping} elements declared in the body of the given generic resource.
 *
 * Each {@code mapping} must declare a {@code source} element (with a {@code type} attribute
 * naming a {@link Source.Type} and a {@code value} child), a {@code targetPath} and a
 * {@code targetElement}. It may declare any number of {@code valueMapping} elements
 * (sourceValue / targetValue pairs) and {@code regexp} elements (a {@code type} attribute
 * naming a {@link Regexp.Type}, a {@code target} and, for {@code replace} only, a
 * {@code replacement}).
 *
 * @param res the resource whose profile body declares the mappings
 * @return the parsed mappings, in document order
 * @throws ConfigurationException if the resource is null, or its body is missing a required
 *         element or declares an unknown source / regexp type
 */
public static ArrayList<Mapping> readMappings(GenericResource res) throws ConfigurationException{
    try {
        ArrayList<Mapping> toReturn=new ArrayList<Mapping>();
        Element root=res.profile().body();
        NodeList mappings=root.getElementsByTagName("mapping");
        for(int i = 0; i<mappings.getLength();i++) {
            Element mapping=(Element) mappings.item(i);

            NodeList sourceElements=mapping.getElementsByTagName("source");
            if(sourceElements.getLength()==0)
                throw new IllegalArgumentException("Missing required element <source> in mapping #"+i);
            Element sourceElement=(Element) sourceElements.item(0);
            Source source=new Source(Source.Type.valueOf(sourceElement.getAttribute("type")),
                    requiredChildText(sourceElement,"value"));

            String targetPath=requiredChildText(mapping,"targetPath");
            String targetElement=requiredChildText(mapping,"targetElement");

            HashMap<String,String> values=new HashMap<>();
            NodeList valueMappings=mapping.getElementsByTagName("valueMapping");
            for(int j = 0; j<valueMappings.getLength();j++) {
                Element codelistMapping=(Element) valueMappings.item(j);
                values.put(requiredChildText(codelistMapping,"sourceValue"),
                        requiredChildText(codelistMapping,"targetValue"));
            }

            ArrayList<Regexp> regularExpressions=new ArrayList<>();
            NodeList regexpDeclarations=mapping.getElementsByTagName("regexp");
            for(int j = 0; j<regexpDeclarations.getLength();j++) {
                Element regexpElement=(Element) regexpDeclarations.item(j);
                String regexpTarget=requiredChildText(regexpElement,"target");
                String typeName=regexpElement.getAttribute("type");
                Regexp regexp=new Regexp(Regexp.Type.valueOf(typeName),regexpTarget);
                // only replace regexps carry a replacement
                if(regexp.getType().equals(Regexp.Type.replace))
                    regexp.setReplacement(requiredChildText(regexpElement,"replacement"));
                regularExpressions.add(regexp);
            }

            toReturn.add(new Mapping(source,targetPath,targetElement,values,regularExpressions));
        }
        return toReturn;
    }catch(RuntimeException e) {
        // Build the label without assuming res / res.profile() are usable: the failure being
        // reported may be exactly that one of them is null.
        String resourceLabel=describeResource(res);
        log.debug("Error while parsing mapping from resource {}",resourceLabel,e);
        throw new ConfigurationException("Invalid mapping resource "+resourceLabel,e);
    }
}

/** Returns the text of the first {@code tagName} descendant of {@code parent}, failing with a descriptive message if absent. */
private static String requiredChildText(Element parent, String tagName) {
    NodeList found=parent.getElementsByTagName(tagName);
    if(found.getLength()==0)
        throw new IllegalArgumentException("Missing required element <"+tagName+"> inside <"+parent.getTagName()+">");
    return found.item(0).getTextContent();
}

/** Renders "id name : profileName" for messages, tolerating a null resource or profile. */
private static String describeResource(GenericResource res) {
    if(res==null) return "null";
    String profileName=res.profile()!=null?res.profile().name():null;
    return res.id()+" name : "+profileName;
}
}

View File

@ -7,13 +7,16 @@ import lombok.Getter;
import lombok.NonNull;
import lombok.RequiredArgsConstructor;
import lombok.Setter;
import lombok.ToString;
@RequiredArgsConstructor
@Getter
@ToString
public class Mapping {
@RequiredArgsConstructor
@Getter
@ToString
public static class Regexp{
public static enum Type{
replace,extract
@ -25,12 +28,25 @@ public class Mapping {
@Setter
private String replacement;
}
/**
 * Describes where the value(s) of a mapping come from.
 *
 * Instances are built by IS.readMappings from the {@code type} attribute and the
 * {@code value} child of a mapping's {@code source} element.
 * Constructor, getters and toString are generated by Lombok.
 */
@RequiredArgsConstructor
@Getter
@ToString
public static class Source{
/**
 * How the source value is to be interpreted by the Transformer.
 */
public static enum Type{
// jsonPath : the value is a JsonPath expression read against the source item content
// constant : the value itself is used as the only source value
jsonPath,constant
}
/** Interpretation of the value; rejected by Lombok's null check if null. */
@NonNull
private Type type;
/** The JsonPath expression or the constant, depending on the type; rejected by Lombok's null check if null. */
@NonNull
private String value;
}
@NonNull
private String source;
private Source source;
@NonNull
private String target;
private String targetPath;
@NonNull
private String targetElement;
@NonNull
private HashMap<String,String> valueMapping;
@NonNull

View File

@ -0,0 +1,29 @@
package org.gcube.data.publishing.ckan2zenodo.model.faults;
/**
 * Checked exception signalling an invalid configuration, e.g. a mapping resource
 * that cannot be parsed.
 */
public class ConfigurationException extends Exception {

    /** Explicit serial version: {@link Exception} is {@link java.io.Serializable}. */
    private static final long serialVersionUID = 1L;

    /** Creates an exception with neither detail message nor cause. */
    public ConfigurationException() {
        super();
    }

    /**
     * @param message the detail message
     */
    public ConfigurationException(String message) {
        super(message);
    }

    /**
     * @param cause the underlying failure
     */
    public ConfigurationException(Throwable cause) {
        super(cause);
    }

    /**
     * @param message the detail message
     * @param cause the underlying failure
     */
    public ConfigurationException(String message, Throwable cause) {
        super(message, cause);
    }

    /**
     * @param message the detail message
     * @param cause the underlying failure
     * @param enableSuppression whether suppression is enabled
     * @param writableStackTrace whether the stack trace is writable
     */
    public ConfigurationException(String message, Throwable cause, boolean enableSuppression,
            boolean writableStackTrace) {
        super(message, cause, enableSuppression, writableStackTrace);
    }
}

View File

@ -0,0 +1,30 @@
package org.gcube.data.publishing.ckan2zenodo.model.faults;
/**
 * Checked exception signalling that an item could not be transformed into a deposition,
 * e.g. because applying a mapping failed.
 */
public class TransformationException extends Exception {

    /** Explicit serial version: {@link Exception} is {@link java.io.Serializable}. */
    private static final long serialVersionUID = 1L;

    /** Creates an exception with neither detail message nor cause. */
    public TransformationException() {
        super();
    }

    /**
     * @param message the detail message
     */
    public TransformationException(String message) {
        super(message);
    }

    /**
     * @param cause the underlying failure
     */
    public TransformationException(Throwable cause) {
        super(cause);
    }

    /**
     * @param message the detail message
     * @param cause the underlying failure
     */
    public TransformationException(String message, Throwable cause) {
        super(message, cause);
    }

    /**
     * @param message the detail message
     * @param cause the underlying failure
     * @param enableSuppression whether suppression is enabled
     * @param writableStackTrace whether the stack trace is writable
     */
    public TransformationException(String message, Throwable cause, boolean enableSuppression,
            boolean writableStackTrace) {
        super(message, cause, enableSuppression, writableStackTrace);
    }
}

View File

@ -4,7 +4,11 @@ import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;
import org.gcube.data.publishing.ckan2zenodo.Fixer;
import org.gcube.data.publishing.ckan2zenodo.Transformer;
import org.gcube.data.publishing.ckan2zenodo.model.CkanItemDescriptor;
import org.gcube.data.publishing.ckan2zenodo.model.ZenodoCredentials;
import org.gcube.data.publishing.ckan2zenodo.model.zenodo.ZenodoDeposition;
import com.fasterxml.jackson.annotation.JsonInclude.Include;
import com.fasterxml.jackson.databind.DeserializationFeature;
@ -37,5 +41,19 @@ public class TestCommons {
return mapper;
}
/**
 * Loads a CKAN item from a classpath JSON resource, transforms it and prints the outcome.
 *
 * @param jsonFile classpath location of the JSON item (resolved against TransformationTests)
 * @param transformer the transformer to apply
 * @param depositions optional existing deposition to update; only the first element is used,
 *        and {@code null} is passed to the transformer when none is given
 * @return the deposition returned by the transformer
 * @throws Exception wrapping any failure, with the offending file name in the message
 */
static final ZenodoDeposition readAndTransform(String jsonFile, Transformer transformer,ZenodoDeposition...depositions) throws Exception {
    try{
        String json=TestCommons.convertStreamToString(TransformationTests.class.getResourceAsStream(jsonFile));
        CkanItemDescriptor desc=new CkanItemDescriptor(json);
        System.out.println("Going to transform : "+desc.getContent());
        System.out.println("Result : ");
        // A varargs call without trailing arguments passes an EMPTY array, not null:
        // the length must be checked too, otherwise depositions[0] goes out of bounds.
        ZenodoDeposition existing=(depositions!=null&&depositions.length>0)?depositions[0]:null;
        ZenodoDeposition dep=transformer.transform(desc, existing);
        System.out.println(dep);
        System.out.println("As JSON : ");
        System.out.println(Fixer.fixSending(getMapper().writeValueAsString(dep)));
        return dep;
    }catch(Throwable t) {
        throw new Exception("Errors with json file "+jsonFile,t);
    }
}
}

View File

@ -3,8 +3,11 @@ package org.gcube.tests;
import java.io.IOException;
import java.util.Collections;
import org.gcube.common.resources.gcore.GenericResource;
import org.gcube.common.resources.gcore.Resources;
import org.gcube.data.publishing.ckan2zenodo.Fixer;
import org.gcube.data.publishing.ckan2zenodo.Transformer;
import org.gcube.data.publishing.ckan2zenodo.commons.IS;
import org.gcube.data.publishing.ckan2zenodo.model.CkanItemDescriptor;
import org.gcube.data.publishing.ckan2zenodo.model.zenodo.ZenodoDeposition;
import org.junit.BeforeClass;
@ -25,33 +28,19 @@ public class TransformationTests {
@Test
public void transform() throws Exception {
Transformer basic=new Transformer(Collections.EMPTY_LIST);
readAndTransform("/simpleItem.json",basic);
readAndTransform("/FSKXModel.json",basic);
readAndTransform("/ResearchObject.json",basic);
TestCommons.readAndTransform("/simpleItem.json",basic);
TestCommons.readAndTransform("/FSKXModel.json",basic);
TestCommons.readAndTransform("/ResearchObject.json",basic);
}
@Test
public void transformWithMappings() throws IOException {
public void transformWithMappings() throws Exception {
GenericResource res=Resources.unmarshal(GenericResource.class, TransformationTests.class.getResourceAsStream("/ResearchObject.xml"));
TestCommons.readAndTransform("/ResearchObject.json", new Transformer(IS.readMappings(res)));
}
/**
 * Loads a CKAN item from a classpath JSON resource, transforms it without any
 * pre-existing deposition and prints the item, the result and its JSON form.
 *
 * @param jsonFile classpath location of the JSON item
 * @param transformer the transformer to apply
 * @throws Exception wrapping any failure, with the offending file name in the message
 */
private static final void readAndTransform(String jsonFile, Transformer transformer) throws Exception {
    try{
        CkanItemDescriptor descriptor=new CkanItemDescriptor(
                TestCommons.convertStreamToString(TransformationTests.class.getResourceAsStream(jsonFile)));
        System.out.println("Going to transform : "+descriptor.getContent());

        System.out.println("Result : ");
        ZenodoDeposition transformed=transformer.transform(descriptor, null);
        System.out.println(transformed);

        // serialize only after the header line, as the original did
        System.out.println("As JSON : ");
        String serialized=mapper.writeValueAsString(transformed);
        System.out.println(Fixer.fixSending(serialized));
    }catch(Throwable failure) {
        throw new Exception("Errors with json file "+jsonFile,failure);
    }
}
}

View File

@ -6,10 +6,14 @@ import java.nio.file.Paths;
import java.util.Arrays;
import java.util.Date;
import org.gcube.common.resources.gcore.GenericResource;
import org.gcube.common.resources.gcore.Resources;
import org.gcube.data.publishing.ckan2zenodo.Transformer;
import org.gcube.data.publishing.ckan2zenodo.clients.Zenodo;
import org.gcube.data.publishing.ckan2zenodo.commons.IS;
import org.gcube.data.publishing.ckan2zenodo.model.CkanItemDescriptor;
import org.gcube.data.publishing.ckan2zenodo.model.ZenodoCredentials;
import org.gcube.data.publishing.ckan2zenodo.model.faults.ConfigurationException;
import org.gcube.data.publishing.ckan2zenodo.model.faults.ZenodoException;
import org.gcube.data.publishing.ckan2zenodo.model.zenodo.Creator;
import org.gcube.data.publishing.ckan2zenodo.model.zenodo.DepositionMetadata;
@ -60,14 +64,15 @@ public class ZenodoTests {
}
// @Test
// public void createFromSimpleItem() throws JsonProcessingException, ZenodoException {
// Zenodo z=new Zenodo(credentials);
// String json=TestCommons.convertStreamToString(this.getClass().getResourceAsStream("/simpleItem.json"));
// Transformer defaultTransformer=new Transformer();
// CkanItemDescriptor desc=new CkanItemDescriptor("sampleontable_in_prevre", json);
// ZenodoDeposition dep=z.createNew();
// defaultTransformer.transform(desc, dep);
// System.out.println(z.updateMetadata(dep.getId(), dep.getMetadata()));
// }
/**
 * Creates a new deposition, transforms the ResearchObject sample item onto it using the
 * mappings declared in /ResearchObject.xml, then updates the deposition metadata
 * and prints the outcome.
 */
@Test
public void createFromSimpleItem() throws ConfigurationException, Exception {
    final Zenodo zenodoClient=new Zenodo(credentials);
    final GenericResource mappingResource=Resources.unmarshal(
            GenericResource.class,
            TransformationTests.class.getResourceAsStream("/ResearchObject.xml"));

    // the deposition is created before the mappings are parsed, as in the original sequence
    final ZenodoDeposition created=zenodoClient.createNew();
    final Transformer transformer=new Transformer(IS.readMappings(mappingResource));
    final ZenodoDeposition transformed=TestCommons.readAndTransform("/ResearchObject.json", transformer, created);

    System.out.println(zenodoClient.updateMetadata(transformed.getId(), transformed.getMetadata()));
}
}

View File

@ -1,38 +1,58 @@
<Resource version="0.4.x">
<ID>4adeaca2-8e32-4507-8937-d891629998e2</ID>
<Type>GenericResource</Type>
<Scopes></Scopes>
<Profile>
<SecondaryType>Ckan-Zenodo-Mappings</SecondaryType>
<Name>ResearchObject</Name>
<Description>Simple mappings tests</Description>
<Body>
<mappings>
<mapping>
<source>$.extras[?(@.key=='Author')].value</source>
<target>$.metadata.contributors[0].name</target>
<regexp type="extract">
<target>([A-Za-z]*, [A-Za-z]*)(?=,)</target>
</regexp>
</mapping>
<mapping>
<source>$.extras[?(@.key=='Author')].value</source>
<target>$.metadata.contributors[0].type</target>
<regexp type="replace">
<target>.*</target>
<replacement>Producer</replacement>
</regexp>
</mapping>
<mapping>
<source>$.extras[?(@.key=='Author')].value</source>
<target>$.metadata.contributors[0].orcid</target>
<regexp type="extract">
<target>orcid.org/.*</target>
</regexp>
</mapping>
</mappings>
</Body>
</Profile>
<ID>4adeaca2-8e32-4507-8937-d891629998e2</ID>
<Type>GenericResource</Type>
<Scopes></Scopes>
<Profile>
<SecondaryType>Ckan-Zenodo-Mappings</SecondaryType>
<Name>ResearchObject</Name>
<Description>Simple mappings tests</Description>
<Body>
<mappings>
<mapping>
<source type="constant">
<value>dataset</value>
</source>
<targetPath>$.metadata</targetPath>
<targetElement>upload_type</targetElement>
</mapping>
<mapping>
<source type="jsonPath">
<value>$.extras[?(@.key=='Author')].value</value>
</source>
<targetPath>$.metadata.contributors[0]</targetPath>
<targetElement>name</targetElement>
<regexp type="extract">
<target>([A-Za-z]*, [A-Za-z]*)(?=,)</target>
</regexp>
</mapping>
<mapping>
<source type="constant">
<value>Producer</value>
</source>
<targetPath>$.metadata.contributors[0]</targetPath>
<targetElement>type</targetElement>
</mapping>
<!-- <mapping>
<source type="jsonPath">
<value>$.extras[?(@.key=='Author')].value</value>
</source>
<targetPath>$.metadata.contributors[0]</targetPath>
<targetElement>orcid</targetElement>
<regexp type="extract">
<target>(https://)?orcid.org/.*</target>
</regexp>
<regexp type="replace">
<target>https://orcid.org/</target>
<replacement></replacement>
</regexp>
</mapping>-->
</mappings>
</Body>
</Profile>
</Resource>