Profile specific mappings

This commit is contained in:
Fabio Sinibaldi 2019-12-03 12:50:36 +01:00
parent 7553dd1964
commit f71d00cc20
7 changed files with 227 additions and 23 deletions

View File

@ -10,12 +10,11 @@ import java.time.temporal.ChronoField;
import java.time.temporal.TemporalAccessor;
import java.util.List;
import org.gcube.data.publishing.ckan2zenodo.commons.Parsing;
import org.gcube.data.publishing.ckan2zenodo.model.zenodo.Commons;
import com.jayway.jsonpath.Configuration;
import com.jayway.jsonpath.DocumentContext;
import com.jayway.jsonpath.JsonPath;
import com.jayway.jsonpath.Option;
import lombok.extern.slf4j.Slf4j;
@ -30,24 +29,19 @@ public class Fixer {
private static Configuration PATH_CONFIGURATION=null;
private static Configuration ALWAYS_LIST_CONFIG=null;
static {
INCOMING_FORMATTER=DateTimeFormatter.ofPattern("[yyyy-MM-dd['T'HH:mm:ss[.SSSSSS[z][Z][XXX]]]]");
INTERNAL_FORMATTER=DateTimeFormatter.ofPattern(Commons.ISO_DATE_PATTERN);
PATH_CONFIGURATION = Configuration.builder().options(Option.AS_PATH_LIST,Option.SUPPRESS_EXCEPTIONS,Option.DEFAULT_PATH_LEAF_TO_NULL).build();
ALWAYS_LIST_CONFIG= Configuration.builder().options(Option.ALWAYS_RETURN_LIST,Option.SUPPRESS_EXCEPTIONS,Option.DEFAULT_PATH_LEAF_TO_NULL).build();
}
public static final String fixIncoming(String toFix) {
DocumentContext ctx=JsonPath.using(ALWAYS_LIST_CONFIG).parse(toFix);
DocumentContext pathCtx=JsonPath.using(PATH_CONFIGURATION).parse(toFix);
DocumentContext ctx=JsonPath.using(Parsing.JSON_PATH_ALWAYS_LIST_CONFIG).parse(toFix);
DocumentContext pathCtx=JsonPath.using(Parsing.JSON_PATH_PATHS_CONFIGURATION).parse(toFix);
ctx=fixIncomingDate(ctx,pathCtx, "$.created");
ctx=fixIncomingDate(ctx,pathCtx, "$.modified");
@ -59,8 +53,8 @@ public class Fixer {
public static String fixSending(String toFix) {
DocumentContext ctx=JsonPath.using(ALWAYS_LIST_CONFIG).parse(toFix);
DocumentContext pathCtx=JsonPath.using(PATH_CONFIGURATION).parse(toFix);
DocumentContext ctx=JsonPath.using(Parsing.JSON_PATH_ALWAYS_LIST_CONFIG).parse(toFix);
DocumentContext pathCtx=JsonPath.using(Parsing.JSON_PATH_PATHS_CONFIGURATION).parse(toFix);
ctx=fixOutgoingDate(ctx,pathCtx, "$.created");
ctx=fixOutgoingDate(ctx,pathCtx, "$.modified");

View File

@ -1,24 +1,37 @@
package org.gcube.data.publishing.ckan2zenodo;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import org.gcube.data.publishing.ckan2zenodo.commons.Parsing;
import org.gcube.data.publishing.ckan2zenodo.model.CkanItemDescriptor;
import org.gcube.data.publishing.ckan2zenodo.model.Mapping;
import org.gcube.data.publishing.ckan2zenodo.model.zenodo.Contributor;
import org.gcube.data.publishing.ckan2zenodo.model.zenodo.DepositionMetadata;
import org.gcube.data.publishing.ckan2zenodo.model.zenodo.RelatedIdentifier;
import org.gcube.data.publishing.ckan2zenodo.model.zenodo.DepositionMetadata.AccessRights;
import org.gcube.data.publishing.ckan2zenodo.model.zenodo.RelatedIdentifier;
import org.gcube.data.publishing.ckan2zenodo.model.zenodo.ZenodoDeposition;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.jayway.jsonpath.DocumentContext;
import com.jayway.jsonpath.JsonPath;
import lombok.NonNull;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
@Slf4j
@RequiredArgsConstructor
public class Transformer {
private HashMap<String,String> mappings=new HashMap<>();
@NonNull
private List<Mapping> mappings;
public ZenodoDeposition transform(CkanItemDescriptor toTransform, ZenodoDeposition deposition) {
public ZenodoDeposition transform(CkanItemDescriptor toTransform, ZenodoDeposition deposition) throws IOException {
log.debug("Transforming "+toTransform+". Existing Deposition is : "+deposition);
if(deposition==null) deposition=new ZenodoDeposition();
@ -48,6 +61,8 @@ public class Transformer {
ArrayList<RelatedIdentifier> relatedIdentifiers=new ArrayList<>();
if(itemUrl!=null) relatedIdentifiers.add(new RelatedIdentifier(itemUrl,RelatedIdentifier.Relation.compiles));
meta.setRelated_identifiers(relatedIdentifiers);
//Contributors
ArrayList<Contributor> contributors=new ArrayList<>();
String authorName=toTransform.getAuthor();
@ -74,8 +89,25 @@ public class Transformer {
deposition.setMetadata(meta);
// profile specific mappings
return applyMappings(toTransform, deposition);
}
/**
 * Applies the profile-specific mappings of this transformer on top of an already built deposition.
 *
 * <p>The content of {@code source} and the JSON serialization of {@code target} are both parsed with
 * {@code Parsing.JSON_PATH_ALWAYS_LIST_CONFIG}. For every configured {@link Mapping}, the values read
 * at the mapping's source path are translated through the mapping's value table (a value without an
 * entry is kept unchanged) and added at the mapping's target path of the deposition document.
 *
 * @param source the CKAN item whose content is read
 * @param target the deposition to enrich; it is serialized to JSON, not modified in place
 * @return a new deposition deserialized from the enriched JSON document
 * @throws IOException declared for the Jackson serialization / deserialization calls
 */
private ZenodoDeposition applyMappings(CkanItemDescriptor source, ZenodoDeposition target) throws IOException {
ObjectMapper mapper=Parsing.getMapper();
// Source and target are handled as JsonPath documents so that mappings can address them by path.
DocumentContext sourceCtx=JsonPath.using(Parsing.JSON_PATH_ALWAYS_LIST_CONFIG).parse(source.getContent());
DocumentContext targetCtx=JsonPath.using(Parsing.JSON_PATH_ALWAYS_LIST_CONFIG).parse(mapper.writeValueAsString(target));
// NOTE(review): the next statement looks like the pre-change return of transform(...) carried over
// by the diff rendering; `deposition` is not declared in this method - confirm it is not in the real source.
return deposition;
for(Mapping mapping:mappings) {
// NOTE(review): elements are assumed to be strings; a non-string JSON value at the source path
// would presumably fail in the typed loop below - confirm against the expected item schema.
List<String> sourceValues=sourceCtx.read(mapping.getSource());
if(sourceValues!=null)
for(String sourceValue:sourceValues) {
// Translate through the value table, falling back to the raw source value when no entry exists.
String targetValue =mapping.getValueMapping().getOrDefault(sourceValue, sourceValue);
// NOTE(review): DocumentContext.add presumably requires the target path to resolve to an array - confirm.
targetCtx.add(mapping.getTarget(),targetValue);
}
}
// Round-trip the enriched JSON back into the model class.
return mapper.readValue(targetCtx.jsonString(), ZenodoDeposition.class);
}
}

View File

@ -1,9 +1,77 @@
package org.gcube.data.publishing.ckan2zenodo;
import java.util.ArrayList;
import java.util.HashMap;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import org.gcube.common.resources.gcore.GenericResource;
import org.gcube.data.publishing.ckan2zenodo.commons.IS;
import org.gcube.data.publishing.ckan2zenodo.model.Mapping;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
import lombok.Synchronized;
public class TransformerManager {
public Transformer getByProfile(String profile) {
throw new RuntimeException("Implement this");
// Lazily created builder shared by the whole class; stays null until getBuilder() is first called.
private static DocumentBuilder builder=null;

/**
 * Returns the shared {@link DocumentBuilder}, creating it on the first invocation.
 *
 * <p>The method is annotated with Lombok's {@code @Synchronized}.
 *
 * @return the shared builder instance
 * @throws ParserConfigurationException if the builder cannot be created
 */
@Synchronized
private static DocumentBuilder getBuilder() throws ParserConfigurationException {
    // Fast exit once the instance exists.
    if(builder!=null) return builder;
    builder=DocumentBuilderFactory.newInstance().newDocumentBuilder();
    return builder;
}
/**
 * Looks up the mappings resource registered for the given profile and wraps it in a Transformer.
 *
 * <p>Generic resources with secondary type {@code Ckan-Zenodo-Mappings} are scanned in the order
 * returned by the query; the first one whose profile name equals {@code profile} is used.
 *
 * @param profile the profile name to look for
 * @return a transformer configured with the mappings read from the matching resource
 * @throws Exception if no resource matches the profile name
 */
public Transformer getByProfile(String profile) throws Exception {
    for(GenericResource candidate: IS.queryForGenericResources("Ckan-Zenodo-Mappings")){
        String candidateName=candidate.profile().name();
        if(!candidateName.equals(profile)) continue;
        ArrayList<Mapping> profileMappings=readMappings(candidate);
        return new Transformer(profileMappings);
    }
    throw new Exception("No transformer found for profile "+profile);
}
/**
 * Reads the profile-specific mappings declared in the body of the given generic resource.
 *
 * <p>Every {@code mapping} element found under the resource body yields one {@link Mapping}, built
 * from the text of its first {@code source} and {@code target} descendants plus one value
 * translation per {@code valueMapping} descendant (text of that element's first
 * {@code sourceValue} and {@code targetValue}).
 *
 * @param res the generic resource whose profile body declares the mappings
 * @return the mappings in document order; empty when the body declares none
 */
private static ArrayList<Mapping> readMappings(GenericResource res){
    ArrayList<Mapping> toReturn=new ArrayList<Mapping>();
    Element root=res.profile().body();
    NodeList mappings=root.getElementsByTagName("mapping");
    for(int i = 0; i<mappings.getLength();i++) {
        Element mapping=(Element) mappings.item(i);
        String source=mapping.getElementsByTagName("source").item(0).getTextContent();
        String target=mapping.getElementsByTagName("target").item(0).getTextContent();

        HashMap<String,String> values=new HashMap<>();
        NodeList valueMappings=mapping.getElementsByTagName("valueMapping");
        // The loop must be bounded by j (the original tested i, so it either never ran or never ended).
        for(int j = 0; j<valueMappings.getLength();j++) {
            // Read the pair from the j-th valueMapping itself, not from the first one under the mapping.
            Element valueMapping=(Element) valueMappings.item(j);
            String sourceValue=valueMapping.getElementsByTagName("sourceValue").item(0).getTextContent();
            String targetValue=valueMapping.getElementsByTagName("targetValue").item(0).getTextContent();
            values.put(sourceValue, targetValue);
        }

        toReturn.add(new Mapping(source,target,values));
    }
    return toReturn;
}
}

View File

@ -0,0 +1,45 @@
package org.gcube.data.publishing.ckan2zenodo.commons;
import static org.gcube.resources.discovery.icclient.ICFactory.clientFor;
import static org.gcube.resources.discovery.icclient.ICFactory.queryFor;
import java.util.List;
import org.gcube.common.resources.gcore.GenericResource;
import org.gcube.common.resources.gcore.ServiceEndpoint;
import org.gcube.resources.discovery.client.api.DiscoveryClient;
import org.gcube.resources.discovery.client.queries.api.SimpleQuery;
import lombok.extern.slf4j.Slf4j;
/**
 * Static helpers that look up gCube resources through the IC discovery client.
 *
 * <p>NOTE(review): the argument values are concatenated into the query conditions without any
 * escaping - confirm that callers only pass trusted, quote-free values.
 */
@Slf4j
public class IS {

    /**
     * Queries for the generic resources having the given secondary type.
     *
     * @param secondaryType value matched against {@code Profile/SecondaryType}
     * @return the resources returned by the discovery client
     */
    public static List<GenericResource> queryForGenericResources(String secondaryType){
        // The message previously said "Service Endpoints", copied from the sibling method.
        log.debug("Querying for Generic Resources [secondary type : {} ]",secondaryType);

        SimpleQuery query = queryFor(GenericResource.class);
        query.addCondition("$resource/Profile/SecondaryType/text() eq '"+secondaryType+"'");

        DiscoveryClient<GenericResource> client = clientFor(GenericResource.class);
        return client.submit(query);
    }

    /**
     * Queries for the service endpoints having the given category and platform name.
     *
     * @param category value matched against {@code Profile/Category}
     * @param platformName value matched against {@code Profile/Platform/Name}
     * @return the endpoints returned by the discovery client
     */
    public static List<ServiceEndpoint> queryForServiceEndpoints(String category, String platformName){
        log.debug("Querying for Service Endpoints [category : {} , platformName : {}]",category,platformName);

        SimpleQuery query = queryFor(ServiceEndpoint.class);
        query.addCondition("$resource/Profile/Category/text() eq '"+category+"'")
            .addCondition("$resource/Profile/Platform/Name/text() eq '"+platformName+"'");

        DiscoveryClient<ServiceEndpoint> client = clientFor(ServiceEndpoint.class);
        return client.submit(query);
    }
}

View File

@ -0,0 +1,28 @@
package org.gcube.data.publishing.ckan2zenodo.commons;
import com.fasterxml.jackson.annotation.JsonInclude.Include;
import com.fasterxml.jackson.databind.DeserializationFeature;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.SerializationFeature;
import com.jayway.jsonpath.Configuration;
import com.jayway.jsonpath.Option;
/**
 * Shared parsing set-up: the JsonPath configurations used across the library and a factory for
 * the Jackson mapper.
 */
public class Parsing {

    /**
     * JsonPath configuration built with {@code ALWAYS_RETURN_LIST}, {@code SUPPRESS_EXCEPTIONS} and
     * {@code DEFAULT_PATH_LEAF_TO_NULL}. Declared final so the shared instance cannot be reassigned.
     */
    public static final Configuration JSON_PATH_ALWAYS_LIST_CONFIG=Configuration.builder()
            .options(Option.ALWAYS_RETURN_LIST,Option.SUPPRESS_EXCEPTIONS,Option.DEFAULT_PATH_LEAF_TO_NULL)
            .build();

    /**
     * JsonPath configuration built with {@code AS_PATH_LIST}, {@code SUPPRESS_EXCEPTIONS} and
     * {@code DEFAULT_PATH_LEAF_TO_NULL}. Declared final so the shared instance cannot be reassigned.
     */
    public static final Configuration JSON_PATH_PATHS_CONFIGURATION=Configuration.builder()
            .options(Option.AS_PATH_LIST,Option.SUPPRESS_EXCEPTIONS,Option.DEFAULT_PATH_LEAF_TO_NULL)
            .build();

    /**
     * Creates a new, independently configurable mapper on every call.
     *
     * <p>The mapper ignores unknown properties when deserializing, has
     * {@code WRITE_EMPTY_JSON_ARRAYS} disabled and uses {@code NON_NULL} serialization inclusion.
     *
     * @return a freshly configured {@link ObjectMapper}
     */
    public static ObjectMapper getMapper() {
        ObjectMapper mapper=new ObjectMapper();
        mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES,false);
        mapper.configure(SerializationFeature.WRITE_EMPTY_JSON_ARRAYS, false);
        mapper.setSerializationInclusion(Include.NON_NULL);
        return mapper;
    }
}

View File

@ -0,0 +1,20 @@
package org.gcube.data.publishing.ckan2zenodo.model;
import java.util.HashMap;
import lombok.Getter;
import lombok.NonNull;
import lombok.RequiredArgsConstructor;
/**
 * One profile-specific mapping rule: where values are read from, where they are written to, and
 * how individual values are translated on the way.
 *
 * <p>The constructor and the getters are generated by Lombok; all three fields are marked
 * {@code @NonNull}.
 */
@RequiredArgsConstructor
@Getter
public class Mapping {
// Path expression read from the source document (Transformer passes it to JsonPath's read).
@NonNull
private String source;
// Path in the target document to which the translated values are added.
@NonNull
private String target;
// Translation table from source value to target value; Transformer keeps values without an entry unchanged.
@NonNull
private HashMap<String,String> valueMapping;
}

View File

@ -1,5 +1,8 @@
package org.gcube.tests;
import java.io.IOException;
import java.util.Collections;
import org.gcube.data.publishing.ckan2zenodo.Fixer;
import org.gcube.data.publishing.ckan2zenodo.Transformer;
import org.gcube.data.publishing.ckan2zenodo.model.CkanItemDescriptor;
@ -21,10 +24,10 @@ public class TransformationTests {
@Test
public void transform() throws JsonProcessingException {
public void transform() throws IOException {
String json=TestCommons.convertStreamToString(this.getClass().getResourceAsStream("/simpleItem.json"));
Transformer defaultTransformer=new Transformer();
Transformer defaultTransformer=new Transformer(Collections.EMPTY_LIST);
CkanItemDescriptor desc=new CkanItemDescriptor("sampleontable_in_prevre", json);
System.out.println("Going to transform : "+desc.getContent());
System.out.println("Result : ");
@ -36,4 +39,18 @@ public class TransformationTests {
}
/**
 * Transforms the sample item and prints both the resulting deposition and its outgoing JSON form.
 *
 * <p>NOTE(review): despite its name this test still builds the transformer with an empty mapping
 * list, so no profile-specific mapping is exercised yet.
 */
@Test
public void transformWithMappings() throws IOException {
    String json=TestCommons.convertStreamToString(this.getClass().getResourceAsStream("/simpleItem.json"));
    // Typed empty list instead of the raw Collections.EMPTY_LIST, avoiding an unchecked conversion.
    Transformer defaultTransformer=new Transformer(Collections.emptyList());
    CkanItemDescriptor desc=new CkanItemDescriptor("sampleontable_in_prevre", json);
    System.out.println("Going to transform : "+desc.getContent());
    System.out.println("Result : ");
    ZenodoDeposition dep=defaultTransformer.transform(desc, null);
    System.out.println(dep);
    System.out.println("As JSON : ");
    System.out.println(Fixer.fixSending(mapper.writeValueAsString(dep)));
}
}