/*
 * ckan2zenodo-library/src/main/java/org/gcube/data/publishing/ckan2zenodo/Translator.java
 *
 * Repository-viewer residue captured together with the source:
 * "362 lines", "11 KiB", "Java", "Raw Blame History".
 *
 * Viewer warning: this file contains invisible Unicode characters that are
 * indistinguishable to humans but may be processed differently by a computer.
 * If this is intentional, the warning can safely be ignored; use the viewer's
 * Escape button to reveal them.
 */

package org.gcube.data.publishing.ckan2zenodo;
import java.time.Instant;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.gcube.data.publishing.ckan2zenodo.commons.Parsing;
import org.gcube.data.publishing.ckan2zenodo.model.CkanItemDescriptor;
import org.gcube.data.publishing.ckan2zenodo.model.CkanResource;
import org.gcube.data.publishing.ckan2zenodo.model.faults.TransformationException;
import org.gcube.data.publishing.ckan2zenodo.model.parsing.Filter;
import org.gcube.data.publishing.ckan2zenodo.model.parsing.Mapping;
import org.gcube.data.publishing.ckan2zenodo.model.parsing.Mappings;
import org.gcube.data.publishing.ckan2zenodo.model.parsing.Regexp;
import org.gcube.data.publishing.ckan2zenodo.model.parsing.ResourceFilter;
import org.gcube.data.publishing.ckan2zenodo.model.parsing.TargetElement;
import org.gcube.data.publishing.ckan2zenodo.model.parsing.Value;
import org.gcube.data.publishing.ckan2zenodo.model.zenodo.Contributor;
import org.gcube.data.publishing.ckan2zenodo.model.zenodo.Creator;
import org.gcube.data.publishing.ckan2zenodo.model.zenodo.DepositionMetadata;
import org.gcube.data.publishing.ckan2zenodo.model.zenodo.DepositionMetadata.AccessRights;
import org.gcube.data.publishing.ckan2zenodo.model.zenodo.DepositionMetadata.UploadType;
import org.gcube.data.publishing.ckan2zenodo.model.zenodo.RelatedIdentifier;
import org.gcube.data.publishing.ckan2zenodo.model.zenodo.ZenodoDeposition;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.jayway.jsonpath.DocumentContext;
import com.jayway.jsonpath.JsonPath;
import lombok.NonNull;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import net.minidev.json.JSONArray;
import net.minidev.json.JSONObject;
/**
 * Translates a CKAN item into a Zenodo deposition.
 * <p>
 * {@link #transform(CkanItemDescriptor, ZenodoDeposition)} first fills the
 * deposition metadata with a fixed set of default values taken from the CKAN
 * item and then applies the configured {@link Mapping}s on the JSON form of
 * the deposition. {@link #filterResources(CkanItemDescriptor)} selects the
 * CKAN resources matched by the configured {@link ResourceFilter}.
 */
@Slf4j
@RequiredArgsConstructor
public class Translator {

    /** Profile specific mappings, applied in list order after the default ones. */
    @NonNull
    private final List<Mapping> mappings;

    /** Filters used by {@link #filterResources(CkanItemDescriptor)}. */
    @NonNull
    private final ResourceFilter resourceFilter;

    /** Creates a translator with no profile specific mappings and {@code ResourceFilter.PASS_ALL}. */
    public Translator() {
        this(Collections.<Mapping>emptyList(), ResourceFilter.PASS_ALL);
    }

    /**
     * Creates a translator with the given mappings and {@code ResourceFilter.PASS_ALL}.
     *
     * @param mappings the profile specific mappings, must not be {@code null}
     */
    public Translator(List<Mapping> mappings) {
        this(mappings, ResourceFilter.PASS_ALL);
    }

    /**
     * Creates a translator from a {@link Mappings} configuration.
     *
     * @param m provides both the mappings and the resource filters
     */
    public Translator(Mappings m) {
        this(m.getMappings(), m.getResourceFilters());
    }

    /**
     * Fills the deposition metadata from the CKAN item and applies the configured mappings.
     *
     * @param toTransform the CKAN item to read from
     * @param deposition  an existing deposition to update, or {@code null} to start from a new one
     * @return the deposition resulting from default and profile specific mappings
     * @throws TransformationException if the item cannot be translated
     */
    public ZenodoDeposition transform(CkanItemDescriptor toTransform, ZenodoDeposition deposition) throws TransformationException {
        log.debug("Transforming {}. Existing Deposition is : {}", toTransform, deposition);

        if (deposition == null) {
            deposition = new ZenodoDeposition();
        }

        // default mappings
        DepositionMetadata meta = deposition.getMetadata();
        if (meta == null) {
            meta = new DepositionMetadata();
        }

        // UPLOAD TYPE
        meta.setUpload_type(UploadType.other);

        // TITLE
        meta.setTitle(toTransform.getTitle());

        // DESCRIPTION
        meta.setDescription(toTransform.getNotes());

        // ACCESS RIGHTS
        if (toTransform.isOpen()) {
            meta.setAccess_right(AccessRights.open);
        } else {
            meta.setAccess_right(AccessRights.restricted);
            meta.setAccess_conditions("Visit the VRE " + toTransform.getVRE() + " to access it.");
        }

        // LICENSE
        meta.setLicense(toTransform.getLicenseId());

        // TAGS
        meta.setKeywords(new ArrayList<String>(toTransform.getTags()));

        // RELATED IDENTIFIER
        String itemUrl = toTransform.getItemUrl();
        ArrayList<RelatedIdentifier> relatedIdentifiers = new ArrayList<>();
        if (itemUrl != null) {
            relatedIdentifiers.add(new RelatedIdentifier(itemUrl, RelatedIdentifier.Relation.isCompiledBy));
        }
        meta.setRelated_identifiers(relatedIdentifiers);

        // CONTRIBUTORS & CREATORS
        ArrayList<Contributor> contributors = new ArrayList<>();
        ArrayList<Creator> creators = new ArrayList<>();

        String authorName = toTransform.getAuthor();
        if (authorName != null) {
            Contributor author = new Contributor(Contributor.Type.Producer);
            author.setName(authorName);
            contributors.add(author);
            creators.add(new Creator(authorName));
        }

        // NOTE(review): the maintainer name is read through getAuthor(); presumably a
        // dedicated maintainer accessor was intended - confirm against CkanItemDescriptor.
        String maintainerName = toTransform.getAuthor();
        if (maintainerName != null) {
            Contributor maintainer = new Contributor(Contributor.Type.DataCurator);
            maintainer.setName(maintainerName);
            contributors.add(maintainer);
            // Register the maintainer as creator only when it is not the author,
            // otherwise the same creator would be listed twice.
            if (!maintainerName.equals(authorName)) {
                creators.add(new Creator(maintainerName));
            }
        }

        // D4Science as contributor
        Contributor d4Science = new Contributor(Contributor.Type.HostingInstitution);
        d4Science.setName("D4Science");
        contributors.add(d4Science);

        meta.setContributors(contributors);
        meta.setCreators(creators);

        // VERSION
        meta.setVersion(toTransform.getVersion());

        // DATES
        // PUBLICATION DATE = now
        meta.setPublication_date(Date.from(Instant.now()));

        deposition.setMetadata(meta);

        // profile specific mappings
        return applyMappings(toTransform, deposition);
    }

    /**
     * Applies every configured mapping to the JSON form of {@code target}.
     * <p>
     * For each mapping:
     * <ol>
     * <li>the source values are evaluated (first source definition yielding values wins);</li>
     * <li>if values were found, the target path is initialised when it is empty or when
     * the target path is not flagged as append;</li>
     * <li>each source value goes through the mapping regexps and value mapping and is
     * turned into one entry per target element;</li>
     * <li>the entries are written to the target path (as a list for array targets, as
     * properties for map targets).</li>
     * </ol>
     *
     * @param source the CKAN item providing the source JSON
     * @param target the deposition holding the default mappings
     * @return the deposition deserialized from the mapped JSON
     * @throws TransformationException if any mapping fails or the result cannot be deserialized
     */
    private ZenodoDeposition applyMappings(CkanItemDescriptor source, ZenodoDeposition target) throws TransformationException {
        try {
            ObjectMapper mapper = Parsing.getMapper();
            DocumentContext sourceCtx = JsonPath.using(Parsing.JSON_PATH_ALWAYS_LIST_CONFIG).parse(source.getContent());
            DocumentContext targetCtx = JsonPath.using(Parsing.JSON_PATH_ALWAYS_LIST_CONFIG).parse(mapper.writeValueAsString(target));

            for (Mapping mapping : mappings) {
                log.debug("Applying {} ", mapping);
                try {
                    // ************** EVALUATE SOURCE VALUES
                    List<String> sourceValues = readSourceValues(mapping, sourceCtx);
                    log.debug("Found matching {}", sourceValues);

                    String targetPathValue = mapping.getTargetPath().getValue();
                    JsonPath path = JsonPath.compile(targetPathValue);

                    // ************** INIT TARGET PATH
                    if (!sourceValues.isEmpty()) {
                        List<String> targetElementFound = targetCtx.read(targetPathValue);
                        if (targetElementFound == null || targetElementFound.size() == 0 || targetElementFound.get(0) == null
                                || !mapping.getTargetPath().getAppend()) {
                            switch (mapping.getTargetPath().getType()) {
                            case array:
                                targetCtx = targetCtx.set(path, new Object[sourceValues.size()]);
                                break;
                            case map:
                                targetCtx = targetCtx.set(path, null);
                                break;
                            }
                        }
                    }

                    // ************** PREPARE VALUE FOR EACH SOURCE VALUE
                    ArrayList<Map<String, Object>> resultingValueList = new ArrayList<Map<String, Object>>();
                    for (String sourceValue : sourceValues) {
                        log.debug("Managing " + sourceValue);
                        String resultingValue = applyRegexps(mapping, sourceValue);
                        // value mappings are looked up by the original source value
                        resultingValue = mapping.getValueMapping().getOrDefault(sourceValue, resultingValue);
                        resultingValueList.add(buildTargetElements(mapping, resultingValue, targetCtx, targetPathValue));
                    }

                    // ************** WRITE TARGET
                    switch (mapping.getTargetPath().getType()) {
                    case array:
                        // NOTE(review): the whole array is replaced, also when no source value
                        // was found or the target path is flagged as append - confirm intent.
                        targetCtx = targetCtx.set(path, resultingValueList);
                        break;
                    case map:
                        for (Map<String, Object> resultingTargetElements : resultingValueList) {
                            log.debug("Applying {} to {}", resultingTargetElements, mapping.getTargetPath());
                            for (Entry<String, Object> e : resultingTargetElements.entrySet()) {
                                targetCtx = targetCtx.put(path, e.getKey(), e.getValue());
                            }
                        }
                        break;
                    }
                } catch (Throwable t) {
                    throw new TransformationException("Exception while applying " + mapping, t);
                }
            }

            String serializedOutput = targetCtx.jsonString();
            log.debug("Mapping complete. Going to return : {}", serializedOutput);
            return mapper.readValue(serializedOutput, ZenodoDeposition.class);
        } catch (Throwable t) {
            log.error("Unable to translate " + source + " using previous " + target, t);
            throw new TransformationException("Unable to translate " + source.getName(), t);
        }
    }

    /**
     * Evaluates the source definitions of a mapping, in order, and returns the values
     * of the first definition that yields at least one value.
     * <p>
     * Values read through a JSON path are trimmed and skipped when {@code null} or empty;
     * when the definition declares a split expression every value is split on it and each
     * token is trimmed.
     *
     * @param mapping   the mapping whose source is evaluated
     * @param sourceCtx the parsed CKAN item
     * @return the found values, possibly empty
     */
    @SuppressWarnings("unchecked")
    private static List<String> readSourceValues(Mapping mapping, DocumentContext sourceCtx) {
        List<String> sourceValues = new ArrayList<>();
        for (Value v : mapping.getSource().getValues()) {
            List<String> actualValues = new ArrayList<>();
            switch (v.getType()) {
            case constant:
                actualValues.add(v.getValue());
                break;
            case jsonPath:
                for (String s : ((Collection<? extends String>) sourceCtx.read(v.getValue()))) {
                    if (s != null) {
                        s = s.trim();
                        if (!s.isEmpty()) {
                            actualValues.add(s);
                        }
                    }
                }
                break;
            }

            // apply splits
            for (String foundVal : actualValues) {
                if (v.getSplit() != null) {
                    for (String toAdd : foundVal.split(v.getSplit())) {
                        sourceValues.add(toAdd.trim());
                    }
                } else {
                    sourceValues.add(foundVal);
                }
            }

            // first matching source definition wins
            if (!sourceValues.isEmpty()) {
                break;
            }
        }
        return sourceValues;
    }

    /**
     * Runs the regexps of a mapping, in order, over a value.
     * <p>
     * An {@code extract} keeps the first match of its target expression, or yields
     * {@code null} when nothing matches; a {@code replace} substitutes every match with the
     * configured replacement (empty string when none is configured). Once the value has
     * become {@code null} the remaining regexps are skipped, so that a later
     * {@code extract} cannot fail on a {@code null} input.
     *
     * @param mapping the mapping providing the regexps
     * @param value   the source value
     * @return the transformed value, or {@code null} if an extraction found no match
     */
    private static String applyRegexps(Mapping mapping, String value) {
        String result = value;
        for (Regexp regexp : mapping.getRegexp()) {
            if (result == null) {
                break;
            }
            switch (regexp.getType()) {
            case extract: {
                Matcher m = Pattern.compile(regexp.getTarget()).matcher(result);
                result = m.find() ? m.group() : null;
                break;
            }
            case replace: {
                String replacement = regexp.getReplacement() != null ? regexp.getReplacement() : "";
                result = result.replaceAll(regexp.getTarget(), replacement);
                break;
            }
            }
        }
        return result;
    }

    /**
     * Builds the entry to be written for one source value: one key per target element.
     * <p>
     * Each element is computed independently from {@code value}: an element constant, when
     * declared, is used as is; otherwise, for elements flagged as append, the value currently
     * found in the target under the element name (if any, and not empty) is used as prefix.
     *
     * @param mapping         the mapping providing the target elements
     * @param value           the value resulting from regexps and value mapping, may be {@code null}
     * @param targetCtx       the target document, read for append elements
     * @param targetPathValue the JSON path of the mapping target
     * @return element name to element value
     */
    private static Map<String, Object> buildTargetElements(Mapping mapping, String value, DocumentContext targetCtx,
            String targetPathValue) {
        Map<String, Object> resultingTargetElements = new HashMap<String, Object>();
        for (TargetElement el : mapping.getTargetElements()) {
            // per-element value, so that a constant / appended value does not leak to the next element
            String elementValue = value;
            if (el.getConstant() != null) {
                elementValue = el.getConstant();
            } else if (el.getAppend()) {
                List<String> existing = targetCtx.read(targetPathValue + "." + el.getValue());
                String original = (existing == null || existing.isEmpty()) ? null : existing.get(0);
                if (original != null && !original.isEmpty()) {
                    // nothing to append when the value is null: keep the existing one
                    elementValue = value == null ? original : original + value;
                }
            }
            resultingTargetElements.put(el.getValue(), elementValue);
        }
        return resultingTargetElements;
    }

    /**
     * Returns the distinct CKAN resources of an item matched by the configured filters.
     *
     * @param source the CKAN item whose content is filtered
     * @return the matched resources, without duplicates, in no particular order
     * @throws TransformationException if the item content cannot be parsed or filtered
     */
    public List<CkanResource> filterResources(CkanItemDescriptor source) throws TransformationException {
        try {
            ObjectMapper mapper = Parsing.getMapper();
            DocumentContext sourceCtx = JsonPath.using(Parsing.JSON_PATH_ALWAYS_LIST_CONFIG).parse(source.getContent());

            HashSet<CkanResource> toReturn = new HashSet<>();
            for (Filter f : resourceFilter.getFilters()) {
                // NOTE(review): only the first condition of each filter is evaluated - confirm intent.
                JSONArray filtered = sourceCtx.read(f.getConditions().get(0));
                for (Object obj : filtered) {
                    @SuppressWarnings("unchecked")
                    Map<String, String> map = (Map<String, String>) obj;
                    toReturn.add(mapper.readValue((new JSONObject(map)).toJSONString(), CkanResource.class));
                }
            }
            return new ArrayList<CkanResource>(toReturn);
        } catch (Throwable t) {
            log.error("Unable to filter resources. ", t);
            throw new TransformationException("Unable to filter " + source.getName() + " resources", t);
        }
    }
}