362 lines
11 KiB
Java
362 lines
11 KiB
Java
package org.gcube.data.publishing.ckan2zenodo;
|
||
|
||
import java.time.Instant;
|
||
import java.util.ArrayList;
|
||
import java.util.Collection;
|
||
import java.util.Collections;
|
||
import java.util.Date;
|
||
import java.util.HashMap;
|
||
import java.util.HashSet;
|
||
import java.util.List;
|
||
import java.util.Map;
|
||
import java.util.Map.Entry;
|
||
import java.util.regex.Matcher;
|
||
import java.util.regex.Pattern;
|
||
|
||
import org.gcube.data.publishing.ckan2zenodo.commons.Parsing;
|
||
import org.gcube.data.publishing.ckan2zenodo.model.CkanItemDescriptor;
|
||
import org.gcube.data.publishing.ckan2zenodo.model.CkanResource;
|
||
import org.gcube.data.publishing.ckan2zenodo.model.faults.TransformationException;
|
||
import org.gcube.data.publishing.ckan2zenodo.model.parsing.Filter;
|
||
import org.gcube.data.publishing.ckan2zenodo.model.parsing.Mapping;
|
||
import org.gcube.data.publishing.ckan2zenodo.model.parsing.Mappings;
|
||
import org.gcube.data.publishing.ckan2zenodo.model.parsing.Regexp;
|
||
import org.gcube.data.publishing.ckan2zenodo.model.parsing.ResourceFilter;
|
||
import org.gcube.data.publishing.ckan2zenodo.model.parsing.TargetElement;
|
||
import org.gcube.data.publishing.ckan2zenodo.model.parsing.Value;
|
||
import org.gcube.data.publishing.ckan2zenodo.model.zenodo.Contributor;
|
||
import org.gcube.data.publishing.ckan2zenodo.model.zenodo.Creator;
|
||
import org.gcube.data.publishing.ckan2zenodo.model.zenodo.DepositionMetadata;
|
||
import org.gcube.data.publishing.ckan2zenodo.model.zenodo.DepositionMetadata.AccessRights;
|
||
import org.gcube.data.publishing.ckan2zenodo.model.zenodo.DepositionMetadata.UploadType;
|
||
import org.gcube.data.publishing.ckan2zenodo.model.zenodo.RelatedIdentifier;
|
||
import org.gcube.data.publishing.ckan2zenodo.model.zenodo.ZenodoDeposition;
|
||
|
||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||
import com.jayway.jsonpath.DocumentContext;
|
||
import com.jayway.jsonpath.JsonPath;
|
||
|
||
import lombok.NonNull;
|
||
import lombok.RequiredArgsConstructor;
|
||
import lombok.extern.slf4j.Slf4j;
|
||
import net.minidev.json.JSONArray;
|
||
import net.minidev.json.JSONObject;
|
||
|
||
@Slf4j
|
||
@RequiredArgsConstructor
|
||
public class Translator {
|
||
|
||
@NonNull
|
||
private List<Mapping> mappings;
|
||
|
||
@NonNull
|
||
private ResourceFilter resourceFilter;
|
||
|
||
/**
 * Creates a translator with no profile-specific mappings (only the default
 * metadata is produced) and {@link ResourceFilter#PASS_ALL} as resource filter.
 */
public Translator() {
    // Typed empty list instead of the raw Collections.EMPTY_LIST, which forced
    // an unchecked conversion to List<Mapping>.
    this(Collections.<Mapping>emptyList(), ResourceFilter.PASS_ALL);
}
|
||
|
||
/**
 * Creates a translator that applies the given profile-specific mappings and
 * uses {@link ResourceFilter#PASS_ALL} as resource filter.
 *
 * @param mappings mappings to apply after the default ones
 */
public Translator(List<Mapping> mappings) {
    this(
        mappings,
        ResourceFilter.PASS_ALL
    );
}
|
||
|
||
/**
 * Creates a translator from a {@link Mappings} bundle, taking its mappings
 * ({@code getMappings()}) and its resource filter ({@code getResourceFilters()}).
 *
 * @param m source of both the mappings and the resource filter
 */
public Translator(Mappings m) {
    this(
        m.getMappings(),
        m.getResourceFilters()
    );
}
|
||
|
||
public ZenodoDeposition transform(CkanItemDescriptor toTransform, ZenodoDeposition deposition) throws TransformationException {
|
||
log.debug("Transforming "+toTransform+". Existing Deposition is : "+deposition);
|
||
|
||
if(deposition==null) deposition=new ZenodoDeposition();
|
||
|
||
// default mappings
|
||
DepositionMetadata meta=deposition.getMetadata();
|
||
if(meta==null)
|
||
meta=new DepositionMetadata();
|
||
|
||
// UPLOAD TYPE
|
||
meta.setUpload_type(UploadType.other);
|
||
|
||
// TITLE
|
||
meta.setTitle(toTransform.getTitle());
|
||
// DESCRIPTION
|
||
meta.setDescription(toTransform.getNotes());
|
||
|
||
// ACCESS RIGHTS
|
||
if(toTransform.isOpen())
|
||
meta.setAccess_right(AccessRights.open);
|
||
else {
|
||
meta.setAccess_right(AccessRights.restricted);
|
||
meta.setAccess_conditions("Visit the VRE "+toTransform.getVRE()+" to access it.");
|
||
}
|
||
|
||
// LICENSE
|
||
meta.setLicense(toTransform.getLicenseId());
|
||
|
||
// TAGS
|
||
meta.setKeywords(new ArrayList<String>(toTransform.getTags()));
|
||
|
||
//RELATED IDENTIFIER
|
||
String itemUrl=toTransform.getItemUrl();
|
||
ArrayList<RelatedIdentifier> relatedIdentifiers=new ArrayList<>();
|
||
if(itemUrl!=null) relatedIdentifiers.add(new RelatedIdentifier(itemUrl,RelatedIdentifier.Relation.isCompiledBy));
|
||
|
||
meta.setRelated_identifiers(relatedIdentifiers);
|
||
|
||
|
||
|
||
//CONTRIBUTORS & CREATORS
|
||
ArrayList<Contributor> contributors=new ArrayList<>();
|
||
ArrayList<Creator> creators=new ArrayList<>();
|
||
|
||
|
||
String authorName=toTransform.getAuthor();
|
||
if(authorName!=null) {
|
||
Contributor author=new Contributor(Contributor.Type.Producer);
|
||
author.setName(authorName);
|
||
contributors.add(author);
|
||
|
||
|
||
creators.add(new Creator(authorName));
|
||
|
||
}
|
||
|
||
String maintainerName=toTransform.getAuthor();
|
||
if(maintainerName!=null) {
|
||
Contributor maintainer=new Contributor(Contributor.Type.DataCurator);
|
||
maintainer.setName(maintainerName);
|
||
contributors.add(maintainer);
|
||
|
||
|
||
creators.add(new Creator(authorName));
|
||
}
|
||
|
||
// D4Science as contributor
|
||
Contributor d4Science=new Contributor(Contributor.Type.HostingInstitution);
|
||
d4Science.setName("D4Science");
|
||
contributors.add(d4Science);
|
||
|
||
meta.setContributors(contributors);
|
||
meta.setCreators(creators);
|
||
|
||
|
||
// VERSION
|
||
meta.setVersion(toTransform.getVersion());
|
||
|
||
|
||
// DATES
|
||
// PUBLICATION DATE = now
|
||
meta.setPublication_date(Date.from(Instant.now()));
|
||
|
||
|
||
deposition.setMetadata(meta);
|
||
|
||
// profile specific mappings
|
||
return applyMappings(toTransform, deposition);
|
||
}
|
||
|
||
|
||
private ZenodoDeposition applyMappings(CkanItemDescriptor source, ZenodoDeposition target) throws TransformationException {
|
||
try{
|
||
ObjectMapper mapper=Parsing.getMapper();
|
||
DocumentContext sourceCtx=JsonPath.using(Parsing.JSON_PATH_ALWAYS_LIST_CONFIG).parse(source.getContent());
|
||
DocumentContext targetCtx=JsonPath.using(Parsing.JSON_PATH_ALWAYS_LIST_CONFIG).parse(mapper.writeValueAsString(target));
|
||
|
||
// FOR EACH MAPPING
|
||
// IF source
|
||
// For each source value ; NB GET FIRST MATCHING Value path
|
||
// APPLY TRANSFORMATIONS
|
||
// FIND/INIT TARGET PATH (OPTS isArray, replace)
|
||
// SET TARGET ELEMENT (OPTS replace, regexp)
|
||
// IF SINGLE -> property
|
||
// iF MULITPLE -> object
|
||
// ELSE TODO
|
||
for(Mapping mapping:mappings) {
|
||
log.debug("Applying {} ",mapping);
|
||
try {
|
||
// "EVALUATE SURCE VALUES"
|
||
List<String> sourceValues=new ArrayList<>();
|
||
|
||
for(Value v: mapping.getSource().getValues()) {
|
||
List<String> actualValues=new ArrayList<String>();
|
||
switch(v.getType()) {
|
||
case constant : {
|
||
actualValues.add(v.getValue());
|
||
break;
|
||
}
|
||
case jsonPath : {
|
||
for(String s: ((Collection<? extends String>) sourceCtx.read(v.getValue()))){
|
||
if(s!=null) {
|
||
s=s.trim();
|
||
if(!s.isEmpty())actualValues.add(s);
|
||
}
|
||
|
||
}
|
||
break;
|
||
}
|
||
}
|
||
|
||
|
||
// Applygin splits
|
||
for(String foundVal:actualValues) {
|
||
if(v.getSplit()!=null)
|
||
for(String toAdd:foundVal.split(v.getSplit()))
|
||
sourceValues.add(toAdd.trim());
|
||
else sourceValues.add(foundVal);
|
||
}
|
||
|
||
|
||
|
||
if(!sourceValues.isEmpty()) break;
|
||
|
||
}
|
||
|
||
log.debug("Found matching "+sourceValues);
|
||
|
||
|
||
// ************** INIT TARGET PATH
|
||
// CHECK INIT TARGET PATH
|
||
ArrayList<Map<String,Object>> resultingValueList=new ArrayList<Map<String,Object>>();
|
||
|
||
|
||
if(!sourceValues.isEmpty()) {
|
||
List<String> targetElementFound=targetCtx.read(mapping.getTargetPath().getValue());
|
||
if(targetElementFound==null || targetElementFound.size()==0 || targetElementFound.get(0)==null ||
|
||
!mapping.getTargetPath().getAppend()) {
|
||
// targetCtx=targetCtx.add(mapping.getTargetPath(),Collections.singletonList("nothing"));
|
||
JsonPath path=JsonPath.compile(mapping.getTargetPath().getValue());
|
||
|
||
switch(mapping.getTargetPath().getType()) {
|
||
case array :
|
||
targetCtx=targetCtx.set(path,new Object[sourceValues.size()]);
|
||
break;
|
||
case map :
|
||
targetCtx=targetCtx.set(path,null);
|
||
break;
|
||
}
|
||
}
|
||
}
|
||
|
||
// ************** PREPARE VALUE FOR EACH SOURCE VALUE
|
||
for(String sourceValue:sourceValues) {
|
||
String resultingValue=sourceValue;
|
||
log.debug("Managing "+resultingValue);
|
||
|
||
// apply regexps
|
||
for(Regexp regexp:mapping.getRegexp()) {
|
||
switch(regexp.getType()) {
|
||
case extract : {
|
||
Pattern p=Pattern.compile(regexp.getTarget());
|
||
Matcher m = p.matcher(resultingValue);
|
||
if(m.find())
|
||
resultingValue=m.group();
|
||
else resultingValue=null;
|
||
break;
|
||
}
|
||
case replace : {
|
||
if(resultingValue!=null) {
|
||
String replacement=regexp.getReplacement()!=null?regexp.getReplacement():"";
|
||
resultingValue=resultingValue.replaceAll(regexp.getTarget(), replacement);
|
||
break;
|
||
}
|
||
}
|
||
}
|
||
|
||
|
||
}
|
||
|
||
// apply value mappings
|
||
resultingValue =mapping.getValueMapping().getOrDefault(sourceValue, resultingValue);
|
||
|
||
|
||
// NEW : Multiple Target Elements can be simple or structure
|
||
|
||
Map<String,Object> resultingTargetElements=new HashMap<String, Object>();
|
||
|
||
for(TargetElement el:mapping.getTargetElements()) {
|
||
if(el.getConstant()!=null) resultingValue=el.getConstant();
|
||
else
|
||
if(el.getAppend()){
|
||
String original=((List<String>)targetCtx.read(mapping.getTargetPath()+"."+el.getValue())).get(0);
|
||
if(original!=null && !original.isEmpty())
|
||
resultingValue=original+resultingValue;
|
||
}
|
||
|
||
resultingTargetElements.put(el.getValue(), resultingValue);
|
||
|
||
}
|
||
|
||
resultingValueList.add(resultingTargetElements);
|
||
|
||
// close loop on values
|
||
}
|
||
|
||
JsonPath path=JsonPath.compile(mapping.getTargetPath().getValue());
|
||
switch(mapping.getTargetPath().getType()) {
|
||
case array :
|
||
targetCtx=targetCtx.set(path, resultingValueList);
|
||
break;
|
||
case map :
|
||
|
||
for(Map<String,Object> resultingTargetElements : resultingValueList) {
|
||
log.debug("Applying "+resultingTargetElements+ " to "+mapping.getTargetPath());
|
||
for(Entry<String,Object> e:resultingTargetElements.entrySet())
|
||
targetCtx=targetCtx.put(path, e.getKey(),e.getValue());
|
||
|
||
}
|
||
break;
|
||
}
|
||
|
||
|
||
// for(Map<String,Object> resultingTargetElements : resultingValueList) {
|
||
// log.debug("Applying "+resultingTargetElements+ " to "+mapping.getTargetPath());
|
||
// // Apply object
|
||
//
|
||
//
|
||
// switch(mapping.getTargetPath().getType()) {
|
||
// case array :
|
||
// targetCtx=targetCtx.add(path, resultingTargetElements);
|
||
// break;
|
||
// case map :
|
||
// targetCtx.set(path, resultingTargetElements);
|
||
// break;
|
||
// }
|
||
// }
|
||
|
||
|
||
}catch(Throwable t) {
|
||
throw new TransformationException("Exception while applying "+mapping,t);
|
||
}
|
||
}
|
||
String serializedOutput=targetCtx.jsonString();
|
||
log.debug("Mapping complete. Going to return : "+serializedOutput);
|
||
return mapper.readValue(serializedOutput, ZenodoDeposition.class);
|
||
}catch(Throwable t) {
|
||
log.error("Unable to translate "+source+" using previous "+target,t);
|
||
throw new TransformationException("Unable to translate "+source.getName(),t);
|
||
}
|
||
}
|
||
|
||
|
||
public List<CkanResource> filterResources(CkanItemDescriptor source) throws TransformationException{
|
||
ObjectMapper mapper=Parsing.getMapper();
|
||
DocumentContext sourceCtx=JsonPath.using(Parsing.JSON_PATH_ALWAYS_LIST_CONFIG).parse(source.getContent());
|
||
try {
|
||
HashSet<CkanResource> toReturn=new HashSet();
|
||
for(Filter f:resourceFilter.getFilters()) {
|
||
JSONArray filtered=sourceCtx.read(f.getConditions().get(0));
|
||
for(Object obj:filtered) {
|
||
Map<String,String> map=(Map<String, String>) obj;
|
||
|
||
toReturn.add(mapper.readValue((new JSONObject(map)).toJSONString(), CkanResource.class));
|
||
}
|
||
}
|
||
|
||
return new ArrayList<CkanResource>(toReturn);
|
||
}catch(Throwable t) {
|
||
log.error("Unable to filter resources. ",t);
|
||
throw new TransformationException("Unable to filter "+source.getName()+" resources",t);
|
||
}
|
||
}
|
||
}
|