package org.gcube.data.publishing.ckan2zenodo; import java.time.Instant; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.Date; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.gcube.data.publishing.ckan2zenodo.commons.Parsing; import org.gcube.data.publishing.ckan2zenodo.model.CkanItemDescriptor; import org.gcube.data.publishing.ckan2zenodo.model.CkanResource; import org.gcube.data.publishing.ckan2zenodo.model.faults.TransformationException; import org.gcube.data.publishing.ckan2zenodo.model.parsing.Filter; import org.gcube.data.publishing.ckan2zenodo.model.parsing.Mapping; import org.gcube.data.publishing.ckan2zenodo.model.parsing.Mappings; import org.gcube.data.publishing.ckan2zenodo.model.parsing.Regexp; import org.gcube.data.publishing.ckan2zenodo.model.parsing.ResourceFilter; import org.gcube.data.publishing.ckan2zenodo.model.parsing.TargetElement; import org.gcube.data.publishing.ckan2zenodo.model.parsing.Value; import org.gcube.data.publishing.ckan2zenodo.model.zenodo.Contributor; import org.gcube.data.publishing.ckan2zenodo.model.zenodo.Creator; import org.gcube.data.publishing.ckan2zenodo.model.zenodo.DepositionMetadata; import org.gcube.data.publishing.ckan2zenodo.model.zenodo.DepositionMetadata.AccessRights; import org.gcube.data.publishing.ckan2zenodo.model.zenodo.DepositionMetadata.UploadType; import org.gcube.data.publishing.ckan2zenodo.model.zenodo.RelatedIdentifier; import org.gcube.data.publishing.ckan2zenodo.model.zenodo.ZenodoDeposition; import com.fasterxml.jackson.databind.ObjectMapper; import com.jayway.jsonpath.DocumentContext; import com.jayway.jsonpath.JsonPath; import lombok.NonNull; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; import net.minidev.json.JSONArray; import net.minidev.json.JSONObject; @Slf4j @RequiredArgsConstructor public class Translator { @NonNull private List mappings; @NonNull private ResourceFilter resourceFilter; public Translator() { this(Collections.EMPTY_LIST,ResourceFilter.PASS_ALL); } public Translator(List mappings) { this(mappings,ResourceFilter.PASS_ALL); } public Translator(Mappings m) { this(m.getMappings(),m.getResourceFilters()); } public ZenodoDeposition transform(CkanItemDescriptor toTransform, ZenodoDeposition deposition) throws TransformationException { log.debug("Transforming "+toTransform+". Existing Deposition is : "+deposition); if(deposition==null) deposition=new ZenodoDeposition(); // default mappings DepositionMetadata meta=deposition.getMetadata(); if(meta==null) meta=new DepositionMetadata(); // UPLOAD TYPE meta.setUpload_type(UploadType.other); // TITLE meta.setTitle(toTransform.getTitle()); // DESCRIPTION meta.setDescription(toTransform.getNotes()); // ACCESS RIGHTS if(toTransform.isOpen()) meta.setAccess_right(AccessRights.open); else { meta.setAccess_right(AccessRights.restricted); meta.setAccess_conditions("Visit the VRE "+toTransform.getVRE()+" to access it."); } // LICENSE meta.setLicense(toTransform.getLicenseId()); // TAGS meta.setKeywords(new ArrayList(toTransform.getTags())); //RELATED IDENTIFIER String itemUrl=toTransform.getItemUrl(); ArrayList relatedIdentifiers=new ArrayList<>(); if(itemUrl!=null) relatedIdentifiers.add(new RelatedIdentifier(itemUrl,RelatedIdentifier.Relation.isCompiledBy)); meta.setRelated_identifiers(relatedIdentifiers); //CONTRIBUTORS & CREATORS ArrayList contributors=new ArrayList<>(); ArrayList creators=new ArrayList<>(); String authorName=toTransform.getAuthor(); if(authorName!=null) { Contributor author=new Contributor(Contributor.Type.Producer); author.setName(authorName); contributors.add(author); creators.add(new Creator(authorName)); } String maintainerName=toTransform.getAuthor(); if(maintainerName!=null) { Contributor maintainer=new Contributor(Contributor.Type.DataCurator); maintainer.setName(maintainerName); contributors.add(maintainer); creators.add(new Creator(authorName)); } // D4Science as contributor Contributor d4Science=new Contributor(Contributor.Type.HostingInstitution); d4Science.setName("D4Science"); contributors.add(d4Science); meta.setContributors(contributors); meta.setCreators(creators); // VERSION meta.setVersion(toTransform.getVersion()); // DATES // PUBLICATION DATE = now meta.setPublication_date(Date.from(Instant.now())); deposition.setMetadata(meta); // profile specific mappings return applyMappings(toTransform, deposition); } private ZenodoDeposition applyMappings(CkanItemDescriptor source, ZenodoDeposition target) throws TransformationException { try{ ObjectMapper mapper=Parsing.getMapper(); DocumentContext sourceCtx=JsonPath.using(Parsing.JSON_PATH_ALWAYS_LIST_CONFIG).parse(source.getContent()); DocumentContext targetCtx=JsonPath.using(Parsing.JSON_PATH_ALWAYS_LIST_CONFIG).parse(mapper.writeValueAsString(target)); // FOR EACH MAPPING // IF source // For each source value ; NB GET FIRST MATCHING Value path // APPLY TRANSFORMATIONS // FIND/INIT TARGET PATH (OPTS isArray, replace) // SET TARGET ELEMENT (OPTS replace, regexp) // IF SINGLE -> property // iF MULITPLE -> object // ELSE TODO for(Mapping mapping:mappings) { log.debug("Applying {} ",mapping); try { // "EVALUATE SURCE VALUES" List sourceValues=new ArrayList<>(); for(Value v: mapping.getSource().getValues()) { List actualValues=new ArrayList(); switch(v.getType()) { case constant : { actualValues.add(v.getValue()); break; } case jsonPath : { for(String s: ((Collection) sourceCtx.read(v.getValue()))){ if(s!=null) { s=s.trim(); if(!s.isEmpty())actualValues.add(s); } } break; } } // Applygin splits for(String foundVal:actualValues) { if(v.getSplit()!=null) for(String toAdd:foundVal.split(v.getSplit())) sourceValues.add(toAdd.trim()); else sourceValues.add(foundVal); } if(!sourceValues.isEmpty()) break; } log.debug("Found matching "+sourceValues); // ************** INIT TARGET PATH // CHECK INIT TARGET PATH ArrayList> resultingValueList=new ArrayList>(); if(!sourceValues.isEmpty()) { List targetElementFound=targetCtx.read(mapping.getTargetPath().getValue()); if(targetElementFound==null || targetElementFound.size()==0 || targetElementFound.get(0)==null || !mapping.getTargetPath().getAppend()) { // targetCtx=targetCtx.add(mapping.getTargetPath(),Collections.singletonList("nothing")); JsonPath path=JsonPath.compile(mapping.getTargetPath().getValue()); switch(mapping.getTargetPath().getType()) { case array : targetCtx=targetCtx.set(path,new Object[sourceValues.size()]); break; case map : targetCtx=targetCtx.set(path,null); break; } } } // ************** PREPARE VALUE FOR EACH SOURCE VALUE for(String sourceValue:sourceValues) { String resultingValue=sourceValue; log.debug("Managing "+resultingValue); // apply regexps for(Regexp regexp:mapping.getRegexp()) { switch(regexp.getType()) { case extract : { Pattern p=Pattern.compile(regexp.getTarget()); Matcher m = p.matcher(resultingValue); if(m.find()) resultingValue=m.group(); else resultingValue=null; break; } case replace : { if(resultingValue!=null) { String replacement=regexp.getReplacement()!=null?regexp.getReplacement():""; resultingValue=resultingValue.replaceAll(regexp.getTarget(), replacement); break; } } } } // apply value mappings resultingValue =mapping.getValueMapping().getOrDefault(sourceValue, resultingValue); // NEW : Multiple Target Elements can be simple or structure Map resultingTargetElements=new HashMap(); for(TargetElement el:mapping.getTargetElements()) { if(el.getConstant()!=null) resultingValue=el.getConstant(); else if(el.getAppend()){ String original=((List)targetCtx.read(mapping.getTargetPath()+"."+el.getValue())).get(0); if(original!=null && !original.isEmpty()) resultingValue=original+resultingValue; } resultingTargetElements.put(el.getValue(), resultingValue); } resultingValueList.add(resultingTargetElements); // close loop on values } JsonPath path=JsonPath.compile(mapping.getTargetPath().getValue()); switch(mapping.getTargetPath().getType()) { case array : targetCtx=targetCtx.set(path, resultingValueList); break; case map : for(Map resultingTargetElements : resultingValueList) { log.debug("Applying "+resultingTargetElements+ " to "+mapping.getTargetPath()); for(Entry e:resultingTargetElements.entrySet()) targetCtx=targetCtx.put(path, e.getKey(),e.getValue()); } break; } // for(Map resultingTargetElements : resultingValueList) { // log.debug("Applying "+resultingTargetElements+ " to "+mapping.getTargetPath()); // // Apply object // // // switch(mapping.getTargetPath().getType()) { // case array : // targetCtx=targetCtx.add(path, resultingTargetElements); // break; // case map : // targetCtx.set(path, resultingTargetElements); // break; // } // } }catch(Throwable t) { throw new TransformationException("Exception while applying "+mapping,t); } } String serializedOutput=targetCtx.jsonString(); log.debug("Mapping complete. Going to return : "+serializedOutput); return mapper.readValue(serializedOutput, ZenodoDeposition.class); }catch(Throwable t) { log.error("Unable to translate "+source+" using previous "+target,t); throw new TransformationException("Unable to translate "+source.getName(),t); } } public List filterResources(CkanItemDescriptor source) throws TransformationException{ ObjectMapper mapper=Parsing.getMapper(); DocumentContext sourceCtx=JsonPath.using(Parsing.JSON_PATH_ALWAYS_LIST_CONFIG).parse(source.getContent()); try { HashSet toReturn=new HashSet(); for(Filter f:resourceFilter.getFilters()) { JSONArray filtered=sourceCtx.read(f.getConditions().get(0)); for(Object obj:filtered) { Map map=(Map) obj; toReturn.add(mapper.readValue((new JSONObject(map)).toJSONString(), CkanResource.class)); } } return new ArrayList(toReturn); }catch(Throwable t) { log.error("Unable to filter resources. ",t); throw new TransformationException("Unable to filter "+source.getName()+" resources",t); } } }