package org.gcube.data.publishing.ckan2zenodo; import java.time.Instant; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.Date; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.gcube.data.publishing.ckan2zenodo.commons.Parsing; import org.gcube.data.publishing.ckan2zenodo.model.CkanItemDescriptor; import org.gcube.data.publishing.ckan2zenodo.model.CkanResource; import org.gcube.data.publishing.ckan2zenodo.model.faults.TransformationException; import org.gcube.data.publishing.ckan2zenodo.model.parsing.Mapping; import org.gcube.data.publishing.ckan2zenodo.model.parsing.Mapping.Regexp; import org.gcube.data.publishing.ckan2zenodo.model.parsing.Mapping.Source.Value; import org.gcube.data.publishing.ckan2zenodo.model.parsing.ResourceFilter; import org.gcube.data.publishing.ckan2zenodo.model.parsing.ResourceFilter.Filter; import org.gcube.data.publishing.ckan2zenodo.model.zenodo.Contributor; import org.gcube.data.publishing.ckan2zenodo.model.zenodo.Creator; import org.gcube.data.publishing.ckan2zenodo.model.zenodo.DepositionMetadata; import org.gcube.data.publishing.ckan2zenodo.model.zenodo.DepositionMetadata.AccessRights; import org.gcube.data.publishing.ckan2zenodo.model.zenodo.DepositionMetadata.UploadType; import org.gcube.data.publishing.ckan2zenodo.model.zenodo.RelatedIdentifier; import org.gcube.data.publishing.ckan2zenodo.model.zenodo.ZenodoDeposition; import com.fasterxml.jackson.databind.ObjectMapper; import com.jayway.jsonpath.DocumentContext; import com.jayway.jsonpath.JsonPath; import lombok.NonNull; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; import net.minidev.json.JSONArray; import net.minidev.json.JSONObject; @Slf4j @RequiredArgsConstructor public class Translator { @NonNull private List mappings; @NonNull private ResourceFilter resourceFilter; public Translator() { this(Collections.EMPTY_LIST,new ResourceFilter(Collections.EMPTY_LIST)); } public Translator(List mappings) { this(mappings,new ResourceFilter(Collections.EMPTY_LIST)); } public ZenodoDeposition transform(CkanItemDescriptor toTransform, ZenodoDeposition deposition) throws TransformationException { log.debug("Transforming "+toTransform+". Existing Deposition is : "+deposition); if(deposition==null) deposition=new ZenodoDeposition(); // default mappings DepositionMetadata meta=deposition.getMetadata(); if(meta==null) meta=new DepositionMetadata(); // UPLOAD TYPE meta.setUpload_type(UploadType.other); // TITLE meta.setTitle(toTransform.getTitle()); // DESCRIPTION meta.setDescription(toTransform.getNotes()); // ACCESS RIGHTS if(toTransform.isOpen()) meta.setAccess_right(AccessRights.open); else { meta.setAccess_right(AccessRights.restricted); meta.setAccess_conditions("Visit the VRE "+toTransform.getVRE()+" to access it."); } // LICENSE meta.setLicense(toTransform.getLicenseId()); // TAGS meta.setKeywords(new ArrayList(toTransform.getTags())); //RELATED IDENTIFIER String itemUrl=toTransform.getItemUrl(); ArrayList relatedIdentifiers=new ArrayList<>(); if(itemUrl!=null) relatedIdentifiers.add(new RelatedIdentifier(itemUrl,RelatedIdentifier.Relation.isCompiledBy)); meta.setRelated_identifiers(relatedIdentifiers); //CONTRIBUTORS & CREATORS ArrayList contributors=new ArrayList<>(); ArrayList creators=new ArrayList<>(); String authorName=toTransform.getAuthor(); if(authorName!=null) { Contributor author=new Contributor(Contributor.Type.Producer); author.setName(authorName); contributors.add(author); creators.add(new Creator(authorName)); } String maintainerName=toTransform.getAuthor(); if(maintainerName!=null) { Contributor maintainer=new Contributor(Contributor.Type.DataCurator); maintainer.setName(maintainerName); contributors.add(maintainer); creators.add(new Creator(authorName)); } // D4Science as contributor Contributor d4Science=new Contributor(Contributor.Type.HostingInstitution); d4Science.setName("D4Science"); contributors.add(d4Science); meta.setContributors(contributors); meta.setCreators(creators); // VERSION meta.setVersion(toTransform.getVersion()); // DATES // PUBLICATION DATE = now meta.setPublication_date(Date.from(Instant.now())); deposition.setMetadata(meta); // profile specific mappings return applyMappings(toTransform, deposition); } private ZenodoDeposition applyMappings(CkanItemDescriptor source, ZenodoDeposition target) throws TransformationException { try{ ObjectMapper mapper=Parsing.getMapper(); DocumentContext sourceCtx=JsonPath.using(Parsing.JSON_PATH_ALWAYS_LIST_CONFIG).parse(source.getContent()); DocumentContext targetCtx=JsonPath.using(Parsing.JSON_PATH_ALWAYS_LIST_CONFIG).parse(mapper.writeValueAsString(target)); for(Mapping mapping:mappings) { try { // extract source List sourceValues=new ArrayList<>(); for(Value v: mapping.getSource().getValues()) { String actualValue=null; switch(v.getType()) { case constant : { actualValue=v.getValue(); break; } case jsonPath : { for(String s: ((Collection) sourceCtx.read(v.getValue()))){ if(s!=null) { s=s.trim(); if(!s.isEmpty())actualValue=s; } } break; } } // Adding to actual values if(actualValue!=null) { if(v.getSplit()!=null) for(String toAdd:actualValue.split(v.getSplit())) sourceValues.add(toAdd.trim()); else sourceValues.add(actualValue); } if(!sourceValues.isEmpty()) break; } for(String sourceValue:sourceValues) { String resultingValue=sourceValue; // apply regexps for(Regexp regexp:mapping.getRegexp()) { switch(regexp.getType()) { case extract : { Pattern p=Pattern.compile(regexp.getTarget()); Matcher m = p.matcher(resultingValue); if(m.find()) resultingValue=m.group(); else resultingValue=null; break; } case replace : { if(resultingValue!=null) { String replacement=regexp.getReplacement()!=null?regexp.getReplacement():""; resultingValue=resultingValue.replaceAll(regexp.getTarget(), replacement); break; } } } } // apply value mappings resultingValue =mapping.getValueMapping().getOrDefault(sourceValue, resultingValue); // check if targetPath exists List targetElementFound=targetCtx.read(mapping.getTargetPath()); if(targetElementFound==null || targetElementFound.size()==0 || targetElementFound.get(0)==null) { // targetCtx=targetCtx.add(mapping.getTargetPath(),Collections.singletonList("nothing")); targetCtx=Parsing.addElement(targetCtx, mapping.getTargetPath()); } if(mapping.getTargetElement().getAppend()){ String original=((List)targetCtx.read(mapping.getTargetPath()+"."+mapping.getTargetElement().getTargetElement())).get(0); if(original!=null && !original.isEmpty()) resultingValue=original+resultingValue; } targetCtx=targetCtx.put(mapping.getTargetPath(),mapping.getTargetElement().getTargetElement(), resultingValue); } }catch(Throwable t) { throw new TransformationException("Exception while applying "+mapping,t); } } String serializedOutput=targetCtx.jsonString(); log.debug("Mapping complete. Going to return : "+serializedOutput); return mapper.readValue(serializedOutput, ZenodoDeposition.class); }catch(Throwable t) { log.error("Unable to translate "+source+" using previous "+target,t); throw new TransformationException("Unable to translate "+source.getName(),t); } } public List filterResources(CkanItemDescriptor source) throws TransformationException{ try { ObjectMapper mapper=Parsing.getMapper(); DocumentContext sourceCtx=JsonPath.using(Parsing.JSON_PATH_ALWAYS_LIST_CONFIG).parse(source.getContent()); HashSet toReturn=new HashSet(); for(Filter f:resourceFilter.getFilters()) { JSONArray filtered=sourceCtx.read(f.getConditions().get(0)); for(Object obj:filtered) { Map map=(Map) obj; toReturn.add(mapper.readValue((new JSONObject(map)).toJSONString(), CkanResource.class)); } } return new ArrayList(toReturn); }catch(Throwable t) { log.error("Unable to filter resources. ",t); throw new TransformationException("Unable to filter "+source.getName()+" resources",t); } } }