package eu.eudat.models.rda.mapper;

import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.eudat.data.entities.DatasetProfile;
import eu.eudat.elastic.criteria.DatasetCriteria;
import eu.eudat.elastic.entities.Tag;
import eu.eudat.logic.managers.DatasetManager;
import eu.eudat.logic.services.ApiContext;
import eu.eudat.logic.utilities.helpers.StreamDistinctBy;
import eu.eudat.logic.utilities.json.JsonSearcher;
import eu.eudat.models.data.datasetprofile.DatasetProfileOverviewModel;
import eu.eudat.models.data.datasetwizard.DatasetWizardModel;
import eu.eudat.models.rda.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;

import javax.transaction.Transactional;
import java.time.Instant;
import java.time.ZoneId;
import java.time.format.DateTimeFormatter;
import java.time.format.DateTimeParseException;
import java.util.*;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import java.util.stream.StreamSupport;

/**
 * Maps between the persisted dataset entity ({@code eu.eudat.data.entities.Dataset}) and the RDA
 * {@link Dataset} model.
 *
 * <p>Both directions work on the JSON form of the dataset's paged profile definition: fields are
 * located by their {@code rdaProperty} marker via {@link JsonSearcher#findNodes}, and their
 * {@code id} / {@code value} members are read from the matching nodes.
 */
@Component
public class DatasetRDAMapper {

    private static final Logger logger = LoggerFactory.getLogger(DatasetRDAMapper.class);

    /** Pattern used when an answer that parses as an instant is exported as an additional property. */
    private static final DateTimeFormatter DATE_ONLY_FORMAT = DateTimeFormatter.ofPattern("yyyy-MM-dd");

    private final DatasetManager datasetManager;
    private final ApiContext apiContext;

    @Autowired
    public DatasetRDAMapper(DatasetManager datasetManager, ApiContext apiContext) {
        this.datasetManager = datasetManager;
        this.apiContext = apiContext;
    }

    /**
     * Builds the RDA representation of a dataset.
     *
     * <p>Title, description and the {@code template} additional property are always set. Everything
     * else is best effort: any exception raised while reading the profile definition is logged and
     * the partially populated object is returned.
     *
     * <p>Besides populating the returned dataset, this method also adds contributors and costs found
     * in the dataset to {@code dmp} and may update {@code dmp}'s ethical-issues flag.
     *
     * @param dataset the persisted dataset; its label must not be {@code null}
     * @param dmp     the RDA DMP the dataset belongs to; mutated as described above
     * @return the RDA dataset
     * @throws IllegalArgumentException if the dataset has no label
     */
    @Transactional
    public Dataset toRDA(eu.eudat.data.entities.Dataset dataset, eu.eudat.models.rda.Dmp dmp) {
        Dataset rda = new Dataset();
        if (dataset.getLabel() == null) {
            throw new IllegalArgumentException("Dataset Label is missing");
        }
        rda.setTitle(dataset.getLabel());
        rda.setDescription(dataset.getDescription());
        rda.setAdditionalProperty("template", dataset.getProfile().getId());
        try {
            // A dataset without stored answers must not abort the whole mapping, so fall back to an
            // empty map instead of handing null/empty content to the JSON parser.
            Map<String, Object> templateIdsToValues = new LinkedHashMap<>();
            String rawProperties = dataset.getProperties();
            if (rawProperties != null && !rawProperties.isEmpty()) {
                @SuppressWarnings("unchecked") // JSON object keys are always strings
                Map<String, Object> parsed = apiContext.getUtilitiesService().getGenericObjectMapper()
                        .readValue(rawProperties, LinkedHashMap.class);
                if (parsed != null) {
                    templateIdsToValues.putAll(parsed);
                }
            }

            DatasetWizardModel datasetWizardModel = new DatasetWizardModel().fromDataModel(dataset);
            datasetWizardModel.setDatasetProfileDefinition(datasetManager.getPagedProfile(datasetWizardModel, dataset));
            ObjectMapper mapper = new ObjectMapper();
            String datasetDescriptionJson = mapper.writeValueAsString(datasetWizardModel.getDatasetProfileDefinition());
            JsonNode datasetDescriptionObj = mapper.readTree(datasetDescriptionJson);

            // Dataset identifier: taken from the template when present, otherwise the entity id.
            List<JsonNode> idNodes = JsonSearcher.findNodes(datasetDescriptionObj, "rdaProperty", "dataset.dataset_id");
            if (!idNodes.isEmpty()) {
                rda.setDatasetId(DatasetIdRDAMapper.toRDA(idNodes));
            }
            if (rda.getDatasetId() == null) {
                rda.setDatasetId(new DatasetId(dataset.getId().toString(), DatasetId.Type.OTHER));
            }

            List<JsonNode> typeNodes = JsonSearcher.findNodes(datasetDescriptionObj, "rdaProperty", "dataset.type");
            String type = firstValueText(typeNodes);
            rda.setType(type.isEmpty() ? "DMP Dataset" : type);

            List<JsonNode> languageNodes = JsonSearcher.findNodes(datasetDescriptionObj, "rdaProperty", "dataset.language");
            String language = firstValueText(languageNodes);
            if (!language.isEmpty()) {
                rda.setLanguage(Language.fromValue(language));
            } else {
                rda.setLanguage(LanguageRDAMapper.mapLanguageIsoToRDAIso(dataset.getProfile().getLanguage()));
            }

            List<JsonNode> metadataNodes = JsonSearcher.findNodes(datasetDescriptionObj, "rdaProperty", "dataset.metadata");
            if (!metadataNodes.isEmpty()) {
                rda.setMetadata(MetadataRDAMapper.toRDAList(metadataNodes));
            } else {
                rda.setMetadata(new ArrayList<>());
            }

            List<JsonNode> qaNodes = JsonSearcher.findNodes(datasetDescriptionObj, "rdaProperty", "dataset.data_quality_assurance");
            if (!qaNodes.isEmpty()) {
                List<String> qaList = new ArrayList<>();
                // First pass: collect every element of array-valued answers.
                for (JsonNode node : qaNodes) {
                    JsonNode value = node.get("value");
                    if (value != null && value.isArray()) {
                        for (JsonNode element : value) {
                            qaList.add(element.asText());
                        }
                    }
                }
                // Second pass: the first non-empty scalar answer is appended and remembered, together
                // with the id of the field it came from, so toEntity can write it back.
                for (JsonNode node : qaNodes) {
                    JsonNode value = node.get("value");
                    String freeText = value == null ? "" : value.asText();
                    if (!freeText.isEmpty()) {
                        qaList.add(freeText);
                        JsonNode idNode = node.get("id");
                        // Stored as plain text (not as a JsonNode) so toEntity can use it as a map key.
                        rda.setAdditionalProperty("otherDQAID", idNode == null ? null : idNode.asText());
                        rda.setAdditionalProperty("otherDQA", freeText);
                        break;
                    }
                }
                rda.setDataQualityAssurance(qaList);
            } else {
                rda.setDataQualityAssurance(new ArrayList<>());
            }

            List<JsonNode> preservationNodes = JsonSearcher.findNodes(datasetDescriptionObj, "rdaProperty", "dataset.preservation_statement");
            String preservationStatement = firstValueText(preservationNodes);
            if (!preservationStatement.isEmpty()) {
                rda.setPreservationStatement(preservationStatement);
            }

            List<JsonNode> distributionNodes = JsonSearcher.findNodes(datasetDescriptionObj, "rdaProperty", "dataset.distribution");
            if (!distributionNodes.isEmpty()) {
                rda.setDistribution(DistributionRDAMapper.toRDAList(distributionNodes));
            } else {
                rda.setDistribution(new ArrayList<>());
            }

            // Keywords come from the template when it has keyword fields; otherwise from the tags of
            // the indexed document, provided the dataset index exists.
            List<JsonNode> keywordNodes = JsonSearcher.findNodes(datasetDescriptionObj, "rdaProperty", "dataset.keyword");
            if (!keywordNodes.isEmpty()) {
                rda.setKeyword(keywordNodes.stream().map(keywordNode -> {
                    JsonNode value = keywordNode.get("value");
                    if (value.isArray()) {
                        // Array elements are handed over in their JSON form.
                        return StreamSupport.stream(value.spliterator(), false)
                                .map(node -> KeywordRDAMapper.toRDA(node.toString()))
                                .flatMap(Collection::stream)
                                .collect(Collectors.toList());
                    } else {
                        return KeywordRDAMapper.toRDA(keywordNode.get("value").asText());
                    }
                }).flatMap(Collection::stream).collect(Collectors.toList()));
                // Remember which template fields held keywords so toEntity can restore them.
                for (int i = 0; i < keywordNodes.size(); i++) {
                    rda.setAdditionalProperty("keyword" + (i + 1), keywordNodes.get(i).get("id").asText());
                }
            } else if (apiContext.getOperationsContext().getElasticRepository().getDatasetRepository().exists()) {
                // NOTE(review): assumes the dataset has been indexed; a missing document would
                // presumably surface here as an exception handled by the catch below - confirm.
                List<String> tags = apiContext.getOperationsContext().getElasticRepository().getDatasetRepository()
                        .findDocument(dataset.getId().toString()).getTags().stream()
                        .map(Tag::getName)
                        .collect(Collectors.toList());
                rda.setKeyword(tags);
            }

            List<JsonNode> personalDataNodes = JsonSearcher.findNodes(datasetDescriptionObj, "rdaProperty", "dataset.personal_data");
            if (!personalDataNodes.isEmpty()) {
                try {
                    rda.setPersonalData(Dataset.PersonalData.fromValue(firstValueText(personalDataNodes)));
                } catch (IllegalArgumentException e) {
                    rda.setPersonalData(Dataset.PersonalData.UNKNOWN);
                }
            } else {
                rda.setPersonalData(Dataset.PersonalData.UNKNOWN);
            }

            List<JsonNode> securityAndPrivacyNodes = JsonSearcher.findNodes(datasetDescriptionObj, "rdaProperty", "dataset.security_and_privacy");
            if (!securityAndPrivacyNodes.isEmpty()) {
                rda.setSecurityAndPrivacy(SecurityAndPrivacyRDAMapper.toRDAList(securityAndPrivacyNodes));
            } else {
                rda.setSecurityAndPrivacy(new ArrayList<>());
            }

            List<JsonNode> sensitiveDataNodes = JsonSearcher.findNodes(datasetDescriptionObj, "rdaProperty", "dataset.sensitive_data");
            if (!sensitiveDataNodes.isEmpty()) {
                try {
                    rda.setSensitiveData(Dataset.SensitiveData.fromValue(firstValueText(sensitiveDataNodes)));
                } catch (IllegalArgumentException e) {
                    rda.setSensitiveData(Dataset.SensitiveData.UNKNOWN);
                }
            } else {
                rda.setSensitiveData(Dataset.SensitiveData.UNKNOWN);
            }

            List<JsonNode> technicalResourceNodes = JsonSearcher.findNodes(datasetDescriptionObj, "rdaProperty", "dataset.technical_resource");
            if (!technicalResourceNodes.isEmpty()) {
                rda.setTechnicalResource(TechnicalResourceRDAMapper.toRDAList(technicalResourceNodes));
            } else {
                rda.setTechnicalResource(new ArrayList<>());
            }

            List<JsonNode> issuedNodes = JsonSearcher.findNodes(datasetDescriptionObj, "rdaProperty", "dataset.issued");
            String issued = firstValueText(issuedNodes);
            if (!issued.isEmpty()) {
                rda.setIssued(issued);
            }

            // DMP-level information that happens to be captured inside the dataset template.
            List<JsonNode> contributorNodes = JsonSearcher.findNodes(datasetDescriptionObj, "rdaProperty", "dmp.contributor");
            if (!contributorNodes.isEmpty()) {
                dmp.getContributor().addAll(contributorNodes.stream().map(contributorNode -> {
                    JsonNode value = contributorNode.get("value");
                    if (value.isArray()) {
                        return StreamSupport.stream(value.spliterator(), false)
                                .map(node -> ContributorRDAMapper.toRDA(node.asText()))
                                .collect(Collectors.toList());
                    } else {
                        // Non-array answers yield a blank contributor; the original author noted that
                        // returning nothing here might be the better choice.
                        return Collections.singletonList(new Contributor());
                    }
                }).flatMap(Collection::stream).collect(Collectors.toList()));
            }

            List<JsonNode> costNodes = JsonSearcher.findNodes(datasetDescriptionObj, "rdaProperty", "dmp.cost");
            if (!costNodes.isEmpty()) {
                dmp.getCost().addAll(CostRDAMapper.toRDAList(costNodes));
            }

            List<JsonNode> ethicsNodes = JsonSearcher.findNodes(datasetDescriptionObj, "rdaProperty", "dmp.ethical_issues");
            for (JsonNode node : ethicsNodes) {
                String rdaProperty = node.get("rdaProperty").asText();
                String rdaValue = node.get("value").asText();
                if (rdaValue == null || rdaValue.isEmpty()) {
                    continue;
                }
                // Only the "exist" flag is mapped; description and report properties are ignored here.
                if (rdaProperty.contains("exist")) {
                    try {
                        Dmp.EthicalIssuesExist exists = dmp.getEthicalIssuesExist();
                        if (exists == null
                                || ((exists == Dmp.EthicalIssuesExist.NO || exists == Dmp.EthicalIssuesExist.UNKNOWN) && rdaValue.equals("yes"))
                                || (exists == Dmp.EthicalIssuesExist.YES && !(rdaValue.equals("no") || rdaValue.equals("unknown")))
                                || (exists == Dmp.EthicalIssuesExist.UNKNOWN && rdaValue.equals("no"))) {
                            dmp.setEthicalIssuesExist(Dmp.EthicalIssuesExist.fromValue(rdaValue));
                        }
                    } catch (IllegalArgumentException e) {
                        logger.warn("{}. Setting ethical_issues_exist to unknown", e.getLocalizedMessage());
                        dmp.setEthicalIssuesExist(Dmp.EthicalIssuesExist.UNKNOWN);
                    }
                }
            }

            // Every answer that was not consumed by one of the field groups below is exported as an
            // additional property, keyed by its template field id.
            Set<String> foundIds = Stream.of(typeNodes, languageNodes, metadataNodes, qaNodes, preservationNodes,
                            distributionNodes, keywordNodes, personalDataNodes, securityAndPrivacyNodes,
                            sensitiveDataNodes, technicalResourceNodes)
                    .flatMap(Collection::stream)
                    .map(node -> node.get("id"))
                    .filter(Objects::nonNull)
                    .map(JsonNode::asText)
                    .collect(Collectors.toSet());
            for (Map.Entry<String, Object> entry : templateIdsToValues.entrySet()) {
                Object value = entry.getValue();
                if (foundIds.contains(entry.getKey()) || value == null || value.toString().isEmpty()) {
                    continue;
                }
                try {
                    // Values that parse as an instant are exported as a plain date in the system zone.
                    Instant time = Instant.parse(value.toString());
                    rda.setAdditionalProperty(entry.getKey(), DATE_ONLY_FORMAT.withZone(ZoneId.systemDefault()).format(time));
                } catch (DateTimeParseException e) {
                    rda.setAdditionalProperty(entry.getKey(), value);
                }
            }
        } catch (Exception e) {
            // Best effort: keep whatever was mapped so far.
            logger.error(e.getMessage(), e);
        }
        return rda;
    }

    /**
     * Builds a dataset entity from its RDA representation.
     *
     * <p>The profile is looked up from the {@code template} additional property; when the lookup
     * fails or yields nothing, {@code defaultProfile} is used. The RDA values are then written into
     * the entity's properties, keyed by the ids of the matching template fields. Failures while
     * building the properties are logged and leave the entity without properties.
     *
     * @param rda            the RDA dataset to convert
     * @param defaultProfile profile to use when the one referenced by {@code rda} cannot be resolved
     * @return a new, unsaved dataset entity
     */
    public eu.eudat.data.entities.Dataset toEntity(Dataset rda, DatasetProfile defaultProfile) {
        eu.eudat.data.entities.Dataset entity = new eu.eudat.data.entities.Dataset();
        entity.setLabel(rda.getTitle());
        entity.setDescription(rda.getDescription());
        try {
            DatasetProfile profile = apiContext.getOperationsContext().getDatabaseRepository().getDatasetProfileDao()
                    .find(UUID.fromString(rda.getAdditionalProperties().get("template").toString()));
            entity.setProfile(profile != null ? profile : defaultProfile);
        } catch (Exception e) {
            logger.warn(e.getMessage(), e);
            entity.setProfile(defaultProfile);
        }

        try {
            Map<String, Object> properties = new HashMap<>();
            DatasetWizardModel datasetWizardModel = new DatasetWizardModel();
            datasetWizardModel.setProfile(new DatasetProfileOverviewModel().fromDataModel(entity.getProfile()));
            datasetWizardModel.setDatasetProfileDefinition(datasetManager.getPagedProfile(datasetWizardModel, entity));
            ObjectMapper mapper = new ObjectMapper();
            String datasetDescriptionJson = mapper.writeValueAsString(datasetWizardModel.getDatasetProfileDefinition());
            JsonNode datasetDescriptionObj = mapper.readTree(datasetDescriptionJson);

            List<JsonNode> typeNodes = JsonSearcher.findNodes(datasetDescriptionObj, "rdaProperty", "dataset.type");
            if (!typeNodes.isEmpty()) {
                properties.put(typeNodes.get(0).get("id").asText(), rda.getType());
            }

            List<JsonNode> languageNodes = JsonSearcher.findNodes(datasetDescriptionObj, "rdaProperty", "dataset.language");
            if (!languageNodes.isEmpty() && rda.getLanguage() != null) {
                properties.put(languageNodes.get(0).get("id").asText(), rda.getLanguage().value());
            }

            if (rda.getMetadata() != null) {
                properties.putAll(MetadataRDAMapper.toProperties(rda.getMetadata()));
            }

            if (rda.getDatasetId() != null) {
                properties.putAll(DatasetIdRDAMapper.toProperties(rda.getDatasetId(), datasetDescriptionObj));
            }

            List<JsonNode> qaNodes = JsonSearcher.findNodes(datasetDescriptionObj, "rdaProperty", "dataset.data_quality_assurance");
            List<String> qualityAssurance = rda.getDataQualityAssurance();
            if (!qaNodes.isEmpty() && qualityAssurance != null && !qualityAssurance.isEmpty()) {
                // The whole list is stored as a JSON array on the first matching template field.
                properties.put(qaNodes.get(0).get("id").asText(), mapper.writeValueAsString(qualityAssurance));
                // The free-text answer remembered by toRDA goes back to the field it came from. The id
                // may arrive as a String or as a JsonNode, and a missing id must not become a null key.
                String otherQaId = textOf(rda.getAdditionalProperties().get("otherDQAID"));
                if (otherQaId != null) {
                    properties.put(otherQaId, rda.getAdditionalProperties().get("otherDQA"));
                }
            }

            List<JsonNode> preservationNodes = JsonSearcher.findNodes(datasetDescriptionObj, "rdaProperty", "dataset.preservation_statement");
            if (!preservationNodes.isEmpty()) {
                properties.put(preservationNodes.get(0).get("id").asText(), rda.getPreservationStatement());
            }

            List<JsonNode> issuedNodes = JsonSearcher.findNodes(datasetDescriptionObj, "rdaProperty", "dataset.issued");
            if (!issuedNodes.isEmpty()) {
                properties.put(issuedNodes.get(0).get("id").asText(), rda.getIssued());
            }

            // Only the first distribution is written back.
            if (rda.getDistribution() != null && !rda.getDistribution().isEmpty()) {
                properties.putAll(DistributionRDAMapper.toProperties(rda.getDistribution().get(0), datasetDescriptionObj));
            }

            if (rda.getKeyword() != null && !rda.getKeyword().isEmpty()) {
                List<String> keywordIds = rda.getAdditionalProperties().entrySet().stream()
                        .filter(entry -> entry.getKey().startsWith("keyword"))
                        .map(entry -> entry.getValue().toString())
                        .collect(Collectors.toList());
                // Without a recorded keyword field there is nowhere to store the tags; skipping here
                // keeps the remaining properties instead of failing the whole conversion.
                if (!keywordIds.isEmpty()) {
                    DatasetCriteria criteria = new DatasetCriteria();
                    criteria.setHasTags(true);
                    List<Tag> tags = this.apiContext.getOperationsContext().getElasticRepository().getDatasetRepository()
                            .query(criteria).stream()
                            .map(eu.eudat.elastic.entities.Dataset::getTags)
                            .flatMap(Collection::stream)
                            .filter(StreamDistinctBy.distinctByKey(Tag::getId))
                            .collect(Collectors.toList());
                    // Keep only the already indexed tags whose name matches one of the RDA keywords.
                    List<Tag> templateTags = tags.stream()
                            .filter(tag -> rda.getKeyword().contains(tag.getName()))
                            .collect(Collectors.toList());
                    properties.put(keywordIds.get(0), mapper.writeValueAsString(templateTags));
                }
            }

            List<JsonNode> personalDataNodes = JsonSearcher.findNodes(datasetDescriptionObj, "rdaProperty", "dataset.personal_data");
            if (!personalDataNodes.isEmpty() && rda.getPersonalData() != null) {
                properties.put(personalDataNodes.get(0).get("id").asText(), rda.getPersonalData().value());
            }

            if (rda.getSecurityAndPrivacy() != null) {
                properties.putAll(SecurityAndPrivacyRDAMapper.toProperties(rda.getSecurityAndPrivacy()));
            }

            List<JsonNode> sensitiveDataNodes = JsonSearcher.findNodes(datasetDescriptionObj, "rdaProperty", "dataset.sensitive_data");
            if (!sensitiveDataNodes.isEmpty() && rda.getSensitiveData() != null) {
                properties.put(sensitiveDataNodes.get(0).get("id").asText(), rda.getSensitiveData().value());
            }

            if (rda.getTechnicalResource() != null) {
                properties.putAll(TechnicalResourceRDAMapper.toProperties(rda.getTechnicalResource()));
            }

            // Remaining additional properties are copied verbatim, except the bookkeeping entries.
            rda.getAdditionalProperties().entrySet().stream()
                    .filter(entry -> !entry.getKey().equals("template") && !entry.getKey().startsWith("qaId") && !entry.getKey().startsWith("keyword"))
                    .forEach(entry -> properties.put(entry.getKey(), entry.getValue()));

            entity.setProperties(mapper.writeValueAsString(properties));
        } catch (Exception e) {
            logger.error(e.getMessage(), e);
        }
        return entity;
    }

    /**
     * Returns the text of the {@code value} member of the first node, or an empty string when the
     * list is empty or that node has no {@code value} member.
     */
    private static String firstValueText(List<JsonNode> nodes) {
        if (nodes.isEmpty()) {
            return "";
        }
        JsonNode value = nodes.get(0).get("value");
        return value == null ? "" : value.asText();
    }

    /**
     * Converts an additional-property value to plain text: {@code null} stays {@code null}, a
     * {@link JsonNode} contributes its text value, anything else its {@code toString()}.
     */
    private static String textOf(Object value) {
        if (value == null) {
            return null;
        }
        if (value instanceof JsonNode) {
            return ((JsonNode) value).asText();
        }
        return value.toString();
    }

    /** Creates a tag with the given name and an empty id. Not called by the current mapping code. */
    private static Tag toTagEntity(String name) {
        Tag tag = new Tag();
        tag.setId("");
        tag.setName(name);
        return tag;
    }
}