argos/dmp-backend/web/src/main/java/eu/eudat/models/rda/mapper/DatasetRDAMapper.java

389 lines
18 KiB
Java

package eu.eudat.models.rda.mapper;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.eudat.data.entities.DatasetProfile;
import eu.eudat.elastic.criteria.DatasetCriteria;
import eu.eudat.elastic.entities.Tag;
import eu.eudat.logic.managers.DatasetManager;
import eu.eudat.logic.services.ApiContext;
import eu.eudat.logic.utilities.helpers.StreamDistinctBy;
import eu.eudat.logic.utilities.json.JsonSearcher;
import eu.eudat.models.data.datasetprofile.DatasetProfileOverviewModel;
import eu.eudat.models.data.datasetwizard.DatasetWizardModel;
import eu.eudat.models.rda.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
import javax.transaction.Transactional;
import java.time.Instant;
import java.time.ZoneId;
import java.time.format.DateTimeFormatter;
import java.time.format.DateTimeParseException;
import java.util.*;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import java.util.stream.StreamSupport;
@Component
public class DatasetRDAMapper {
private static final Logger logger = LoggerFactory.getLogger(DatasetRDAMapper.class);
private DatasetManager datasetManager;
private ApiContext apiContext;
@Autowired
public DatasetRDAMapper(DatasetManager datasetManager, ApiContext apiContext) {
this.datasetManager = datasetManager;
this.apiContext = apiContext;
}
@Transactional
public Dataset toRDA(eu.eudat.data.entities.Dataset dataset, eu.eudat.models.rda.Dmp dmp) {
Dataset rda = new Dataset();
// rda.setDatasetId(DatasetIdRDAMapper.toRDA(dataset.getId()));
if (dataset.getLabel() == null) {
throw new IllegalArgumentException("Dataset Label is missing");
}
rda.setTitle(dataset.getLabel());
rda.setDescription(dataset.getDescription());
rda.setAdditionalProperty("template", dataset.getProfile().getId());
try {
Map<String, Object> templateIdsToValues = apiContext.getUtilitiesService().getGenericObjectMapper().readValue(dataset.getProperties(), LinkedHashMap.class);
DatasetWizardModel datasetWizardModel = new DatasetWizardModel().fromDataModel(dataset);
datasetWizardModel.setDatasetProfileDefinition(datasetManager.getPagedProfile(datasetWizardModel, dataset));
ObjectMapper mapper = new ObjectMapper();
String datasetDescriptionJson = mapper.writeValueAsString(datasetWizardModel.getDatasetProfileDefinition());
JsonNode datasetDescriptionObj = mapper.readTree(datasetDescriptionJson);
List<JsonNode> idNodes = JsonSearcher.findNodes(datasetDescriptionObj, "rdaProperty", "dataset.dataset_id");
if (!idNodes.isEmpty()) {
rda.setDatasetId(DatasetIdRDAMapper.toRDA(idNodes));
}
if (rda.getDatasetId() == null) {
rda.setDatasetId(new DatasetId(dataset.getId().toString(), DatasetId.Type.OTHER));
}
List<JsonNode> typeNodes = JsonSearcher.findNodes(datasetDescriptionObj, "rdaProperty", "dataset.type");
if (!typeNodes.isEmpty() && !typeNodes.get(0).get("value").asText().isEmpty()) {
rda.setType(typeNodes.get(0).get("value").asText());
} else {
rda.setType("DMP Dataset");
}
List<JsonNode> languageNodes = JsonSearcher.findNodes(datasetDescriptionObj, "rdaProperty", "dataset.language");
if (!languageNodes.isEmpty() && !languageNodes.get(0).get("value").asText().isEmpty()) {
rda.setLanguage(Language.fromValue(languageNodes.get(0).get("value").asText()));
} else {
rda.setLanguage(LanguageRDAMapper.mapLanguageIsoToRDAIso(dataset.getProfile().getLanguage()));
}
List<JsonNode> metadataNodes = JsonSearcher.findNodes(datasetDescriptionObj, "rdaProperty", "dataset.metadata");
if (!metadataNodes.isEmpty()) {
rda.setMetadata(MetadataRDAMapper.toRDAList(metadataNodes));
}else{
rda.setMetadata(new ArrayList<>());
}
List<JsonNode> qaNodes = JsonSearcher.findNodes(datasetDescriptionObj, "rdaProperty", "dataset.data_quality_assurance");
if (!qaNodes.isEmpty()) {
/*rda.setDataQualityAssurance(qaNodes.stream().map(qaNode -> qaNode.get("value").asText()).collect(Collectors.toList()));
for (int i = 0; i < qaNodes.size(); i++) {
rda.setAdditionalProperty("qaId" + (i + 1), qaNodes.get(i).get("id").asText());
}*/
List<String> qaList = new ArrayList<>();
String qa;
for(JsonNode node: qaNodes){
if(node.get("value").isArray()){
Iterator<JsonNode> iter = node.get("value").elements();
while(iter.hasNext()) {
qa = iter.next().asText();
qaList.add(qa);
}
}
}
String data_quality;
for(JsonNode dqa: qaNodes){
data_quality = dqa.get("value").asText();
if(!data_quality.isEmpty()){
qaList.add(data_quality);
rda.setAdditionalProperty("otherDQAID", dqa.get("id"));
rda.setAdditionalProperty("otherDQA", data_quality);
break;
}
}
rda.setDataQualityAssurance(qaList);
}else{
rda.setDataQualityAssurance(new ArrayList<>());
}
List<JsonNode> preservationNodes = JsonSearcher.findNodes(datasetDescriptionObj, "rdaProperty", "dataset.preservation_statement");
if (!preservationNodes.isEmpty() && !preservationNodes.get(0).get("value").asText().isEmpty()) {
rda.setPreservationStatement(preservationNodes.get(0).get("value").asText());
}
List<JsonNode> distributionNodes = JsonSearcher.findNodes(datasetDescriptionObj, "rdaProperty", "dataset.distribution");
if (!distributionNodes.isEmpty()) {
rda.setDistribution(DistributionRDAMapper.toRDAList(distributionNodes));
}else{
rda.setDistribution(new ArrayList<>());
}
List<JsonNode> keywordNodes = JsonSearcher.findNodes(datasetDescriptionObj, "rdaProperty", "dataset.keyword");
if (!keywordNodes.isEmpty()) {
rda.setKeyword(keywordNodes.stream().map(keywordNode -> {
JsonNode value = keywordNode.get("value");
if (value.isArray()) {
return StreamSupport.stream(value.spliterator(), false).map(node -> KeywordRDAMapper.toRDA(node.toString())).flatMap(Collection::stream).collect(Collectors.toList());
} else {
return KeywordRDAMapper.toRDA(keywordNode.get("value").asText());
}
}).flatMap(Collection::stream).collect(Collectors.toList()));
for (int i = 0; i < keywordNodes.size(); i++) {
rda.setAdditionalProperty("keyword" + (i + 1), keywordNodes.get(i).get("id").asText());
}
} else if (apiContext.getOperationsContext().getElasticRepository().getDatasetRepository().exists()) {
List<String> tags = apiContext.getOperationsContext().getElasticRepository().getDatasetRepository().findDocument(dataset.getId().toString()).getTags().stream().map(Tag::getName).collect(Collectors.toList());
rda.setKeyword(tags);
}
List<JsonNode> personalDataNodes = JsonSearcher.findNodes(datasetDescriptionObj, "rdaProperty", "dataset.personal_data");
if (!personalDataNodes.isEmpty()) {
try{
rda.setPersonalData(personalDataNodes.stream().map(personalDataNode -> Dataset.PersonalData.fromValue(personalDataNode.get("value").asText())).findFirst().get());
}catch(IllegalArgumentException e){
rda.setPersonalData(Dataset.PersonalData.UNKNOWN);
}
} else {
rda.setPersonalData(Dataset.PersonalData.UNKNOWN);
}
List<JsonNode> securityAndPrivacyNodes = JsonSearcher.findNodes(datasetDescriptionObj, "rdaProperty", "dataset.security_and_privacy");
if (!securityAndPrivacyNodes.isEmpty()) {
rda.setSecurityAndPrivacy(SecurityAndPrivacyRDAMapper.toRDAList(securityAndPrivacyNodes));
}else{
rda.setSecurityAndPrivacy(new ArrayList<>());
}
List<JsonNode> sensitiveDataNodes = JsonSearcher.findNodes(datasetDescriptionObj, "rdaProperty", "dataset.sensitive_data");
if (!sensitiveDataNodes.isEmpty()) {
try{
rda.setSensitiveData(sensitiveDataNodes.stream().map(sensitiveDataNode -> Dataset.SensitiveData.fromValue(sensitiveDataNode.get("value").asText())).findFirst().get());
}catch(IllegalArgumentException e){
rda.setSensitiveData(Dataset.SensitiveData.UNKNOWN);
}
} else {
rda.setSensitiveData(Dataset.SensitiveData.UNKNOWN);
}
List<JsonNode> technicalResourceNodes = JsonSearcher.findNodes(datasetDescriptionObj, "rdaProperty", "dataset.technical_resource");
if (!technicalResourceNodes.isEmpty()) {
rda.setTechnicalResource(TechnicalResourceRDAMapper.toRDAList(technicalResourceNodes));
}else{
rda.setTechnicalResource(new ArrayList<>());
}
List<JsonNode> issuedNodes = JsonSearcher.findNodes(datasetDescriptionObj, "rdaProperty", "dataset.issued");
if (!issuedNodes.isEmpty() && !issuedNodes.get(0).get("value").asText().isEmpty()) {
rda.setIssued(issuedNodes.get(0).get("value").asText());
}
List<JsonNode> contributorNodes = JsonSearcher.findNodes(datasetDescriptionObj, "rdaProperty", "dmp.contributor");
if (!contributorNodes.isEmpty()) {
dmp.getContributor().addAll(contributorNodes.stream().map(contributorNode -> {
JsonNode value = contributorNode.get("value");
if (value.isArray()) {
return StreamSupport.stream(value.spliterator(), false).map(node -> ContributorRDAMapper.toRDA(node.asText())).collect(Collectors.toList());
} else {
return Collections.singletonList(new Contributor()); // return null kalutera
}
}).flatMap(Collection::stream).collect(Collectors.toList()));
}
List<JsonNode> costNodes = JsonSearcher.findNodes(datasetDescriptionObj, "rdaProperty", "dmp.cost");
if (!costNodes.isEmpty()) {
dmp.getCost().addAll(CostRDAMapper.toRDAList(costNodes));
}
List<JsonNode> ethicsNodes = JsonSearcher.findNodes(datasetDescriptionObj, "rdaProperty", "dmp.ethical_issues");
if (!ethicsNodes.isEmpty()) {
for(JsonNode node: ethicsNodes){
String rdaProperty = node.get("rdaProperty").asText();
String rdaValue = node.get("value").asText();
if(rdaValue == null || rdaValue.isEmpty()){
continue;
}
if(rdaProperty.contains("exist")){
try {
Dmp.EthicalIssuesExist exists = dmp.getEthicalIssuesExist();
if(exists == null
|| ((exists == Dmp.EthicalIssuesExist.NO || exists == Dmp.EthicalIssuesExist.UNKNOWN) && rdaValue.equals("yes"))
|| (exists == Dmp.EthicalIssuesExist.YES && !(rdaValue.equals("no") || rdaValue.equals("unknown")))
|| (exists == Dmp.EthicalIssuesExist.UNKNOWN && rdaValue.equals("no"))){
dmp.setEthicalIssuesExist(Dmp.EthicalIssuesExist.fromValue(rdaValue));
}
}catch(IllegalArgumentException e){
logger.warn(e.getLocalizedMessage() + ". Setting ethical_issues_exist to unknown");
dmp.setEthicalIssuesExist(Dmp.EthicalIssuesExist.UNKNOWN);
}
}
// else if(rdaProperty.contains("description")){
// if(dmp.getEthicalIssuesDescription() == null){
// dmp.setEthicalIssuesDescription(rdaValue);
// }
// else{
// dmp.setEthicalIssuesDescription(dmp.getEthicalIssuesDescription() + ", " + rdaValue);
// }
// }
// else if(rdaProperty.contains("report")){
// try {
// dmp.setEthicalIssuesReport(URI.create(rdaValue));
// } catch (IllegalArgumentException e) {
// logger.warn(e.getLocalizedMessage() + ". Skipping url parsing");
// }
// }
}
}
List<JsonNode> foundNodes = Stream.of(typeNodes, languageNodes, metadataNodes, qaNodes, preservationNodes, distributionNodes,
keywordNodes, personalDataNodes, securityAndPrivacyNodes, sensitiveDataNodes, technicalResourceNodes).flatMap(Collection::stream).collect(Collectors.toList());
templateIdsToValues.entrySet().forEach(entry -> {
boolean isFound = foundNodes.stream().anyMatch(node -> node.get("id").asText().equals(entry.getKey()));
if (!isFound && entry.getValue() != null && !entry.getValue().toString().isEmpty()) {
try {
Instant time = Instant.parse(entry.getValue().toString());
rda.setAdditionalProperty(entry.getKey(), DateTimeFormatter.ofPattern("yyyy-MM-dd").withZone(ZoneId.systemDefault()).format(time));
} catch (DateTimeParseException e) {
rda.setAdditionalProperty(entry.getKey(), entry.getValue());
}
}
});
} catch (Exception e) {
logger.error(e.getMessage(), e);
}
return rda;
}
public eu.eudat.data.entities.Dataset toEntity(Dataset rda, DatasetProfile defaultProfile) {
eu.eudat.data.entities.Dataset entity = new eu.eudat.data.entities.Dataset();
entity.setLabel(rda.getTitle());
entity.setDescription(rda.getDescription());
try {
DatasetProfile profile = apiContext.getOperationsContext().getDatabaseRepository().getDatasetProfileDao().find(UUID.fromString(rda.getAdditionalProperties().get("template").toString()));
entity.setProfile(profile);
}catch(Exception e) {
logger.warn(e.getMessage(), e);
entity.setProfile(defaultProfile);
}
try {
Map<String, Object> properties = new HashMap<>();
DatasetWizardModel datasetWizardModel = new DatasetWizardModel();
datasetWizardModel.setProfile(new DatasetProfileOverviewModel().fromDataModel(entity.getProfile()));
datasetWizardModel.setDatasetProfileDefinition(datasetManager.getPagedProfile(datasetWizardModel, entity));
ObjectMapper mapper = new ObjectMapper();
String datasetDescriptionJson = mapper.writeValueAsString(datasetWizardModel.getDatasetProfileDefinition());
JsonNode datasetDescriptionObj = mapper.readTree(datasetDescriptionJson);
List<JsonNode> typeNodes = JsonSearcher.findNodes(datasetDescriptionObj, "rdaProperty", "dataset.type");
if (!typeNodes.isEmpty()) {
properties.put(typeNodes.get(0).get("id").asText(), rda.getType());
}
List<JsonNode> languageNodes = JsonSearcher.findNodes(datasetDescriptionObj, "rdaProperty", "dataset.language");
if (!languageNodes.isEmpty() && rda.getLanguage() != null) {
properties.put(languageNodes.get(0).get("id").asText(), rda.getLanguage().value());
}
if (rda.getMetadata() != null) {
properties.putAll(MetadataRDAMapper.toProperties(rda.getMetadata()));
}
if (rda.getDatasetId() != null) {
properties.putAll(DatasetIdRDAMapper.toProperties(rda.getDatasetId(), datasetDescriptionObj));
}
/*List <String> qaIds = rda.getAdditionalProperties().entrySet().stream().filter(entry -> entry.getKey().startsWith("qaId")).map(entry -> entry.getValue().toString()).collect(Collectors.toList());
for (int i = 0; i < qaIds.size(); i++) {
properties.put(qaIds.get(i), rda.getDataQualityAssurance().get(i));
}*/
List<JsonNode> qaNodes = JsonSearcher.findNodes(datasetDescriptionObj, "rdaProperty", "dataset.data_quality_assurance");
if (!qaNodes.isEmpty() && rda.getDataQualityAssurance() != null && !rda.getDataQualityAssurance().isEmpty()) {
ObjectMapper m = new ObjectMapper();
List<String> qas = new ArrayList<>(rda.getDataQualityAssurance());
if(!qas.isEmpty()){
properties.put(qaNodes.get(0).get("id").asText(), m.writeValueAsString(qas));
if(rda.getAdditionalProperties().containsKey("otherDQAID")){
properties.put((String)rda.getAdditionalProperties().get("otherDQAID"), rda.getAdditionalProperties().get("otherDQA"));
}
}
}
List<JsonNode> preservationNodes = JsonSearcher.findNodes(datasetDescriptionObj, "rdaProperty", "dataset.preservation_statement");
if (!preservationNodes.isEmpty()) {
properties.put(preservationNodes.get(0).get("id").asText(), rda.getPreservationStatement());
}
List<JsonNode> issuedNodes = JsonSearcher.findNodes(datasetDescriptionObj, "rdaProperty", "dataset.issued");
if (!issuedNodes.isEmpty()) {
properties.put(issuedNodes.get(0).get("id").asText(), rda.getIssued());
}
if (rda.getDistribution() != null && !rda.getDistribution().isEmpty()) {
properties.putAll(DistributionRDAMapper.toProperties(rda.getDistribution().get(0), datasetDescriptionObj));
}
if (rda.getKeyword() != null) {
List<String> keywordIds = rda.getAdditionalProperties().entrySet().stream().filter(entry -> entry.getKey().startsWith("keyword")).map(entry -> entry.getValue().toString()).collect(Collectors.toList());
// boolean takeAll = false;
// if (keywordIds.size() < rda.getKeyword().size()) {
// takeAll = true;
// }
DatasetCriteria criteria = new DatasetCriteria();
criteria.setHasTags(true);
List<Tag> tags = this.apiContext.getOperationsContext().getElasticRepository().getDatasetRepository().query(criteria).stream().map(eu.eudat.elastic.entities.Dataset::getTags).flatMap(Collection::stream).filter(StreamDistinctBy.distinctByKey(Tag::getId)).collect(Collectors.toList());
if(!rda.getKeyword().isEmpty()){
List<Tag> templateTags = tags.stream().filter(tag -> rda.getKeyword().contains(tag.getName())).collect(Collectors.toList());
properties.put(keywordIds.get(0), mapper.writeValueAsString(templateTags));
// for (int i = 0; i < keywordIds.size(); i++) {
// //if (takeAll) {
// List<String> tags = new ArrayList<>();
// for (String keyword : rda.getKeyword()) {
// tags.add(mapper.writeValueAsString(toTagEntity(keyword)));
// }
// properties.put(keywordIds.get(i), tags);
// } else {
// properties.put(keywordIds.get(i), mapper.writeValueAsString(toTagEntity(rda.getKeyword().get(i))));
// }
// properties.put(keywordIds.get(i), rda.getKeyword().get(i));
// }
}
}
List<JsonNode> personalDataNodes = JsonSearcher.findNodes(datasetDescriptionObj, "rdaProperty", "dataset.personal_data");
if (!personalDataNodes.isEmpty()) {
properties.put(personalDataNodes.get(0).get("id").asText(), rda.getPersonalData().value());
}
if (rda.getSecurityAndPrivacy() != null) {
properties.putAll(SecurityAndPrivacyRDAMapper.toProperties(rda.getSecurityAndPrivacy()));
}
List<JsonNode> sensitiveDataNodes = JsonSearcher.findNodes(datasetDescriptionObj, "rdaProperty", "dataset.sensitive_data");
if (!sensitiveDataNodes.isEmpty()) {
properties.put(sensitiveDataNodes.get(0).get("id").asText(), rda.getSensitiveData().value());
}
if (rda.getTechnicalResource() != null) {
properties.putAll(TechnicalResourceRDAMapper.toProperties(rda.getTechnicalResource()));
}
rda.getAdditionalProperties().entrySet().stream()
.filter(entry -> !entry.getKey().equals("template") && !entry.getKey().startsWith("qaId") && !entry.getKey().startsWith("keyword"))
.forEach(entry -> properties.put(entry.getKey(), entry.getValue()));
entity.setProperties(new ObjectMapper().writeValueAsString(properties));
} catch (Exception e) {
logger.error(e.getMessage(), e);
}
return entity;
}
private static Tag toTagEntity(String name) {
Tag tag = new Tag();
tag.setId("");
tag.setName(name);
return tag;
}
}