file-transformer-rda-json/core/src/main/java/eu/eudat/file/transformer/rda/mapper/DatasetRDAMapper.java

437 lines
22 KiB
Java

package eu.eudat.file.transformer.rda.mapper;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.node.ArrayNode;
import eu.eudat.file.transformer.models.description.DescriptionFileTransformerModel;
import eu.eudat.file.transformer.models.descriptiontemplate.DescriptionTemplateFileTransformerModel;
import eu.eudat.file.transformer.models.descriptiontemplate.definition.FieldFileTransformerModel;
import eu.eudat.file.transformer.models.descriptiontemplate.definition.SectionFileTransformerModel;
import eu.eudat.file.transformer.models.tag.TagFileTransformerModel;
import eu.eudat.file.transformer.rda.*;
import eu.eudat.file.transformer.utils.descriptionTemplate.TemplateFieldSearcher;
import eu.eudat.file.transformer.utils.json.JsonSearcher;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
import java.net.URI;
import java.time.Instant;
import java.time.ZoneId;
import java.time.format.DateTimeFormatter;
import java.time.format.DateTimeParseException;
import java.util.*;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import java.util.stream.StreamSupport;
@Component
public class DatasetRDAMapper {
/** Logger shared by all instances of this mapper. */
private static final Logger logger = LoggerFactory.getLogger(DatasetRDAMapper.class);
/** Jackson mapper used to parse and serialise the JSON stored in template field values. */
private final ObjectMapper mapper;

/**
 * Creates the mapper with its own default-configured {@link ObjectMapper}.
 */
@Autowired
public DatasetRDAMapper() {
    mapper = new ObjectMapper();
}
/**
 * Builds an RDA {@link Dataset} from a description and the values stored in its template.
 * <p>
 * Template fields are located through their {@code rda.dataset.*} semantics. Fields carrying
 * {@code rda.dmp.contributor}, {@code rda.dmp.cost} or {@code rda.dmp.ethical_issues*} semantics are
 * merged into the supplied {@code dmp} rather than into the dataset.
 * <p>
 * Everything after title and description is best effort: an exception raised while mapping is logged
 * and the dataset populated so far is returned.
 *
 * @param descriptionEntity the description to export; its label must not be {@code null}
 * @param dmp               the RDA DMP that receives contributors, costs and ethical-issue data
 * @return the (possibly partially) populated dataset
 * @throws IllegalArgumentException if the description has no label
 */
public Dataset toRDA(DescriptionFileTransformerModel descriptionEntity, Dmp dmp) {
    Dataset rda = new Dataset();
    if (descriptionEntity.getLabel() == null) {
        throw new IllegalArgumentException("Dataset Label is missing");
    }
    DescriptionTemplateFileTransformerModel template = descriptionEntity.getDescriptionTemplate();
    Map<String, Object> templateIdsToValues = this.createFieldIdValueMap(template);
    rda.setTitle(descriptionEntity.getLabel());
    rda.setDescription(descriptionEntity.getDescription());
    // TODO: export the template as the "template" additional property.
    try {
        List<FieldFileTransformerModel> idNodes = TemplateFieldSearcher.searchFields(template, "schematics", "rda.dataset.dataset_id");
        if (!idNodes.isEmpty()) {
            rda.setDatasetId(DatasetIdRDAMapper.toRDA(idNodes));
        }
        if (rda.getDatasetId() == null) {
            // Fall back to the description's own id when the template yields none.
            rda.setDatasetId(new DatasetId(descriptionEntity.getId().toString(), DatasetId.Type.OTHER));
        }

        List<FieldFileTransformerModel> typeNodes = TemplateFieldSearcher.searchFields(template, "schematics", "rda.dataset.type");
        String type = firstNonEmptyValue(typeNodes);
        rda.setType(type != null ? type : "DMP Dataset");

        List<FieldFileTransformerModel> languageNodes = TemplateFieldSearcher.searchFields(template, "schematics", "rda.dataset.language");
        String lang = firstNonEmptyValue(languageNodes);
        if (lang != null) {
            try {
                rda.setLanguage(Language.fromValue(lang));
            } catch (IllegalArgumentException e) {
                //TODO
                logger.warn("Language {} from semantic rda.dataset.language was not found. Setting '{}' as language from the dataset profile.", lang, template.getLanguage());
                rda.setLanguage(LanguageRDAMapper.mapLanguageIsoToRDAIso(template.getLanguage()));
            }
        } else {
            //TODO
            rda.setLanguage(LanguageRDAMapper.mapLanguageIsoToRDAIso(template.getLanguage()));
        }

        List<FieldFileTransformerModel> metadataNodes = TemplateFieldSearcher.searchFields(template, "schematics", "rda.dataset.metadata");
        if (!metadataNodes.isEmpty()) {
            rda.setMetadata(MetadataRDAMapper.toRDAList(metadataNodes));
        } else {
            rda.setMetadata(new ArrayList<>());
        }

        List<FieldFileTransformerModel> qaNodes = TemplateFieldSearcher.searchFields(template, "schematics", "rda.dataset.data_quality_assurance");
        if (!qaNodes.isEmpty()) {
            // Remember which fields the answers came from so toEntity can write them back.
            for (int i = 0; i < qaNodes.size(); i++) {
                rda.setAdditionalProperty("qaId" + (i + 1), qaNodes.get(i).getId());
            }
            rda.setDataQualityAssurance(collectQualityAssurance(qaNodes, rda));
        } else {
            rda.setDataQualityAssurance(new ArrayList<>());
        }

        List<FieldFileTransformerModel> preservationNodes = TemplateFieldSearcher.searchFields(template, "schematics", "rda.dataset.preservation_statement");
        String preservationStatement = firstNonEmptyValue(preservationNodes);
        if (preservationStatement != null) {
            rda.setPreservationStatement(preservationStatement);
        }

        List<FieldFileTransformerModel> distributionNodes = TemplateFieldSearcher.searchFields(template, "schematics", "rda.dataset.distribution");
        if (!distributionNodes.isEmpty()) {
            rda.setDistribution(DistributionRDAMapper.toRDAList(distributionNodes));
        } else {
            rda.setDistribution(new ArrayList<>());
        }

        List<FieldFileTransformerModel> keywordNodes = TemplateFieldSearcher.searchFields(template, "schematics", "rda.dataset.keyword");
        if (!keywordNodes.isEmpty()) {
            rda.setKeyword(collectKeywords(keywordNodes));
            for (int i = 0; i < keywordNodes.size(); i++) {
                rda.setAdditionalProperty("keyword" + (i + 1), keywordNodes.get(i).getId());
            }
        }
        // TODO: fall back to the description's tags when the template has no keyword field.

        List<FieldFileTransformerModel> personalDataNodes = TemplateFieldSearcher.searchFields(template, "schematics", "rda.dataset.personal_data");
        rda.setPersonalData(resolvePersonalData(personalDataNodes));

        List<FieldFileTransformerModel> securityAndPrivacyNodes = TemplateFieldSearcher.searchFields(template, "schematics", "rda.dataset.security_and_privacy");
        if (!securityAndPrivacyNodes.isEmpty()) {
            rda.setSecurityAndPrivacy(SecurityAndPrivacyRDAMapper.toRDAList(securityAndPrivacyNodes));
        } else {
            rda.setSecurityAndPrivacy(new ArrayList<>());
        }

        List<FieldFileTransformerModel> sensitiveDataNodes = TemplateFieldSearcher.searchFields(template, "schematics", "rda.dataset.sensitive_data");
        rda.setSensitiveData(resolveSensitiveData(sensitiveDataNodes));

        List<FieldFileTransformerModel> technicalResourceNodes = TemplateFieldSearcher.searchFields(template, "schematics", "rda.dataset.technical_resource");
        if (!technicalResourceNodes.isEmpty()) {
            rda.setTechnicalResource(TechnicalResourceRDAMapper.toRDAList(technicalResourceNodes));
        } else {
            rda.setTechnicalResource(new ArrayList<>());
        }

        List<FieldFileTransformerModel> issuedNodes = TemplateFieldSearcher.searchFields(template, "schematics", "rda.dataset.issued");
        String issued = firstNonEmptyValue(issuedNodes);
        if (issued != null) {
            rda.setIssued(issued);
        }

        List<FieldFileTransformerModel> contributorNodes = TemplateFieldSearcher.searchFields(template, "schematics", "rda.dmp.contributor");
        if (!contributorNodes.isEmpty()) {
            mergeContributors(contributorNodes, dmp);
        }

        List<FieldFileTransformerModel> costNodes = TemplateFieldSearcher.searchFields(template, "schematics", "rda.dmp.cost");
        if (!costNodes.isEmpty()) {
            dmp.getCost().addAll(CostRDAMapper.toRDAList(costNodes));
        }

        List<FieldFileTransformerModel> ethicsNodes = TemplateFieldSearcher.searchFields(template, "schematics", "rda.dmp.ethical_issues");
        applyEthicalIssues(ethicsNodes, dmp);

        // Ids of the fields consumed by the dataset mappings listed here; every other non-empty
        // field value is exported as an additional property.
        Set<String> mappedIds = Stream.of(typeNodes, languageNodes, metadataNodes, qaNodes, preservationNodes, distributionNodes,
                        keywordNodes, personalDataNodes, securityAndPrivacyNodes, sensitiveDataNodes, technicalResourceNodes)
                .flatMap(Collection::stream)
                .map(FieldFileTransformerModel::getId)
                .collect(Collectors.toSet());
        copyUnmappedFields(templateIdsToValues, mappedIds, rda);
    } catch (Exception e) {
        // Best effort: keep whatever was mapped before the failure.
        logger.error(e.getMessage(), e);
    }
    return rda;
}

/**
 * Returns the value of the first node when it is present and non-empty, otherwise {@code null}.
 */
private static String firstNonEmptyValue(List<FieldFileTransformerModel> nodes) {
    if (nodes.isEmpty() || nodes.get(0).getData() == null) {
        return null;
    }
    String value = nodes.get(0).getData().getValue();
    return (value == null || value.isEmpty()) ? null : value;
}

/**
 * Collects the data-quality-assurance answers from the given fields.
 * <p>
 * Values that parse as a JSON array contribute one entry per element. The first non-empty value that
 * is not a JSON array (free text, or any other JSON) is appended last and recorded on {@code rda}
 * as the {@code otherDQAID}/{@code otherDQA} additional properties.
 */
private List<String> collectQualityAssurance(List<FieldFileTransformerModel> qaNodes, Dataset rda) {
    List<String> qaList = new ArrayList<>();
    FieldFileTransformerModel otherNode = null;
    String otherValue = null;
    for (FieldFileTransformerModel node : qaNodes) {
        String value = node.getData() == null ? null : node.getData().getValue();
        if (value == null || value.isEmpty()) {
            continue;
        }
        JsonNode parsed = null;
        try {
            parsed = mapper.readTree(value);
        } catch (JsonProcessingException e) {
            // Free text is a legitimate answer and is not valid JSON; it is handled below.
            logger.debug("Data quality assurance value of field {} is not JSON, treating it as free text", node.getId());
        }
        if (parsed != null && parsed.isArray()) {
            for (JsonNode element : parsed) {
                qaList.add(element.asText());
            }
        } else if (otherNode == null) {
            otherNode = node;
            otherValue = value;
        }
    }
    if (otherNode != null) {
        qaList.add(otherValue);
        rda.setAdditionalProperty("otherDQAID", otherNode.getId());
        rda.setAdditionalProperty("otherDQA", otherValue);
    }
    return qaList;
}

/**
 * Flattens the keyword fields into a single list: JSON-array values are mapped element by element,
 * any other value is mapped as a whole. Fields without a value or with unparsable JSON are skipped.
 */
private List<String> collectKeywords(List<FieldFileTransformerModel> keywordNodes) {
    List<String> keywords = new ArrayList<>();
    for (FieldFileTransformerModel keywordNode : keywordNodes) {
        String raw = keywordNode.getData() == null ? null : keywordNode.getData().getValue();
        if (raw == null) {
            continue;
        }
        try {
            JsonNode value = mapper.readTree(raw);
            if (value.isArray()) {
                for (JsonNode element : value) {
                    keywords.addAll(KeywordRDAMapper.toRDA(element.toString()));
                }
            } else {
                keywords.addAll(KeywordRDAMapper.toRDA(raw));
            }
        } catch (JsonProcessingException e) {
            logger.error(e.getMessage(), e);
        }
    }
    return keywords;
}

/**
 * Resolves the personal-data flag from the first field that has data; {@code UNKNOWN} when there is
 * no such field or its value is not a recognised constant.
 */
private static Dataset.PersonalData resolvePersonalData(List<FieldFileTransformerModel> nodes) {
    for (FieldFileTransformerModel node : nodes) {
        if (node.getData() == null) {
            continue;
        }
        try {
            Dataset.PersonalData resolved = Dataset.PersonalData.fromValue(node.getData().getValue());
            return resolved != null ? resolved : Dataset.PersonalData.UNKNOWN;
        } catch (IllegalArgumentException e) {
            return Dataset.PersonalData.UNKNOWN;
        }
    }
    return Dataset.PersonalData.UNKNOWN;
}

/**
 * Resolves the sensitive-data flag from the first field that has data; {@code UNKNOWN} when there is
 * no such field or its value is not a recognised constant.
 */
private static Dataset.SensitiveData resolveSensitiveData(List<FieldFileTransformerModel> nodes) {
    for (FieldFileTransformerModel node : nodes) {
        if (node.getData() == null) {
            continue;
        }
        try {
            Dataset.SensitiveData resolved = Dataset.SensitiveData.fromValue(node.getData().getValue());
            return resolved != null ? resolved : Dataset.SensitiveData.UNKNOWN;
        } catch (IllegalArgumentException e) {
            return Dataset.SensitiveData.UNKNOWN;
        }
    }
    return Dataset.SensitiveData.UNKNOWN;
}

/**
 * Adds the contributors found in the given fields to the DMP, then drops every contributor that
 * lacks an id or a name.
 */
private void mergeContributors(List<FieldFileTransformerModel> contributorNodes, Dmp dmp) {
    List<Contributor> parsed = new ArrayList<>();
    for (FieldFileTransformerModel contributorNode : contributorNodes) {
        String raw = contributorNode.getData() == null ? null : contributorNode.getData().getValue();
        if (raw == null) {
            continue;
        }
        try {
            JsonNode value = mapper.readTree(raw);
            if (value.isArray()) {
                for (JsonNode element : value) {
                    parsed.add(ContributorRDAMapper.toRDA(element.asText()));
                }
            } else {
                // Placeholder kept from the original mapping; the completeness filter below decides its fate.
                parsed.add(new Contributor());
            }
        } catch (JsonProcessingException e) {
            logger.warn("Skipping contributor field {}: {}", contributorNode.getId(), e.getMessage(), e);
        }
    }
    dmp.getContributor().addAll(parsed);
    dmp.setContributor(dmp.getContributor().stream()
            .filter(contributor -> contributor != null && contributor.getContributorId() != null && contributor.getName() != null)
            .collect(Collectors.toList()));
}

/**
 * Applies the {@code rda.dmp.ethical_issues*} fields to the DMP: the "exist" flag, the description
 * (concatenated with {@code ", "} across fields) and the report URI.
 */
private void applyEthicalIssues(List<FieldFileTransformerModel> ethicsNodes, Dmp dmp) {
    for (FieldFileTransformerModel node : ethicsNodes) {
        String rdaProperty = node.getSchematics().stream()
                .filter(schematic -> schematic.startsWith("rda.dmp.ethical_issues"))
                .findFirst()
                .orElse("");
        String rdaValue = node.getData() == null ? null : node.getData().getValue();
        if (rdaValue == null || rdaValue.isEmpty()) {
            continue;
        }
        if (rdaProperty.contains("exist")) {
            try {
                Dmp.EthicalIssuesExist exists = dmp.getEthicalIssuesExist();
                if (exists == null
                        || ((exists == Dmp.EthicalIssuesExist.NO || exists == Dmp.EthicalIssuesExist.UNKNOWN) && rdaValue.equals("yes"))
                        || (exists == Dmp.EthicalIssuesExist.YES && !(rdaValue.equals("no") || rdaValue.equals("unknown")))
                        || (exists == Dmp.EthicalIssuesExist.UNKNOWN && rdaValue.equals("no"))) {
                    dmp.setEthicalIssuesExist(Dmp.EthicalIssuesExist.fromValue(rdaValue));
                }
            } catch (IllegalArgumentException e) {
                logger.warn("{}. Setting ethical_issues_exist to unknown", e.getLocalizedMessage());
                dmp.setEthicalIssuesExist(Dmp.EthicalIssuesExist.UNKNOWN);
            }
        } else if (rdaProperty.contains("description")) {
            if (dmp.getEthicalIssuesDescription() == null) {
                dmp.setEthicalIssuesDescription(rdaValue);
            } else {
                dmp.setEthicalIssuesDescription(dmp.getEthicalIssuesDescription() + ", " + rdaValue);
            }
        } else if (rdaProperty.contains("report")) {
            try {
                dmp.setEthicalIssuesReport(URI.create(rdaValue));
            } catch (IllegalArgumentException e) {
                logger.warn("{}. Skipping url parsing", e.getLocalizedMessage());
            }
        }
    }
}

/**
 * Copies every non-empty template value whose field id is not in {@code mappedIds} into the dataset's
 * additional properties. Values that parse as an ISO-8601 instant are stored as {@code yyyy-MM-dd}
 * in the system default time zone; all other values are stored unchanged.
 */
private static void copyUnmappedFields(Map<String, Object> templateIdsToValues, Set<String> mappedIds, Dataset rda) {
    DateTimeFormatter dayFormat = DateTimeFormatter.ofPattern("yyyy-MM-dd").withZone(ZoneId.systemDefault());
    for (Map.Entry<String, Object> entry : templateIdsToValues.entrySet()) {
        Object value = entry.getValue();
        if (mappedIds.contains(entry.getKey()) || value == null || value.toString().isEmpty()) {
            continue;
        }
        try {
            rda.setAdditionalProperty(entry.getKey(), dayFormat.format(Instant.parse(value.toString())));
        } catch (DateTimeParseException e) {
            rda.setAdditionalProperty(entry.getKey(), value);
        }
    }
}
/**
 * Builds a description from an RDA {@link Dataset}, writing the dataset's values into the fields of
 * the given template.
 * <p>
 * Title and description are always copied. The remaining values are written into the template
 * fields located through their {@code rda.dataset.*} semantics or, for keywords and additional
 * properties, through the field ids recorded by {@link #toRDA}. A target field that is missing or has
 * no data object is skipped. The mapping is best effort: an exception is logged and the entity
 * populated so far is returned.
 *
 * @param rda            the dataset to import
 * @param defaultProfile the template assigned to the new description and filled with the values
 * @return the populated description
 */
public DescriptionFileTransformerModel toEntity(Dataset rda, DescriptionTemplateFileTransformerModel defaultProfile) {
    DescriptionFileTransformerModel entity = new DescriptionFileTransformerModel();
    entity.setLabel(rda.getTitle());
    entity.setDescription(rda.getDescription());
    // TODO: resolve the template from the "template" additional property instead of always using the default.
    entity.setDescriptionTemplate(defaultProfile);
    try {
        DescriptionTemplateFileTransformerModel template = entity.getDescriptionTemplate();

        List<FieldFileTransformerModel> typeNodes = TemplateFieldSearcher.searchFields(template, "schematics", "rda.dataset.type");
        writeFirstValue(typeNodes, rda.getType());

        List<FieldFileTransformerModel> languageNodes = TemplateFieldSearcher.searchFields(template, "schematics", "rda.dataset.language");
        if (rda.getLanguage() != null) {
            writeFirstValue(languageNodes, rda.getLanguage().value());
        }

        // TODO: map metadata and the dataset id back into template fields.

        List<FieldFileTransformerModel> qaNodes = TemplateFieldSearcher.searchFields(template, "schematics", "rda.dataset.data_quality_assurance");
        List<String> qas = rda.getDataQualityAssurance();
        if (!qaNodes.isEmpty() && qas != null && !qas.isEmpty()) {
            writeFirstValue(qaNodes, mapper.writeValueAsString(qas));
            // The free-text answer goes back into the field it was exported from.
            Object otherId = rda.getAdditionalProperties().get("otherDQAID");
            if (otherId != null) {
                List<FieldFileTransformerModel> subFields = TemplateFieldSearcher.searchFields(template, "id", otherId.toString());
                Object other = rda.getAdditionalProperties().get("otherDQA");
                writeFirstValue(subFields, other == null ? null : other.toString());
            }
        }

        List<FieldFileTransformerModel> preservationNodes = TemplateFieldSearcher.searchFields(template, "schematics", "rda.dataset.preservation_statement");
        writeFirstValue(preservationNodes, rda.getPreservationStatement());

        List<FieldFileTransformerModel> issuedNodes = TemplateFieldSearcher.searchFields(template, "schematics", "rda.dataset.issued");
        writeFirstValue(issuedNodes, rda.getIssued());

        // TODO: map the distribution back into template fields.

        List<String> keywords = rda.getKeyword();
        if (keywords != null) {
            // NOTE(review): the order of these ids follows the additional-properties map iteration
            // order, which may not match keyword1, keyword2, ... - confirm against the Dataset model.
            List<String> keywordIds = rda.getAdditionalProperties().entrySet().stream()
                    .filter(entry -> entry.getKey().startsWith("keyword") && entry.getValue() != null)
                    .map(entry -> entry.getValue().toString())
                    .collect(Collectors.toList());
            // With fewer fields than keywords, every field receives the complete keyword list.
            boolean takeAll = keywordIds.size() < keywords.size();
            for (int i = 0; i < keywordIds.size(); i++) {
                List<FieldFileTransformerModel> tagField = TemplateFieldSearcher.searchFields(template, "id", keywordIds.get(i));
                if (takeAll) {
                    List<TagFileTransformerModel> tags = new ArrayList<>();
                    for (String keyword : keywords) {
                        tags.add(toTagEntity(keyword));
                    }
                    writeFirstValue(tagField, mapper.writeValueAsString(tags));
                } else if (i < keywords.size()) {
                    writeFirstValue(tagField, mapper.writeValueAsString(toTagEntity(keywords.get(i))));
                }
            }
        }

        List<FieldFileTransformerModel> personalDataNodes = TemplateFieldSearcher.searchFields(template, "schematics", "rda.dataset.personal_data");
        if (rda.getPersonalData() != null) {
            writeFirstValue(personalDataNodes, rda.getPersonalData().value());
        }

        // TODO: map security and privacy back into template fields.

        List<FieldFileTransformerModel> sensitiveDataNodes = TemplateFieldSearcher.searchFields(template, "schematics", "rda.dataset.sensitive_data");
        if (rda.getSensitiveData() != null) {
            writeFirstValue(sensitiveDataNodes, rda.getSensitiveData().value());
        }

        // TODO: map technical resources back into template fields.

        // Remaining additional properties are keyed by field id; keys that match no field are ignored.
        for (Map.Entry<String, Object> entry : rda.getAdditionalProperties().entrySet()) {
            String key = entry.getKey();
            if (key.equals("template") || key.startsWith("qaId") || key.startsWith("keyword")) {
                continue;
            }
            List<FieldFileTransformerModel> field = TemplateFieldSearcher.searchFields(template, "id", key);
            Object value = entry.getValue();
            writeFirstValue(field, value == null ? null : value.toString());
        }
    } catch (Exception e) {
        // Best effort: keep whatever was mapped before the failure.
        logger.error(e.getMessage(), e);
    }
    return entity;
}

/**
 * Stores {@code value} in the first field of {@code fields}; does nothing when the list is
 * {@code null} or empty, or when that field has no data object to write into.
 */
private static void writeFirstValue(List<FieldFileTransformerModel> fields, String value) {
    if (fields == null || fields.isEmpty() || fields.get(0).getData() == null) {
        return;
    }
    fields.get(0).getData().setValue(value);
}
/**
 * Wraps a keyword in a new tag model with a freshly generated random id.
 *
 * @param name the keyword to use as the tag label
 * @return the new tag
 */
private static TagFileTransformerModel toTagEntity(String name) {
    TagFileTransformerModel result = new TagFileTransformerModel();
    result.setLabel(name);
    result.setId(UUID.randomUUID());
    return result;
}
/**
 * Collects the value of every field in the template, keyed by field id, by walking each section of
 * each page of the template definition.
 *
 * @param template the template whose field values are collected
 * @return a new map from field id to field value
 */
private Map<String, Object> createFieldIdValueMap(DescriptionTemplateFileTransformerModel template) {
    Map<String, Object> valuesById = new HashMap<>();
    for (var page : template.getDefinition().getPages()) {
        for (SectionFileTransformerModel section : page.getSections()) {
            valuesById.putAll(createFieldIdValueMapFromSection(section));
        }
    }
    return valuesById;
}
/**
 * Collects the field values of a section, keyed by field id. Nested sections are visited first,
 * then the section's own field sets; fields without a data object are left out.
 *
 * @param section the section to walk
 * @return a new map from field id to field value
 */
private Map<String, Object> createFieldIdValueMapFromSection(SectionFileTransformerModel section) {
    Map<String, Object> valuesById = new HashMap<>();
    if (section.getSections() != null) {
        for (SectionFileTransformerModel child : section.getSections()) {
            valuesById.putAll(createFieldIdValueMapFromSection(child));
        }
    }
    if (section.getFieldSets() != null) {
        for (var fieldSet : section.getFieldSets()) {
            if (fieldSet.getFields() == null) {
                continue;
            }
            for (var field : fieldSet.getFields()) {
                if (field.getData() != null) {
                    valuesById.put(field.getId(), field.getData().getValue());
                }
            }
        }
    }
    return valuesById;
}
}