// file-transformer-rda-json/core/src/main/java/org/opencdmp/filetransformer/rda/model/rda/mapper/DatasetRDAMapper.java
//
// 359 lines
// 20 KiB
// Java

package org.opencdmp.filetransformer.rda.model.rda.mapper;
import org.opencdmp.commonmodels.models.description.DescriptionModel;
import org.opencdmp.commonmodels.models.description.PropertyDefinitionFieldSetItemModel;
import org.opencdmp.commonmodels.models.description.PropertyDefinitionFieldSetModel;
import org.opencdmp.commonmodels.models.description.PropertyDefinitionModel;
import org.opencdmp.commonmodels.models.descriptiotemplate.DescriptionTemplateModel;
import org.opencdmp.commonmodels.models.descriptiotemplate.FieldModel;
import org.opencdmp.filetransformer.rda.model.rda.Dataset;
import org.opencdmp.filetransformer.rda.model.rda.DatasetId;
import org.opencdmp.filetransformer.rda.model.rda.Dmp;
import org.opencdmp.filetransformer.rda.model.rda.Language;
import org.opencdmp.filetransformer.rda.service.descriptiontemplatesearcher.TemplateFieldSearcherService;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
import java.net.URI;
import java.util.*;
import java.util.stream.Collectors;
/**
 * Builds the RDA {@link Dataset} representation of a {@link DescriptionModel}.
 * <p>
 * Template fields are located by their {@code rda.*} semantic through the
 * {@link TemplateFieldSearcherService}; the values entered for those fields are then read from
 * the description's properties. Fields tagged with {@code rda.dmp.cost} and
 * {@code rda.dmp.ethical_issues} semantics are written to the {@link Dmp} that is passed in
 * through {@code extraData}.
 */
@Component
public class DatasetRDAMapper {

    private static final Logger logger = LoggerFactory.getLogger(DatasetRDAMapper.class);

    private static final String SEMANTIC_DATASET_ID = "rda.dataset.dataset_id";
    private static final String SEMANTIC_TYPE = "rda.dataset.type";
    private static final String SEMANTIC_LANGUAGE = "rda.dataset.language";
    private static final String SEMANTIC_METADATA = "rda.dataset.metadata";
    private static final String SEMANTIC_PRESERVATION_STATEMENT = "rda.dataset.preservation_statement";
    private static final String SEMANTIC_DISTRIBUTION = "rda.dataset.distribution";
    private static final String SEMANTIC_KEYWORD = "rda.dataset.keyword";
    private static final String SEMANTIC_PERSONAL_DATA = "rda.dataset.personal_data";
    private static final String SEMANTIC_SECURITY_AND_PRIVACY = "rda.dataset.security_and_privacy";
    private static final String SEMANTIC_SENSITIVE_DATA = "rda.dataset.sensitive_data";
    private static final String SEMANTIC_TECHNICAL_RESOURCE = "rda.dataset.technical_resource";
    private static final String SEMANTIC_ISSUED = "rda.dataset.issued";
    private static final String SEMANTIC_COST = "rda.dmp.cost";
    private static final String SEMANTIC_ETHICAL_ISSUES = "rda.dmp.ethical_issues";

    /** Type assigned to the dataset when the description does not provide one. */
    private static final String DEFAULT_TYPE = "DMP Dataset";

    private final TemplateFieldSearcherService templateFieldSearcherService;
    private final LanguageRDAMapper languageRDAMapper;
    private final DatasetIdRDAMapper datasetIdRDAMapper;
    private final MetadataRDAMapper metadataRDAMapper;
    private final DistributionRDAMapper distributionRDAMapper;
    private final TechnicalResourceRDAMapper technicalResourceRDAMapper;
    private final CostRDAMapper costRDAMapper;
    private final SecurityAndPrivacyRDAMapper securityAndPrivacyRDAMapper;
    private final KeywordRDAMapper keywordRDAMapper;

    @Autowired
    public DatasetRDAMapper(TemplateFieldSearcherService templateFieldSearcherService, LanguageRDAMapper languageRDAMapper, DatasetIdRDAMapper datasetIdRDAMapper, MetadataRDAMapper metadataRDAMapper, DistributionRDAMapper distributionRDAMapper, TechnicalResourceRDAMapper technicalResourceRDAMapper, CostRDAMapper costRDAMapper, SecurityAndPrivacyRDAMapper securityAndPrivacyRDAMapper, KeywordRDAMapper keywordRDAMapper) {
        this.templateFieldSearcherService = templateFieldSearcherService;
        this.languageRDAMapper = languageRDAMapper;
        this.datasetIdRDAMapper = datasetIdRDAMapper;
        this.metadataRDAMapper = metadataRDAMapper;
        this.distributionRDAMapper = distributionRDAMapper;
        this.technicalResourceRDAMapper = technicalResourceRDAMapper;
        this.costRDAMapper = costRDAMapper;
        this.securityAndPrivacyRDAMapper = securityAndPrivacyRDAMapper;
        this.keywordRDAMapper = keywordRDAMapper;
    }

    /**
     * Collects, from every field-set item of the description, the value stored under the id of
     * the given template field.
     *
     * @param fieldModel the template field whose values are wanted
     * @param properties the description properties; may be {@code null}
     * @return the matching value fields, empty when there are none
     */
    private List<org.opencdmp.commonmodels.models.description.FieldModel> findValueField(FieldModel fieldModel, PropertyDefinitionModel properties) {
        List<org.opencdmp.commonmodels.models.description.FieldModel> items = new ArrayList<>();
        if (properties == null || properties.getFieldSets() == null) {
            return items;
        }
        for (PropertyDefinitionFieldSetModel fieldSet : properties.getFieldSets().values()) {
            if (fieldSet.getItems() == null) {
                continue;
            }
            for (PropertyDefinitionFieldSetItemModel item : fieldSet.getItems()) {
                if (item.getFields() == null) {
                    continue;
                }
                org.opencdmp.commonmodels.models.description.FieldModel valueField = item.getFields().get(fieldModel.getId());
                if (valueField != null) {
                    items.add(valueField);
                }
            }
        }
        return items;
    }

    /**
     * Flattens every value field of every field-set item of the description into one list.
     *
     * @param properties the description properties; may be {@code null}
     * @return all value fields, empty when there are none
     */
    private List<org.opencdmp.commonmodels.models.description.FieldModel> getAllValueFields(PropertyDefinitionModel properties) {
        List<org.opencdmp.commonmodels.models.description.FieldModel> items = new ArrayList<>();
        if (properties == null || properties.getFieldSets() == null) {
            return items;
        }
        for (PropertyDefinitionFieldSetModel fieldSet : properties.getFieldSets().values()) {
            if (fieldSet.getItems() == null) {
                continue;
            }
            for (PropertyDefinitionFieldSetItemModel item : fieldSet.getItems()) {
                if (item.getFields() == null) {
                    continue;
                }
                items.addAll(item.getFields().values());
            }
        }
        return items;
    }

    /**
     * Returns, for each template field in {@code nodes} (in iteration order), the first
     * non-blank text value entered for it. Fields without such a value contribute nothing.
     *
     * @param nodes      template fields grouped as returned by the semantic search
     * @param properties the description properties to read the values from
     * @return the non-blank text candidates, never {@code null}
     */
    private List<String> candidateTexts(Map<String, List<FieldModel>> nodes, PropertyDefinitionModel properties) {
        List<String> candidates = new ArrayList<>();
        for (List<FieldModel> group : nodes.values()) {
            if (group == null) {
                continue;
            }
            for (FieldModel node : group) {
                for (org.opencdmp.commonmodels.models.description.FieldModel value : this.findValueField(node, properties)) {
                    String text = value.getTextValue();
                    if (text != null && !text.isBlank()) {
                        candidates.add(text);
                        break;
                    }
                }
            }
        }
        return candidates;
    }

    /**
     * Converts the first candidate the parser accepts. A candidate rejected with an
     * {@link IllegalArgumentException} is logged and skipped so that one unmappable value does
     * not abort the rest of the dataset mapping.
     *
     * @param candidates non-blank text values to try, in order
     * @param parser     conversion applied to each candidate
     * @param label      human-readable name of the target property, used in the log message
     * @param semantic   semantic the candidates were found under, used in the log message
     * @return the first successfully converted value, or {@code null} when none could be converted
     */
    private <T> T firstParsable(List<String> candidates, java.util.function.Function<String, T> parser, String label, String semantic) {
        for (String candidate : candidates) {
            try {
                T parsed = parser.apply(candidate);
                if (parsed != null) {
                    return parsed;
                }
            } catch (IllegalArgumentException e) {
                logger.warn("{} from semantic {} was not found.", label, semantic);
            }
        }
        return null;
    }

    /**
     * Sets the dataset keywords from the values of the given template fields and mirrors the
     * resulting keywords as {@code keyword1}, {@code keyword2}, ... additional properties.
     * Keywords of all fields are accumulated; a field's text list takes precedence over its
     * single text value.
     */
    private void applyKeywords(Dataset rda, Map<String, List<FieldModel>> keywordNodes, PropertyDefinitionModel properties) {
        List<String> keywords = new ArrayList<>();
        boolean anyValueField = false;
        for (List<FieldModel> group : keywordNodes.values()) {
            if (group == null) {
                continue;
            }
            for (FieldModel keywordNode : group) {
                List<org.opencdmp.commonmodels.models.description.FieldModel> fieldValues = this.findValueField(keywordNode, properties);
                if (fieldValues.isEmpty()) {
                    continue;
                }
                anyValueField = true;
                for (org.opencdmp.commonmodels.models.description.FieldModel value : fieldValues) {
                    var listValue = value.getTextListValue();
                    String text = value.getTextValue();
                    if (listValue != null && !listValue.isEmpty()) {
                        for (var item : listValue) {
                            keywords.add(keywordRDAMapper.toRDA(item));
                        }
                    } else if (text != null && !text.isBlank()) {
                        keywords.add(keywordRDAMapper.toRDA(text));
                    }
                }
            }
        }
        // Only touch the keyword list when the description actually holds values for a keyword field.
        if (anyValueField) {
            rda.setKeyword(keywords);
        }
        if (rda.getKeyword() != null) {
            int i = 0;
            for (String keyword : rda.getKeyword()) {
                rda.setAdditionalProperty("keyword" + (i + 1), keyword);
                i++;
            }
        }
        // else if (apiContext.getOperationsContext().getElasticRepository().getDatasetRepository().exists()) { //TODO
        // List<String> tags = apiContext.getOperationsContext().getElasticRepository().getDatasetRepository().findDocument(descriptionEntity.getId().toString()).getTags().stream().map(Tag::getName).collect(Collectors.toList());
        // rda.setKeyword(tags);
        // }
    }

    /**
     * Copies the ethical-issues answers of the description onto the DMP. Which DMP property a
     * field feeds ({@code exist}, {@code description} or {@code report}) is derived from the
     * field's {@code rda.dmp.ethical_issues*} schematic.
     */
    private void applyEthicalIssues(Dmp dmp, Map<String, List<FieldModel>> ethicsNodes, PropertyDefinitionModel properties) {
        for (List<FieldModel> group : ethicsNodes.values()) {
            if (group == null) {
                continue;
            }
            for (FieldModel node : group) {
                List<org.opencdmp.commonmodels.models.description.FieldModel> fieldValues = this.findValueField(node, properties);
                // A template field without an entered value is skipped; calling getFirst() on the
                // empty list would throw and abort the remaining mapping.
                if (fieldValues.isEmpty()) {
                    continue;
                }
                String rdaValue = fieldValues.getFirst().getTextValue();
                if (rdaValue == null || rdaValue.isEmpty()) {
                    continue;
                }
                String rdaProperty = node.getSchematics().stream().filter(schematic -> schematic.startsWith(SEMANTIC_ETHICAL_ISSUES)).findFirst().orElse("");
                if (rdaProperty.contains("exist")) {
                    try {
                        Dmp.EthicalIssuesExist exists = dmp.getEthicalIssuesExist();
                        if (exists == null
                                || ((exists == Dmp.EthicalIssuesExist.NO || exists == Dmp.EthicalIssuesExist.UNKNOWN) && rdaValue.equals("yes"))
                                || (exists == Dmp.EthicalIssuesExist.YES && !(rdaValue.equals("no") || rdaValue.equals("unknown")))
                                || (exists == Dmp.EthicalIssuesExist.UNKNOWN && rdaValue.equals("no"))) {
                            dmp.setEthicalIssuesExist(Dmp.EthicalIssuesExist.fromValue(rdaValue));
                        }
                    } catch (IllegalArgumentException e) {
                        logger.warn("{}. Setting ethical_issues_exist to unknown", e.getLocalizedMessage());
                        dmp.setEthicalIssuesExist(Dmp.EthicalIssuesExist.UNKNOWN);
                    }
                } else if (rdaProperty.contains("description")) {
                    if (dmp.getEthicalIssuesDescription() == null) {
                        dmp.setEthicalIssuesDescription(rdaValue);
                    } else {
                        dmp.setEthicalIssuesDescription(dmp.getEthicalIssuesDescription() + ", " + rdaValue);
                    }
                } else if (rdaProperty.contains("report")) {
                    try {
                        dmp.setEthicalIssuesReport(URI.create(rdaValue));
                    } catch (IllegalArgumentException e) {
                        logger.warn("{}. Skipping url parsing", e.getLocalizedMessage());
                    }
                }
            }
        }
    }

    /**
     * Maps a description to an RDA dataset.
     * <p>
     * Title, description and the {@code template} / {@code dmpSectionIndex} additional properties
     * are always set. The semantic-driven part of the mapping is best effort: any exception
     * raised there is logged and the dataset built so far is returned.
     *
     * @param descriptionEntity the description to map; {@code null} yields {@code null}
     * @param extraData         must contain the target {@link Dmp} under the key {@code "dmp"};
     *                          cost and ethical-issues information is added to that DMP
     * @return the mapped dataset, or {@code null} when {@code descriptionEntity} is {@code null}
     * @throws IllegalArgumentException when the label, {@code extraData} or the DMP is missing
     */
    public Dataset toRDA(DescriptionModel descriptionEntity, Map<String, Object> extraData) {
        if (descriptionEntity == null) {
            return null;
        }
        if (descriptionEntity.getLabel() == null) {
            throw new IllegalArgumentException("Dataset Label is missing");
        }
        if (extraData == null) {
            throw new IllegalArgumentException("extraData is missing");
        }
        Object dmpObject = extraData.get("dmp");
        if (dmpObject == null) {
            throw new IllegalArgumentException("Dmp is missing");
        }
        Dmp dmp = (Dmp) dmpObject;

        Dataset rda = new Dataset();
        rda.setTitle(descriptionEntity.getLabel());
        rda.setDescription(descriptionEntity.getDescription());
        rda.setAdditionalProperty("template", descriptionEntity.getDescriptionTemplate().getId());
        rda.setAdditionalProperty("dmpSectionIndex", descriptionEntity.getSectionId());
        //Map<String, Object> templateIdsToValues = this.createFieldIdValueMap(descriptionEntity.getDescriptionTemplate());
        //rda.setAdditionalProperty("template", descriptionEntity.getDescriptionTemplate()); //TODO
        try {
            var template = descriptionEntity.getDescriptionTemplate();
            PropertyDefinitionModel properties = descriptionEntity.getProperties();

            // Dataset id: taken from the template when available, otherwise the description id.
            Map<String, List<FieldModel>> idNodes = this.templateFieldSearcherService.searchFieldsBySemantics(template, SEMANTIC_DATASET_ID);
            if (!idNodes.isEmpty()) {
                rda.setDatasetId(datasetIdRDAMapper.toRDA(idNodes, this.getAllValueFields(properties)));
            }
            if (rda.getDatasetId() == null) {
                rda.setDatasetId(new DatasetId(descriptionEntity.getId().toString(), DatasetId.Type.OTHER));
            }

            // Type: first non-blank value wins, with a fixed fallback.
            Map<String, List<FieldModel>> typeNodes = this.templateFieldSearcherService.searchFieldsBySemantics(template, SEMANTIC_TYPE);
            List<String> typeCandidates = this.candidateTexts(typeNodes, properties);
            if (!typeCandidates.isEmpty()) {
                rda.setType(typeCandidates.getFirst());
            }
            if (rda.getType() == null || rda.getType().isBlank()) {
                rda.setType(DEFAULT_TYPE);
            }

            // Language: first value that maps to a known language, else the template language.
            Map<String, List<FieldModel>> languageNodes = this.templateFieldSearcherService.searchFieldsBySemantics(template, SEMANTIC_LANGUAGE);
            Language language = this.firstParsable(this.candidateTexts(languageNodes, properties), value -> Language.fromValue(value), "Language", SEMANTIC_LANGUAGE);
            if (language != null) {
                rda.setLanguage(language);
            }
            if (rda.getLanguage() == null) {
                rda.setLanguage(languageRDAMapper.toRDA(template.getLanguage()));
            }

            Map<String, List<FieldModel>> metadataNodes = this.templateFieldSearcherService.searchFieldsBySemantics(template, SEMANTIC_METADATA);
            if (!metadataNodes.isEmpty()) {
                rda.setMetadata(metadataRDAMapper.toRDA(metadataNodes, this.getAllValueFields(properties)));
            } else {
                rda.setMetadata(new ArrayList<>());
            }
            //TODO
            // List<FieldModel> qaNodes = this.templateFieldSearcherService.searchFieldsBySemantics(descriptionEntity.getDescriptionTemplate(), "rda.dataset.data_quality_assurance");
            // if (!qaNodes.isEmpty()) {
            // rda.setDataQualityAssurance(qaNodes.stream().filter(qaNode -> qaNode.getData() != null).map(qaNode -> qaNode.getData().getValue()).collect(Collectors.toList()));
            // for (int i = 0; i < qaNodes.size(); i++) {
            // rda.setAdditionalProperty("qaId" + (i + 1), qaNodes.get(i).getId());
            // }
            // List<String> qaList = new ArrayList<>();
            // String qa;
            // for(FieldModel node: qaNodes){
            // if (node.getData() == null) {
            // continue;
            // }
            // JsonNode valueNode = mapper.readTree(node.getData().getValue());
            // if(valueNode.isArray()){
            // Iterator<JsonNode> iter = valueNode.elements();
            // while(iter.hasNext()) {
            // qa = iter.next().asText();
            // qaList.add(qa);
            // }
            // }
            // }
            // String data_quality;
            // for(FieldModel dqa: qaNodes){
            // if (dqa.getData() == null) {
            // continue;
            // }
            // data_quality = dqa.getData().getValue();
            // if(!data_quality.isEmpty()){
            // qaList.add(data_quality);
            // rda.setAdditionalProperty("otherDQAID", dqa.getId());
            // rda.setAdditionalProperty("otherDQA", data_quality);
            // break;
            // }
            // }
            // rda.setDataQualityAssurance(qaList);
            // }else{
            // rda.setDataQualityAssurance(new ArrayList<>());
            // }

            Map<String, List<FieldModel>> preservationNodes = this.templateFieldSearcherService.searchFieldsBySemantics(template, SEMANTIC_PRESERVATION_STATEMENT);
            List<String> preservationCandidates = this.candidateTexts(preservationNodes, properties);
            if (!preservationCandidates.isEmpty()) {
                rda.setPreservationStatement(preservationCandidates.getFirst());
            }

            Map<String, List<FieldModel>> distributionNodes = this.templateFieldSearcherService.searchFieldsBySemantics(template, SEMANTIC_DISTRIBUTION);
            if (!distributionNodes.isEmpty()) {
                rda.setDistribution(distributionRDAMapper.toRDA(distributionNodes, this.getAllValueFields(properties)));
            } else {
                rda.setDistribution(new ArrayList<>());
            }

            Map<String, List<FieldModel>> keywordNodes = this.templateFieldSearcherService.searchFieldsBySemantics(template, SEMANTIC_KEYWORD);
            this.applyKeywords(rda, keywordNodes, properties);

            // Personal data: UNKNOWN only when the description gives no usable answer.
            Map<String, List<FieldModel>> personalDataNodes = this.templateFieldSearcherService.searchFieldsBySemantics(template, SEMANTIC_PERSONAL_DATA);
            Dataset.PersonalData personalData = this.firstParsable(this.candidateTexts(personalDataNodes, properties), value -> Dataset.PersonalData.fromValue(value), "Personal data", SEMANTIC_PERSONAL_DATA);
            if (personalData != null) {
                rda.setPersonalData(personalData);
            }
            if (rda.getPersonalData() == null) {
                rda.setPersonalData(Dataset.PersonalData.UNKNOWN);
            }

            Map<String, List<FieldModel>> securityAndPrivacyNodes = this.templateFieldSearcherService.searchFieldsBySemantics(template, SEMANTIC_SECURITY_AND_PRIVACY);
            if (!securityAndPrivacyNodes.isEmpty()) {
                rda.setSecurityAndPrivacy(securityAndPrivacyRDAMapper.toRDA(securityAndPrivacyNodes, this.getAllValueFields(properties)));
            } else {
                rda.setSecurityAndPrivacy(new ArrayList<>());
            }

            // Sensitive data: UNKNOWN only when the description gives no usable answer.
            Map<String, List<FieldModel>> sensitiveDataNodes = this.templateFieldSearcherService.searchFieldsBySemantics(template, SEMANTIC_SENSITIVE_DATA);
            Dataset.SensitiveData sensitiveData = this.firstParsable(this.candidateTexts(sensitiveDataNodes, properties), value -> Dataset.SensitiveData.fromValue(value), "Sensitive data", SEMANTIC_SENSITIVE_DATA);
            if (sensitiveData != null) {
                rda.setSensitiveData(sensitiveData);
            }
            if (rda.getSensitiveData() == null) {
                rda.setSensitiveData(Dataset.SensitiveData.UNKNOWN);
            }

            Map<String, List<FieldModel>> technicalResourceNodes = this.templateFieldSearcherService.searchFieldsBySemantics(template, SEMANTIC_TECHNICAL_RESOURCE);
            if (!technicalResourceNodes.isEmpty()) {
                rda.setTechnicalResource(technicalResourceRDAMapper.toRDA(technicalResourceNodes, this.getAllValueFields(properties)));
            } else {
                rda.setTechnicalResource(new ArrayList<>());
            }

            Map<String, List<FieldModel>> issuedNodes = this.templateFieldSearcherService.searchFieldsBySemantics(template, SEMANTIC_ISSUED);
            List<String> issuedCandidates = this.candidateTexts(issuedNodes, properties);
            if (!issuedCandidates.isEmpty()) {
                rda.setIssued(issuedCandidates.getFirst());
            }
            //TODO
            // List<FieldModel> contributorNodes = this.templateFieldSearcherService.searchFieldsBySemantics(descriptionEntity.getDescriptionTemplate(), "rda.dmp.contributor");
            // if (!contributorNodes.isEmpty()) {
            // dmp.getContributor().addAll(contributorNodes.stream().filter(contributorNode -> contributorNode.getData() != null).map(contributorNode -> {
            // try {
            // JsonNode value = mapper.readTree(contributorNode.getData().getValue());
            // if (value.isArray()) {
            // return StreamSupport.stream(value.spliterator(), false).map(node -> DmpUserContributorRDAMapper.toRDA(node.asText())).collect(Collectors.toList());
            // } else {
            // return Collections.singletonList(new Contributor());
            // }
            // }catch (JsonProcessingException e) {
            // return null;
            // }
            // }).filter(Objects::nonNull).flatMap(Collection::stream).toList());
            // dmp.setContributor(dmp.getContributor().stream().filter(contributor -> contributor.getContributorId() != null && contributor.getName() != null).collect(Collectors.toList()));
            // }

            Map<String, List<FieldModel>> costNodes = this.templateFieldSearcherService.searchFieldsBySemantics(template, SEMANTIC_COST);
            if (!costNodes.isEmpty()) {
                dmp.getCost().addAll(costRDAMapper.toRDA(costNodes, this.getAllValueFields(properties)));
            }

            Map<String, List<FieldModel>> ethicsNodes = this.templateFieldSearcherService.searchFieldsBySemantics(template, SEMANTIC_ETHICAL_ISSUES);
            if (!ethicsNodes.isEmpty()) {
                this.applyEthicalIssues(dmp, ethicsNodes, properties);
            }
            //TODO
            // List<FieldModel> foundNodes = Stream.of(typeNodes, languageNodes, metadataNodes, qaNodes , preservationNodes, distributionNodes,
            // keywordNodes, personalDataNodes, securityAndPrivacyNodes, sensitiveDataNodes, technicalResourceNodes).flatMap(Collection::stream).toList();
            // templateIdsToValues.entrySet().forEach(entry -> {
            // boolean isFound = foundNodes.stream().anyMatch(node -> node.getId().equals(entry.getKey()));
            // if (!isFound && entry.getValue() != null && !entry.getValue().toString().isEmpty()) {
            // try {
            // Instant time = Instant.parse(entry.getValue().toString());
            // rda.setAdditionalProperty(entry.getKey(), DateTimeFormatter.ofPattern("yyyy-MM-dd").withZone(ZoneId.systemDefault()).format(time));
            // } catch (DateTimeParseException e) {
            // rda.setAdditionalProperty(entry.getKey(), entry.getValue());
            // }
            // }
            // });
        } catch (Exception e) {
            // Deliberate best effort: a failure in the semantic mapping must not lose the dataset.
            logger.error(e.getMessage(), e);
        }
        return rda;
    }

    /**
     * Reverse mapping from an RDA dataset to a description; not implemented.
     *
     * @throws UnsupportedOperationException always
     */
    public DescriptionModel toEntity(Dataset rda, DescriptionTemplateModel defaultProfile) {
        throw new UnsupportedOperationException();
    }
}