Even more improvements to RDA mapping (partial contribution by M.Aldo)

This commit is contained in:
George Kalampokis 2021-12-17 12:59:10 +02:00
parent cff9af75c6
commit eb7d0d04fc
8 changed files with 96 additions and 57 deletions

View File

@ -39,8 +39,9 @@ public class DatasetIdRDAMapper {
finalRDAMap(data, rdaProperty, rdaValue);
}
} catch (IOException e) {
logger.warn(e.getMessage() + ".Passing value as is");
finalRDAMap(data, rdaProperty, rdaValue);
logger.error(e.getMessage(), e);
}
}

View File

@ -68,13 +68,13 @@ public class DatasetRDAMapper {
rda.setDatasetId(new DatasetId(dataset.getId().toString(), DatasetId.Type.OTHER));
}
List<JsonNode> typeNodes = JsonSearcher.findNodes(datasetDescriptionObj, "rdaProperty", "dataset.type");
if (!typeNodes.isEmpty()) {
if (!typeNodes.isEmpty() && !typeNodes.get(0).get("value").asText().isEmpty()) {
rda.setType(typeNodes.get(0).get("value").asText());
} else {
rda.setType("DMP Dataset");
}
List<JsonNode> languageNodes = JsonSearcher.findNodes(datasetDescriptionObj, "rdaProperty", "dataset.language");
if (!languageNodes.isEmpty()) {
if (!languageNodes.isEmpty() && !languageNodes.get(0).get("value").asText().isEmpty()) {
rda.setLanguage(Language.fromValue(languageNodes.get(0).get("value").asText()));
} else {
rda.setLanguage(LanguageRDAMapper.mapLanguageIsoToRDAIso(dataset.getProfile().getLanguage()));
@ -82,6 +82,8 @@ public class DatasetRDAMapper {
List<JsonNode> metadataNodes = JsonSearcher.findNodes(datasetDescriptionObj, "rdaProperty", "dataset.metadata");
if (!metadataNodes.isEmpty()) {
rda.setMetadata(MetadataRDAMapper.toRDAList(metadataNodes));
}else{
rda.setMetadata(new ArrayList<>());
}
List<JsonNode> qaNodes = JsonSearcher.findNodes(datasetDescriptionObj, "rdaProperty", "dataset.data_quality_assurance");
if (!qaNodes.isEmpty()) {
@ -89,19 +91,22 @@ public class DatasetRDAMapper {
for (int i = 0; i < qaNodes.size(); i++) {
rda.setAdditionalProperty("qaId" + (i + 1), qaNodes.get(i).get("id").asText());
}*/
List<String> qaList = new ArrayList<>();
for(JsonNode qaNode: qaNodes){
qaList.add(qaNode.get("value").asText());
}
List<String> qaList = qaNodes.stream()
.map(qaNode -> qaNode.get("value").asText())
.collect(Collectors.toList());
rda.setDataQualityAssurance(qaList);
}else{
rda.setDataQualityAssurance(new ArrayList<>());
}
List<JsonNode> preservationNodes = JsonSearcher.findNodes(datasetDescriptionObj, "rdaProperty", "dataset.preservation_statement");
if (!preservationNodes.isEmpty()) {
if (!preservationNodes.isEmpty() && !preservationNodes.get(0).get("value").asText().isEmpty()) {
rda.setPreservationStatement(preservationNodes.get(0).get("value").asText());
}
List<JsonNode> distributionNodes = JsonSearcher.findNodes(datasetDescriptionObj, "rdaProperty", "dataset.distribution");
if (!distributionNodes.isEmpty()) {
rda.setDistribution(DistributionRDAMapper.toRDAList(distributionNodes));
}else{
rda.setDistribution(new ArrayList<>());
}
List<JsonNode> keywordNodes = JsonSearcher.findNodes(datasetDescriptionObj, "rdaProperty", "dataset.keyword");
if (!keywordNodes.isEmpty()) {
@ -129,6 +134,8 @@ public class DatasetRDAMapper {
List<JsonNode> securityAndPrivacyNodes = JsonSearcher.findNodes(datasetDescriptionObj, "rdaProperty", "dataset.security_and_privacy");
if (!securityAndPrivacyNodes.isEmpty()) {
rda.setSecurityAndPrivacy(SecurityAndPrivacyRDAMapper.toRDAList(securityAndPrivacyNodes));
}else{
rda.setSecurityAndPrivacy(new ArrayList<>());
}
List<JsonNode> sensitiveDataNodes = JsonSearcher.findNodes(datasetDescriptionObj, "rdaProperty", "dataset.sensitive_data");
if (!sensitiveDataNodes.isEmpty()) {
@ -139,9 +146,11 @@ public class DatasetRDAMapper {
List<JsonNode> technicalResourceNodes = JsonSearcher.findNodes(datasetDescriptionObj, "rdaProperty", "dataset.technical_resource");
if (!technicalResourceNodes.isEmpty()) {
rda.setTechnicalResource(TechnicalResourceRDAMapper.toRDAList(technicalResourceNodes));
}else{
rda.setTechnicalResource(new ArrayList<>());
}
List<JsonNode> issuedNodes = JsonSearcher.findNodes(datasetDescriptionObj, "rdaProperty", "dataset.issued");
if (!issuedNodes.isEmpty()) {
if (!issuedNodes.isEmpty() && !issuedNodes.get(0).get("value").asText().isEmpty()) {
rda.setIssued(issuedNodes.get(0).get("value").asText());
}
List<JsonNode> contributorNodes = JsonSearcher.findNodes(datasetDescriptionObj, "rdaProperty", "dmp.contributor");

View File

@ -4,6 +4,7 @@ import com.fasterxml.jackson.databind.JsonNode;
import eu.eudat.logic.utilities.helpers.MyStringUtils;
import eu.eudat.logic.utilities.json.JsonSearcher;
import eu.eudat.models.rda.Distribution;
import eu.eudat.models.rda.License;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -20,6 +21,9 @@ public class DistributionRDAMapper {
for (JsonNode node: nodes) {
String rdaProperty = node.get("rdaProperty").asText();
String rdaValue = node.get("value").asText();
if(rdaValue == null || rdaValue.isEmpty()){
continue;
}
String key = node.get("numbering").asText();
if(!key.contains("mult")){
key = "0";
@ -70,10 +74,11 @@ public class DistributionRDAMapper {
break;
case LICENSE:
List<JsonNode> licenseNodes = nodes.stream().filter(lnode -> lnode.get("rdaProperty").asText().toLowerCase().contains("license")).collect(Collectors.toList());
rda.setLicense(Collections.singletonList(LicenseRDAMapper.toRDA(licenseNodes)));
License license = LicenseRDAMapper.toRDA(licenseNodes);
rda.setLicense(license != null? Collections.singletonList(license): new ArrayList<>());
break;
case FORMAT:
rda.setFormat(Collections.singletonList(rdaValue));
rda.setFormat(new ArrayList<>(Arrays.asList(rdaValue.replace(" ", "").split(","))));
rda.setAdditionalProperty(ImportPropertyName.FORMAT.getName(), node.get("id").asText());
break;
case TITLE:
@ -89,8 +94,8 @@ public class DistributionRDAMapper {
}
}
}
return new ArrayList<>(rdaMap.values());
return rdaMap.values().stream()
.filter(distro -> distro.getTitle() != null).collect(Collectors.toList());
}
public static Map<String, String> toProperties(List<Distribution> rdas) {

View File

@ -8,10 +8,8 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.net.URI;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.*;
import java.util.stream.Collectors;
public class HostRDAMapper {
private static final Logger logger = LoggerFactory.getLogger(HostRDAMapper.class);
@ -24,6 +22,9 @@ public class HostRDAMapper {
int firstDiff = MyStringUtils.getFirstDifference(numbering, node.get("numbering").asText());
if (firstDiff == -1 || firstDiff > 2) {
String rdaValue = node.get("value").asText();
if(rdaValue == null || rdaValue.isEmpty()){
continue;
}
for (ExportPropertyName propertyName: ExportPropertyName.values()) {
if (rdaProperty.contains(propertyName.getName())) {
switch (propertyName) {
@ -39,9 +40,6 @@ public class HostRDAMapper {
rda.setBackupType(rdaValue);
break;
case CERTIFIED_WITH:
if(rdaValue == null || rdaValue.isEmpty()){
break;
}
rda.setCertifiedWith(Host.CertifiedWith.fromValue(rdaValue));
rda.setAdditionalProperty(ImportPropertyName.CERTIFIED_WITH.getName(), node.get("id").asText());
break;
@ -50,24 +48,30 @@ public class HostRDAMapper {
rda.setAdditionalProperty(ImportPropertyName.DESCRIPTION.getName(), node.get("id").asText());
break;
case GEO_LOCATION:
if(rdaValue == null || rdaValue.isEmpty()){
break;
}
rda.setGeoLocation(Host.GeoLocation.fromValue(rdaValue));
rda.setAdditionalProperty(ImportPropertyName.GEO_LOCATION.getName(), node.get("id").asText());
break;
case PID_SYSTEM:
rda.setPidSystem(Collections.singletonList(PidSystem.fromValue(rdaValue)));
rda.setAdditionalProperty(ImportPropertyName.PID_SYSTEM.getName(), node.get("id").asText());
try{
Iterator<JsonNode> iter = node.get("value").elements();
List<String> pList = new ArrayList<>();
while(iter.hasNext()) {
pList.add(iter.next().asText());
}
List<PidSystem> pidList = pList.stream().map(PidSystem::fromValue).collect(Collectors.toList());
rda.setPidSystem(pidList);
rda.setAdditionalProperty(ImportPropertyName.PID_SYSTEM.getName(), node.get("id").asText());
}
catch (IllegalArgumentException e){
rda.setPidSystem(new ArrayList<>());
break;
}
break;
case STORAGE_TYPE:
rda.setStorageType(rdaValue);
rda.setAdditionalProperty(ImportPropertyName.STORAGE_TYPE.getName(), node.get("id").asText());
break;
case SUPPORT_VERSIONING:
if(rdaValue == null || rdaValue.isEmpty()){
break;
}
rda.setSupportVersioning(Host.SupportVersioning.fromValue(rdaValue));
rda.setAdditionalProperty(ImportPropertyName.SUPPORT_VERSIONING.getName(), node.get("id").asText());
break;
@ -76,8 +80,12 @@ public class HostRDAMapper {
rda.setAdditionalProperty(ImportPropertyName.TITLE.getName(), node.get("id").asText());
break;
case URL:
rda.setUrl(URI.create(rdaValue));
rda.setAdditionalProperty(ImportPropertyName.URL.getName(), node.get("id").asText());
try {
rda.setUrl(URI.create(rdaValue));
rda.setAdditionalProperty(ImportPropertyName.URL.getName(), node.get("id").asText());
} catch (IllegalArgumentException e) {
logger.warn(e.getLocalizedMessage() + ". Skipping url parsing");
}
break;
}
}
@ -85,13 +93,9 @@ public class HostRDAMapper {
}
}
}
if (rda.getTitle() == null) {
throw new IllegalArgumentException("Host Title is missing");
}
if (rda.getUrl() == null) {
throw new IllegalArgumentException("Host Url is missing");
if(rda.getTitle() == null || rda.getUrl() == null){
return null;
}
return rda;

View File

@ -1,5 +1,6 @@
package eu.eudat.models.rda.mapper;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.eudat.elastic.entities.Tag;
import eu.eudat.logic.utilities.json.JavaToJson;
@ -15,16 +16,20 @@ public class KeywordRDAMapper {
public static List<String> toRDA(String value) {
ObjectMapper mapper = new ObjectMapper();
try {
value = JavaToJson.objectStringToJson(value);
if (!value.isEmpty()) {
List<Tag> tags = Arrays.asList(mapper.readValue(value, Tag[].class));
List<String> keywordNames = tags.stream().map(Tag::getName).collect(Collectors.toList());
return keywordNames;
try {
List<Tag> tags = Arrays.asList(mapper.readValue(value, Tag[].class));
List<String> keywordNames = tags.stream().map(Tag::getName).collect(Collectors.toList());
return keywordNames;
} catch (JsonProcessingException e) {
logger.warn(e.getMessage() + ". Attempting to parse it as a String list.");
if(!value.isEmpty()) {
return new ArrayList<>(Arrays.asList(value.replace(" ", "").split(",")));
}
}
}
} catch (IOException e) {
logger.error(e.getMessage(), e);
}
return new ArrayList<>();
}

View File

@ -3,6 +3,8 @@ package eu.eudat.models.rda.mapper;
import com.fasterxml.jackson.databind.JsonNode;
import eu.eudat.logic.utilities.json.JsonSearcher;
import eu.eudat.models.rda.License;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.net.URI;
import java.util.HashMap;
@ -10,18 +12,25 @@ import java.util.List;
import java.util.Map;
public class LicenseRDAMapper {
private static final Logger logger = LoggerFactory.getLogger(LicenseRDAMapper.class);
public static License toRDA(List<JsonNode> nodes) {
License rda = new License();
for (JsonNode node: nodes) {
String rdaProperty = node.get("rdaProperty").asText();
String value = node.get("value").asText();
if(value == null || value.isEmpty()){
continue;
}
for (LicenceProperties licenceProperties: LicenceProperties.values()) {
if (rdaProperty.contains(licenceProperties.getName())) {
switch (licenceProperties) {
case LICENSE_REF:
rda.setLicenseRef(URI.create(value));
try {
rda.setLicenseRef(URI.create(value));
} catch (IllegalArgumentException e) {
logger.warn(e.getLocalizedMessage() + ". Skipping url parsing");
}
break;
case START_DATE:
rda.setStartDate(value);
@ -38,13 +47,9 @@ public class LicenseRDAMapper {
rda.setAdditionalProperty("start_dateId", node.get("id").asText());
}*/
}
if (rda.getLicenseRef() == null) {
throw new IllegalArgumentException("Licence Reference is missing");
}
if (rda.getStartDate() == null) {
throw new IllegalArgumentException("License Start Date is missing");
if(rda.getLicenseRef() == null || rda.getStartDate() == null){
return null;
}
return rda;

View File

@ -7,6 +7,7 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.*;
import java.util.stream.Collectors;
public class SecurityAndPrivacyRDAMapper {
private static final Logger logger = LoggerFactory.getLogger(SecurityAndPrivacyRDAMapper.class);
@ -17,7 +18,9 @@ public class SecurityAndPrivacyRDAMapper {
for (JsonNode node: nodes) {
String rdaProperty = node.get("rdaProperty").asText();
String rdaValue = node.get("value").asText();
if(rdaValue == null || rdaValue.isEmpty()){
continue;
}
SecurityAndPrivacy rda = getRelative(rdaMap, node.get("numbering").asText());
if (!rdaMap.containsValue(rda)) {
rdaMap.put(node.get("numbering").asText(), rda);
@ -38,7 +41,9 @@ public class SecurityAndPrivacyRDAMapper {
}
}
return new ArrayList<>(rdaMap.values());
return rdaMap.values().stream()
.filter(sap -> sap.getTitle() != null)
.collect(Collectors.toList());
}
public static Map<String, String> toProperties(List<SecurityAndPrivacy> rdas) {

View File

@ -7,6 +7,7 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.*;
import java.util.stream.Collectors;
public class TechnicalResourceRDAMapper {
private static final Logger logger = LoggerFactory.getLogger(TechnicalResourceRDAMapper.class);
@ -17,7 +18,9 @@ public class TechnicalResourceRDAMapper {
for (JsonNode node: nodes) {
String rdaProperty = node.get("rdaProperty").asText();
String rdaValue = node.get("value").asText();
if(rdaValue == null || rdaValue.isEmpty()){
continue;
}
TechnicalResource rda = getRelative(rdaMap, node.get("numbering").asText());
if (!rdaMap.containsValue(rda)) {
rdaMap.put(node.get("numbering").asText(), rda);
@ -38,7 +41,9 @@ public class TechnicalResourceRDAMapper {
}
}
return new ArrayList<>(rdaMap.values());
return rdaMap.values().stream()
.filter(tr -> tr.getName() != null)
.collect(Collectors.toList());
}
public static Map<String, String> toProperties(List<TechnicalResource> rdas) {