package eu.eudat.models.data.rda; import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.DeserializationFeature; import com.fasterxml.jackson.databind.ObjectMapper; import com.jayway.jsonpath.JsonPath; import eu.eudat.data.entities.Dataset; import eu.eudat.logic.managers.DatasetManager; import eu.eudat.logic.utilities.builders.XmlBuilder; import eu.eudat.models.data.security.Principal; import org.json.JSONArray; import org.json.JSONObject; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.w3c.dom.Document; import org.w3c.dom.Node; import org.w3c.dom.NodeList; import javax.xml.xpath.*; import java.text.DateFormat; import java.util.*; import static java.util.stream.Collectors.groupingBy; public class DatasetRDAExportModel { private static final Logger logger = LoggerFactory.getLogger(DatasetRDAExportModel.class); private static final ObjectMapper mapper = new ObjectMapper().configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); private Map multiplicityIdToFieldSetId = new HashMap<>(); private List data_quality_assurance; private IdRDAExportModel dataset_id; private String description; private List distribution; private String issued; // Created Date, could also use finalized one. private List keyword; private String language; private List metadata; private String personal_data; // Allowed Values: yes no unknown. private String preservation_statement; private List security_and_privacy; private String sensitive_data; // Allowed Values: yes no unknown. private List technical_resource; private String title; private String type; // Type according to: http://vocabularies.coar-repositories.org/pubby/resource_type.html public List getData_quality_assurance() { return data_quality_assurance; } public void setData_quality_assurance(List data_quality_assurance) { this.data_quality_assurance = data_quality_assurance; } public IdRDAExportModel getDataset_id() { return dataset_id; } public void setDataset_id(IdRDAExportModel dataset_id) { this.dataset_id = dataset_id; } public String getDescription() { return description; } public void setDescription(String description) { this.description = description; } public List getDistribution() { return distribution; } public void setDistribution(List distribution) { this.distribution = distribution; } public String getIssued() { return issued; } public void setIssued(String issued) { this.issued = issued; } public List getKeyword() { return keyword; } public void setKeyword(List keyword) { this.keyword = keyword; } public String getLanguage() { return language; } public void setLanguage(String language) { this.language = language; } public List getMetadata() { return metadata; } public void setMetadata(List metadata) { this.metadata = metadata; } public String getPersonal_data() { return personal_data; } public void setPersonal_data(String personal_data) { this.personal_data = personal_data; } public String getPreservation_statement() { return preservation_statement; } public void setPreservation_statement(String preservation_statement) { this.preservation_statement = preservation_statement; } public List getSecurity_and_privacy() { return security_and_privacy; } public void setSecurity_and_privacy(List security_and_privacy) { this.security_and_privacy = security_and_privacy; } public String getSensitive_data() { return sensitive_data; } public void setSensitive_data(String sensitive_data) { this.sensitive_data = sensitive_data; } public List getTechnical_resource() { return technical_resource; } public void setTechnical_resource(List technical_resource) { this.technical_resource = technical_resource; } public String getTitle() { return title; } public void setTitle(String title) { this.title = title; } public String getType() { return type; } public void setType(String type) { this.type = type; } public DatasetRDAExportModel fromDataModel(Dataset dataset, DatasetManager datasetManager, Principal principal) { // Map of template Ids to rda values. JSONObject jObject = new JSONObject(dataset.getProperties()); Map templateIdsToValues = jObject.toMap(); /*--------- Building dataset rda export model ---------*/ DatasetRDAExportModel datasetRDAExportModel = new DatasetRDAExportModel(); datasetRDAExportModel.setDataset_id(new IdRDAExportModel(dataset.getId().toString(), "other")); if (dataset.getDescription() != null) datasetRDAExportModel.setDescription(dataset.getDescription().replace("\n", " ")); datasetRDAExportModel.setIssued(DateFormat.getDateInstance(DateFormat.SHORT).format(dataset.getCreated())); datasetRDAExportModel.setLanguage("en"); // mock data datasetRDAExportModel.setTitle(dataset.getLabel()); // Transform the answered dataset description to json so we can parse it and fill the rda model. JSONObject datasetDescriptionJson = null; try { String jsonResult = mapper.writeValueAsString(datasetManager.getSingle(dataset.getId().toString(), principal).getDatasetProfileDefinition()); datasetDescriptionJson = new JSONObject(jsonResult); } catch (JsonProcessingException e) { logger.error(e.getMessage(), e); } setMultiplicityIdToFieldSetId(datasetDescriptionJson); /*--------- Building personal data. ---------*/ String personalData = buildSingleProperties("dataset.personal_data", datasetDescriptionJson, templateIdsToValues); if (personalData != null) { datasetRDAExportModel.setPersonal_data(personalData); } else { datasetRDAExportModel.setPersonal_data("unknown"); } /*--------- Building preservation statement. ---------*/ datasetRDAExportModel.setPreservation_statement(buildSingleProperties("dataset.preservation_statement", datasetDescriptionJson, templateIdsToValues)); /*--------- Building sensitive data. ---------*/ String sensitiveData = buildSingleProperties("dataset.sensitive_data", datasetDescriptionJson, templateIdsToValues); if (personalData != null) { datasetRDAExportModel.setSensitive_data(sensitiveData); } else { datasetRDAExportModel.setSensitive_data("unknown"); } /*--------- Building type. ---------*/ datasetRDAExportModel.setType(buildSingleProperties("dataset.type", datasetDescriptionJson, templateIdsToValues)); /*--------- Building data_quality_assurance. ---------*/ datasetRDAExportModel.setData_quality_assurance(buildDataQualityAssurance(datasetDescriptionJson, templateIdsToValues, dataset.getProfile().getDefinition())); /*--------- Building distribution. ---------*/ datasetRDAExportModel.setDistribution(buildDistribution(datasetDescriptionJson, templateIdsToValues, dataset.getProfile().getDefinition())); /*--------- Building keywords. ---------*/ datasetRDAExportModel.setKeyword(buildKeywords(datasetDescriptionJson, templateIdsToValues, dataset.getProfile().getDefinition())); /*--------- Building metadata items. ---------*/ datasetRDAExportModel.setMetadata(buildMetadata(datasetDescriptionJson, templateIdsToValues, dataset.getProfile().getDefinition())); /*--------- Building security and privacy items. ---------*/ datasetRDAExportModel.setSecurity_and_privacy(buildSecurityAndPrivacy(datasetDescriptionJson, templateIdsToValues, dataset.getProfile().getDefinition())); /*--------- Building technical_resource. ---------*/ datasetRDAExportModel.setTechnical_resource(buildTechnicalResource(datasetDescriptionJson, templateIdsToValues, dataset.getProfile().getDefinition())); return datasetRDAExportModel; } private String buildSingleProperties(String rdaKey, JSONObject datasetDescriptionJson, Map templateIdsToValues) { String expression = "$..fields[*][?(@.rdaProperty == \"" + rdaKey + "\" )].id"; List list = jsonValueListFromExpression(datasetDescriptionJson, expression); if (!list.isEmpty()) { return templateIdsToValues.get(list.get(0)).toString(); } else { return null; } } private List buildDataQualityAssurance(JSONObject datasetDescriptionJson, Map templateIdsToValues, String datasetProfileDefinition) { List dataQualityFields = getRDAFieldsFromJson(datasetDescriptionJson, new String[]{"dataset.data_quality_assurance"}, datasetProfileDefinition); for (RdaField rdaField : dataQualityFields) { rdaField.setRdaValue(templateIdsToValues.get(rdaField.getFieldId()).toString()); } List dataQualityAssuranceList = new LinkedList<>(); for (RdaField rdaField : dataQualityFields) { dataQualityAssuranceList.add(rdaField.getRdaValue()); } return dataQualityAssuranceList; } private List buildDistribution(JSONObject datasetDescriptionJson, Map templateIdsToValues, String datasetProfileDefinition) { DatasetDistributionRDAExportModel distributionModel = new DatasetDistributionRDAExportModel(); distributionModel.setAccess_url(buildSingleProperties("dataset.distribution.access_url", datasetDescriptionJson, templateIdsToValues)); distributionModel.setAvailable_till(buildSingleProperties("dataset.distribution.available_till", datasetDescriptionJson, templateIdsToValues)); distributionModel.setByte_size(buildSingleProperties("dataset.distribution.byte_size", datasetDescriptionJson, templateIdsToValues)); distributionModel.setData_access(buildSingleProperties("dataset.distribution.data_access", datasetDescriptionJson, templateIdsToValues)); distributionModel.setDescription(buildSingleProperties("dataset.distribution.description", datasetDescriptionJson, templateIdsToValues)); distributionModel.setDownload_url(buildSingleProperties("dataset.distribution.download_url", datasetDescriptionJson, templateIdsToValues)); distributionModel.setTitle(buildSingleProperties("dataset.distribution.title", datasetDescriptionJson, templateIdsToValues)); /*--------- Building format. ---------*/ // We currently support the return of only one distribution. List distributionList = new LinkedList<>(); if (distributionModel.isValid()) { distributionList.add(distributionModel); } else { DatasetDistributionRDAExportModel model = new DatasetDistributionRDAExportModel(); model.setDescription("Distribution data was not valid"); distributionList.add(model); } return distributionList; } private List buildKeywords(JSONObject datasetDescriptionJson, Map templateIdsToValues, String datasetProfileDefinition) { List keywordFields = getRDAFieldsFromJson(datasetDescriptionJson, new String[]{"dataset.keyword"}, datasetProfileDefinition); for (RdaField rdaField : keywordFields) { rdaField.setRdaValue(templateIdsToValues.get(rdaField.getFieldId()).toString()); } List keywordsList = new LinkedList<>(); for (RdaField rdaField : keywordFields) { keywordsList.add(rdaField.getRdaValue()); } return keywordsList; } private List buildMetadata(JSONObject datasetDescriptionJson, Map templateIdsToValues, String datasetProfileDefinition) { List metadataFields = getRDAFieldsFromJson(datasetDescriptionJson, new String[]{"dataset.metadata.metadata_standard_id.type", "dataset.metadata.metadata_standard_id.identifier", "dataset.metadata.description", "dataset.metadata.language", "dataset.metadata.metadata_standard_id"}, datasetProfileDefinition); // Adding rdaValue and FieldSetIds on metadataFields. for (RdaField rdaField : metadataFields) { rdaField.setRdaValue(templateIdsToValues.get(rdaField.getFieldId()).toString()); } // Group metadataFields based on their field set id. Map> groupedMetadataFields = metadataFields.stream().collect(groupingBy(RdaField::getFieldSetId)); // Creating the metadata. List metadataRDAExportModelList = new LinkedList<>(); for (String fieldSetId : groupedMetadataFields.keySet()) { DatasetMetadataRDAExportModel metadataRda = new DatasetMetadataRDAExportModel(); for (RdaField rdaField : groupedMetadataFields.get(fieldSetId)) { if (rdaField.getRdaProperty().equals("dataset.metadata.metadata_standard_id.identifier")) { if (metadataRda.getMetadata_standard_id() != null) { metadataRda.getMetadata_standard_id().setIdentifier(rdaField.getRdaValue()); } else { metadataRda.setMetadata_standard_id(new IdRDAExportModel(rdaField.getRdaValue(), "other")); } } if (rdaField.getRdaProperty().equals("dataset.metadata.metadata_standard_id.type")) { if (metadataRda.getMetadata_standard_id() != null) { metadataRda.getMetadata_standard_id().setType(rdaField.getRdaValue()); } else { metadataRda.setMetadata_standard_id(new IdRDAExportModel("", rdaField.getRdaValue())); } } if (rdaField.getRdaProperty().equals("dataset.metadata.description")) { metadataRda.setDescription(rdaField.getRdaValue()); } if (rdaField.getRdaProperty().equals("dataset.metadata.language")) { metadataRda.setLanguage(rdaField.getRdaValue()); } if (rdaField.getRdaProperty().equals("dataset.metadata.metadata_standard_id") && !rdaField.getRdaValue().isEmpty()) { JSONArray jsonArray = new JSONArray(rdaField.getRdaValue()); for (int i = 0; i < jsonArray.length(); i++) { JSONObject jsonObject = jsonArray.getJSONObject(i); Map jsonObjectMap = jsonObject.toMap(); DatasetMetadataRDAExportModel metadataRda1 = new DatasetMetadataRDAExportModel(); // metadataRda1.setMetadata_standard_id(new IdRDAExportModel(jsonObjectMap.get("label").toString(), jsonObjectMap.get("source").toString())); metadataRda1.setMetadata_standard_id(new IdRDAExportModel(jsonObjectMap.get("uri").toString(), "url")); metadataRDAExportModelList.add(metadataRda1); } } } if (metadataRda.isValid()) { metadataRDAExportModelList.add(metadataRda); } } return new LinkedList<>(metadataRDAExportModelList); } private List buildSecurityAndPrivacy(JSONObject datasetDescriptionJson, Map templateIdsToValues, String datasetProfileDefinition) { List secAndPrFields = getRDAFieldsFromJson( datasetDescriptionJson, new String[]{"dataset.security_and_privacy.description", "dataset.security_and_privacy.title", "dataset.security_and_privacy"}, datasetProfileDefinition); for (RdaField rdaField : secAndPrFields) { rdaField.setRdaValue(templateIdsToValues.get(rdaField.getFieldId()).toString()); } Map> groupedSecurityAndPrivacyFields = secAndPrFields.stream().collect(groupingBy(RdaField::getFieldSetId)); List securityAndPrivacyRDAExportModelList = new LinkedList<>(); for (String fieldSetId : groupedSecurityAndPrivacyFields.keySet()) { DatasetSecurityAndPrivacyRDAExportModel securityAndPrivacyModel = new DatasetSecurityAndPrivacyRDAExportModel(); for (RdaField rdaField : groupedSecurityAndPrivacyFields.get(fieldSetId)) { if (rdaField.getRdaProperty().equals("dataset.security_and_privacy.description")) { securityAndPrivacyModel.setDescription(rdaField.getRdaValue()); } if (rdaField.getRdaProperty().equals("dataset.security_and_privacy.title")) { securityAndPrivacyModel.setTitle(rdaField.getRdaValue()); } if (rdaField.getRdaProperty().equals("dataset.security_and_privacy")) { JSONArray jsonArray = new JSONArray(rdaField.getRdaValue()); for (int i = 0; i < jsonArray.length(); i++) { JSONObject jsonObject = jsonArray.getJSONObject(i); Map jsonObjectMap = jsonObject.toMap(); DatasetSecurityAndPrivacyRDAExportModel secAndPrivacy = new DatasetSecurityAndPrivacyRDAExportModel(jsonObjectMap.get("label").toString(), jsonObjectMap.get("source").toString()); securityAndPrivacyRDAExportModelList.add(secAndPrivacy); } } } securityAndPrivacyRDAExportModelList.add(securityAndPrivacyModel); } return securityAndPrivacyRDAExportModelList; } private List buildTechnicalResource(JSONObject datasetDescriptionJson, Map templateIdsToValues, String datasetProfileDefinition) { List dataQualityFields = getRDAFieldsFromJson(datasetDescriptionJson, new String[]{"dataset.technical_resource.technical_resource", "dataset.technical_resource.technical_resource.description", "dataset.technical_resource.technical_resource.name"}, datasetProfileDefinition); for (RdaField rdaField : dataQualityFields) { rdaField.setRdaValue(templateIdsToValues.get(rdaField.getFieldId()).toString()); } List technicalResourceList = new LinkedList<>(); Map> groupedDataQualityFields = dataQualityFields.stream().collect(groupingBy(RdaField::getFieldSetId)); for (String fieldSetId : groupedDataQualityFields.keySet()) { DatasetTechnicalResourceRDAExportModel technicalResourceModel = new DatasetTechnicalResourceRDAExportModel(); for (RdaField rdaField : groupedDataQualityFields.get(fieldSetId)) { if (rdaField.getRdaProperty().equals("dataset.technical_resource.technical_resource.description")) { technicalResourceModel.setDescription(rdaField.getRdaValue()); } if (rdaField.getRdaProperty().equals("dataset.technical_resource.technical_resource.name")) { technicalResourceModel.setName(rdaField.getRdaValue()); } if (rdaField.getRdaProperty().equals("dataset.security_and_privacy")) { JSONArray jsonArray = new JSONArray(rdaField.getRdaValue()); for (int i = 0; i < jsonArray.length(); i++) { JSONObject jsonObject = jsonArray.getJSONObject(i); Map jsonObjectMap = jsonObject.toMap(); DatasetTechnicalResourceRDAExportModel technicalResource = new DatasetTechnicalResourceRDAExportModel(jsonObjectMap.get("label").toString(), jsonObjectMap.get("label").toString()); technicalResourceList.add(technicalResource); } } } technicalResourceList.add(technicalResourceModel); } return technicalResourceList; } private void setMultiplicityIdToFieldSetId(JSONObject json) { String multiplicityItemsFieldSetIdExp = "$..multiplicityItems[*].id"; List multiplicityItemsFieldSetIdList = jsonValueListFromExpression(json, multiplicityItemsFieldSetIdExp); for (String fieldSetId : multiplicityItemsFieldSetIdList) { String fieldsFromFieldSetIdExp = "$..multiplicityItems[*][?(@.id == \""+ fieldSetId +"\")].fields[*].id"; List fieldsIdList = jsonValueListFromExpression(json, fieldsFromFieldSetIdExp); for (String fieldId : fieldsIdList) { this.multiplicityIdToFieldSetId.put(fieldId, fieldSetId); } } } private List getRDAFieldsFromJson(JSONObject json, String[] rdaKey, String datasetProfileDefinition) { List rdaFields = new LinkedList<>(); for (String key : rdaKey) { String fieldIdExpression = "$..fields[*][?(@.rdaProperty == \"" + key + "\" )].id"; List listFromExpression = jsonValueListFromExpression(json, fieldIdExpression); for (String fieldId : listFromExpression) { RdaField rdaField = new RdaField(); rdaField.setRdaProperty(key); rdaField.setFieldId(fieldId); if (fieldId.startsWith("multiple_")) { rdaField.setFieldSetId(this.multiplicityIdToFieldSetId.get(fieldId)); } else { rdaField.setFieldSetId(getFieldSetIdForFieldFromXML(datasetProfileDefinition, fieldId)); } rdaFields.add(rdaField); } } return rdaFields; } private List jsonValueListFromExpression(JSONObject json, String expression) { net.minidev.json.JSONArray jsonArray = JsonPath.parse(json.toString()).read(expression); List valueList = new LinkedList<>(); for (Object o : jsonArray) { valueList.add(o.toString()); } return valueList; } private String getFieldSetIdForFieldFromXML(String datasetProfileDefinition, String fieldId) { String fieldSetIdExpression = "//field[@id ='" + fieldId + "']/ancestor::fieldSet/@id"; List listFromExpression = xmlValueListFromExpression(datasetProfileDefinition, fieldSetIdExpression); if (listFromExpression.size() == 1) return listFromExpression.get(0); return null; } private List xmlValueListFromExpression(String xml, String expression) { List valuesList = new LinkedList<>(); Document document = XmlBuilder.fromXml(xml); XPathFactory xpathFactory = XPathFactory.newInstance(); XPath xpath = xpathFactory.newXPath(); try { XPathExpression expr = xpath.compile(expression); NodeList nodeList = (NodeList) expr.evaluate(document, XPathConstants.NODESET); for (int i = 0; i < nodeList.getLength(); i++) { Node node = nodeList.item(i); valuesList.add(node.getNodeValue()); } } catch (XPathExpressionException e) { logger.error(e.getMessage(), e); } return valuesList; } }