You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
argos/dmp-backend/web/src/main/java/eu/eudat/models/data/rda/DatasetRDAExportModel.java

475 lines
21 KiB
Java

package eu.eudat.models.data.rda;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.DeserializationFeature;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.jayway.jsonpath.JsonPath;
import eu.eudat.data.entities.Dataset;
import eu.eudat.logic.managers.DatasetManager;
import eu.eudat.logic.utilities.builders.XmlBuilder;
import eu.eudat.models.data.security.Principal;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import javax.xml.xpath.*;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.*;
import static java.util.stream.Collectors.groupingBy;
/**
 * RDA export model of a single dataset.
 *
 * <p>The snake_case field and accessor names are kept as they are because they are part of the
 * class's public interface. An instance is populated from a {@link Dataset} entity through
 * {@link #fromDataModel(Dataset, DatasetManager, Principal)}, which resolves every exported value
 * by looking up the template fields tagged with a matching {@code rdaProperty} and reading the
 * answer stored for that field id.
 */
public class DatasetRDAExportModel {
    private static final Logger logger = LoggerFactory.getLogger(DatasetRDAExportModel.class);
    private static final ObjectMapper mapper = new ObjectMapper().configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);

    /** Value exported for yes/no/unknown properties when the template gives no answer. */
    private static final String UNKNOWN = "unknown";
    /** Pattern used for {@code issued}: an ISO 8601 calendar date, independent of the JVM locale. */
    private static final String ISSUED_DATE_PATTERN = "yyyy-MM-dd";

    /** Maps the id of a field inside a multiplicity item to the id of that multiplicity item. */
    private final Map<String, String> multiplicityIdToFieldSetId = new HashMap<>();

    private List<String> data_quality_assurance;
    private IdRDAExportModel dataset_id;
    private String description;
    private List<DatasetDistributionRDAExportModel> distribution;
    private String issued; // Created Date, could also use finalized one.
    private List<String> keyword;
    private String language;
    private List<DatasetMetadataRDAExportModel> metadata;
    private String personal_data; // Allowed Values: yes no unknown.
    private String preservation_statement;
    private List<DatasetSecurityAndPrivacyRDAExportModel> security_and_privacy;
    private String sensitive_data; // Allowed Values: yes no unknown.
    private List<DatasetTechnicalResourceRDAExportModel> technical_resource;
    private String title;
    private String type; // Type according to: http://vocabularies.coar-repositories.org/pubby/resource_type.html

    /** @return the data quality assurance statements */
    public List<String> getData_quality_assurance() {
        return data_quality_assurance;
    }

    /** @param data_quality_assurance the data quality assurance statements */
    public void setData_quality_assurance(List<String> data_quality_assurance) {
        this.data_quality_assurance = data_quality_assurance;
    }

    /** @return the dataset identifier */
    public IdRDAExportModel getDataset_id() {
        return dataset_id;
    }

    /** @param dataset_id the dataset identifier */
    public void setDataset_id(IdRDAExportModel dataset_id) {
        this.dataset_id = dataset_id;
    }

    /** @return the dataset description */
    public String getDescription() {
        return description;
    }

    /** @param description the dataset description */
    public void setDescription(String description) {
        this.description = description;
    }

    /** @return the distributions of the dataset */
    public List<DatasetDistributionRDAExportModel> getDistribution() {
        return distribution;
    }

    /** @param distribution the distributions of the dataset */
    public void setDistribution(List<DatasetDistributionRDAExportModel> distribution) {
        this.distribution = distribution;
    }

    /** @return the issue date as text */
    public String getIssued() {
        return issued;
    }

    /** @param issued the issue date as text */
    public void setIssued(String issued) {
        this.issued = issued;
    }

    /** @return the keywords */
    public List<String> getKeyword() {
        return keyword;
    }

    /** @param keyword the keywords */
    public void setKeyword(List<String> keyword) {
        this.keyword = keyword;
    }

    /** @return the language code */
    public String getLanguage() {
        return language;
    }

    /** @param language the language code */
    public void setLanguage(String language) {
        this.language = language;
    }

    /** @return the metadata entries */
    public List<DatasetMetadataRDAExportModel> getMetadata() {
        return metadata;
    }

    /** @param metadata the metadata entries */
    public void setMetadata(List<DatasetMetadataRDAExportModel> metadata) {
        this.metadata = metadata;
    }

    /** @return the personal data flag */
    public String getPersonal_data() {
        return personal_data;
    }

    /** @param personal_data the personal data flag */
    public void setPersonal_data(String personal_data) {
        this.personal_data = personal_data;
    }

    /** @return the preservation statement */
    public String getPreservation_statement() {
        return preservation_statement;
    }

    /** @param preservation_statement the preservation statement */
    public void setPreservation_statement(String preservation_statement) {
        this.preservation_statement = preservation_statement;
    }

    /** @return the security and privacy entries */
    public List<DatasetSecurityAndPrivacyRDAExportModel> getSecurity_and_privacy() {
        return security_and_privacy;
    }

    /** @param security_and_privacy the security and privacy entries */
    public void setSecurity_and_privacy(List<DatasetSecurityAndPrivacyRDAExportModel> security_and_privacy) {
        this.security_and_privacy = security_and_privacy;
    }

    /** @return the sensitive data flag */
    public String getSensitive_data() {
        return sensitive_data;
    }

    /** @param sensitive_data the sensitive data flag */
    public void setSensitive_data(String sensitive_data) {
        this.sensitive_data = sensitive_data;
    }

    /** @return the technical resources */
    public List<DatasetTechnicalResourceRDAExportModel> getTechnical_resource() {
        return technical_resource;
    }

    /** @param technical_resource the technical resources */
    public void setTechnical_resource(List<DatasetTechnicalResourceRDAExportModel> technical_resource) {
        this.technical_resource = technical_resource;
    }

    /** @return the dataset title */
    public String getTitle() {
        return title;
    }

    /** @param title the dataset title */
    public void setTitle(String title) {
        this.title = title;
    }

    /** @return the dataset type */
    public String getType() {
        return type;
    }

    /** @param type the dataset type */
    public void setType(String type) {
        this.type = type;
    }

    /**
     * Builds a new export model from a dataset entity.
     *
     * <p>The returned object is a fresh instance; {@code this} is only used to hold the
     * multiplicity lookup table, which is rebuilt on every call.
     *
     * @param dataset        the entity to export
     * @param datasetManager used to load the dataset together with its profile definition
     * @param principal      the user on whose behalf the dataset is loaded
     * @return the populated export model
     */
    public DatasetRDAExportModel fromDataModel(Dataset dataset, DatasetManager datasetManager, Principal principal) {
        // Map of template field ids to the answers given for them.
        Map<String, Object> templateIdsToValues = dataset.getProperties() == null
                ? new HashMap<>()
                : new JSONObject(dataset.getProperties()).toMap();
        String profileDefinition = dataset.getProfile().getDefinition();

        /*--------- Building dataset rda export model ---------*/
        DatasetRDAExportModel datasetRDAExportModel = new DatasetRDAExportModel();
        datasetRDAExportModel.setDataset_id(new IdRDAExportModel(dataset.getId().toString(), "other"));
        if (dataset.getDescription() != null) {
            datasetRDAExportModel.setDescription(dataset.getDescription().replace("\n", " "));
        }
        if (dataset.getCreated() != null) {
            // A fixed pattern keeps the exported date unambiguous and independent of the server locale.
            // SimpleDateFormat is not thread-safe, so a new instance is created per call.
            datasetRDAExportModel.setIssued(new SimpleDateFormat(ISSUED_DATE_PATTERN, Locale.ROOT).format(dataset.getCreated()));
        }
        datasetRDAExportModel.setLanguage("en"); // mock data
        datasetRDAExportModel.setTitle(dataset.getLabel());

        // Transform the answered dataset description to json so we can parse it and fill the rda model.
        JSONObject datasetDescriptionJson = loadDatasetDescription(dataset, datasetManager, principal);
        setMultiplicityIdToFieldSetId(datasetDescriptionJson);

        /*--------- Building personal data. ---------*/
        String personalData = buildSingleProperties("dataset.personal_data", datasetDescriptionJson, templateIdsToValues);
        datasetRDAExportModel.setPersonal_data(personalData != null ? personalData : UNKNOWN);
        /*--------- Building preservation statement. ---------*/
        datasetRDAExportModel.setPreservation_statement(buildSingleProperties("dataset.preservation_statement", datasetDescriptionJson, templateIdsToValues));
        /*--------- Building sensitive data. ---------*/
        String sensitiveData = buildSingleProperties("dataset.sensitive_data", datasetDescriptionJson, templateIdsToValues);
        datasetRDAExportModel.setSensitive_data(sensitiveData != null ? sensitiveData : UNKNOWN);
        /*--------- Building type. ---------*/
        datasetRDAExportModel.setType(buildSingleProperties("dataset.type", datasetDescriptionJson, templateIdsToValues));
        /*--------- Building data_quality_assurance. ---------*/
        datasetRDAExportModel.setData_quality_assurance(buildDataQualityAssurance(datasetDescriptionJson, templateIdsToValues, profileDefinition));
        /*--------- Building distribution. ---------*/
        datasetRDAExportModel.setDistribution(buildDistribution(datasetDescriptionJson, templateIdsToValues, profileDefinition));
        /*--------- Building keywords. ---------*/
        datasetRDAExportModel.setKeyword(buildKeywords(datasetDescriptionJson, templateIdsToValues, profileDefinition));
        /*--------- Building metadata items. ---------*/
        datasetRDAExportModel.setMetadata(buildMetadata(datasetDescriptionJson, templateIdsToValues, profileDefinition));
        /*--------- Building security and privacy items. ---------*/
        datasetRDAExportModel.setSecurity_and_privacy(buildSecurityAndPrivacy(datasetDescriptionJson, templateIdsToValues, profileDefinition));
        /*--------- Building technical_resource. ---------*/
        datasetRDAExportModel.setTechnical_resource(buildTechnicalResource(datasetDescriptionJson, templateIdsToValues, profileDefinition));
        return datasetRDAExportModel;
    }

    /**
     * Serializes the dataset's profile definition to JSON.
     *
     * @return the definition as JSON, or an empty object when serialization fails so that the
     *         remaining export still runs (every lookup then simply finds nothing)
     */
    private JSONObject loadDatasetDescription(Dataset dataset, DatasetManager datasetManager, Principal principal) {
        try {
            String jsonResult = mapper.writeValueAsString(datasetManager.getSingle(dataset.getId().toString(), principal).getDatasetProfileDefinition());
            return new JSONObject(jsonResult);
        } catch (JsonProcessingException e) {
            logger.error("Could not serialize the profile definition of dataset {}", dataset.getId(), e);
            return new JSONObject();
        }
    }

    /**
     * Returns the answer of the first template field tagged with {@code rdaKey}.
     *
     * @return the answer as text, or {@code null} when no field carries the key or it is unanswered
     */
    private String buildSingleProperties(String rdaKey, JSONObject datasetDescriptionJson, Map<String, Object> templateIdsToValues) {
        String expression = "$..fields[*][?(@.rdaProperty == \"" + rdaKey + "\" )].id";
        List<String> fieldIds = jsonValueListFromExpression(datasetDescriptionJson, expression);
        if (fieldIds.isEmpty()) {
            return null;
        }
        return asText(templateIdsToValues.get(fieldIds.get(0)));
    }

    /** Collects the answers of every field tagged {@code dataset.data_quality_assurance}. */
    private List<String> buildDataQualityAssurance(JSONObject datasetDescriptionJson, Map<String, Object> templateIdsToValues, String datasetProfileDefinition) {
        return collectAnswers("dataset.data_quality_assurance", datasetDescriptionJson, templateIdsToValues, datasetProfileDefinition);
    }

    /**
     * Builds the distribution list. Only a single distribution is currently supported; when the
     * collected values do not form a valid distribution a placeholder entry is returned instead.
     */
    private List<DatasetDistributionRDAExportModel> buildDistribution(JSONObject datasetDescriptionJson, Map<String, Object> templateIdsToValues, String datasetProfileDefinition) {
        DatasetDistributionRDAExportModel distributionModel = new DatasetDistributionRDAExportModel();
        distributionModel.setAccess_url(buildSingleProperties("dataset.distribution.access_url", datasetDescriptionJson, templateIdsToValues));
        distributionModel.setAvailable_till(buildSingleProperties("dataset.distribution.available_till", datasetDescriptionJson, templateIdsToValues));
        distributionModel.setByte_size(buildSingleProperties("dataset.distribution.byte_size", datasetDescriptionJson, templateIdsToValues));
        distributionModel.setData_access(buildSingleProperties("dataset.distribution.data_access", datasetDescriptionJson, templateIdsToValues));
        distributionModel.setDescription(buildSingleProperties("dataset.distribution.description", datasetDescriptionJson, templateIdsToValues));
        distributionModel.setDownload_url(buildSingleProperties("dataset.distribution.download_url", datasetDescriptionJson, templateIdsToValues));
        distributionModel.setTitle(buildSingleProperties("dataset.distribution.title", datasetDescriptionJson, templateIdsToValues));

        // We currently support the return of only one distribution.
        List<DatasetDistributionRDAExportModel> distributionList = new ArrayList<>();
        if (distributionModel.isValid()) {
            distributionList.add(distributionModel);
        } else {
            DatasetDistributionRDAExportModel placeholder = new DatasetDistributionRDAExportModel();
            placeholder.setDescription("Distribution data was not valid");
            distributionList.add(placeholder);
        }
        return distributionList;
    }

    /** Collects the answers of every field tagged {@code dataset.keyword}. */
    private List<String> buildKeywords(JSONObject datasetDescriptionJson, Map<String, Object> templateIdsToValues, String datasetProfileDefinition) {
        return collectAnswers("dataset.keyword", datasetDescriptionJson, templateIdsToValues, datasetProfileDefinition);
    }

    /**
     * Builds one metadata entry per field set, plus one entry for every object found in a
     * {@code dataset.metadata.metadata_standard_id} array value. Field-set entries that are not
     * valid are dropped.
     */
    private List<DatasetMetadataRDAExportModel> buildMetadata(JSONObject datasetDescriptionJson, Map<String, Object> templateIdsToValues, String datasetProfileDefinition) {
        List<RdaField> metadataFields = answeredFields(datasetDescriptionJson, templateIdsToValues,
                new String[]{"dataset.metadata.metadata_standard_id.type", "dataset.metadata.metadata_standard_id.identifier", "dataset.metadata.description", "dataset.metadata.language", "dataset.metadata.metadata_standard_id"},
                datasetProfileDefinition);

        List<DatasetMetadataRDAExportModel> metadataRDAExportModelList = new ArrayList<>();
        for (List<RdaField> fieldSet : groupByFieldSet(metadataFields).values()) {
            DatasetMetadataRDAExportModel metadataRda = new DatasetMetadataRDAExportModel();
            for (RdaField rdaField : fieldSet) {
                String value = rdaField.getRdaValue();
                switch (rdaField.getRdaProperty()) {
                    case "dataset.metadata.metadata_standard_id.identifier":
                        // Identifier and type may arrive in either order; create the id on first sight.
                        if (metadataRda.getMetadata_standard_id() != null) {
                            metadataRda.getMetadata_standard_id().setIdentifier(value);
                        } else {
                            metadataRda.setMetadata_standard_id(new IdRDAExportModel(value, "other"));
                        }
                        break;
                    case "dataset.metadata.metadata_standard_id.type":
                        if (metadataRda.getMetadata_standard_id() != null) {
                            metadataRda.getMetadata_standard_id().setType(value);
                        } else {
                            metadataRda.setMetadata_standard_id(new IdRDAExportModel("", value));
                        }
                        break;
                    case "dataset.metadata.description":
                        metadataRda.setDescription(value);
                        break;
                    case "dataset.metadata.language":
                        metadataRda.setLanguage(value);
                        break;
                    case "dataset.metadata.metadata_standard_id":
                        for (Map<String, Object> entry : parseObjectArray(value)) {
                            String uri = asText(entry.get("uri"));
                            if (uri == null) {
                                continue; // nothing to identify the standard with
                            }
                            DatasetMetadataRDAExportModel standard = new DatasetMetadataRDAExportModel();
                            standard.setMetadata_standard_id(new IdRDAExportModel(uri, "url"));
                            metadataRDAExportModelList.add(standard);
                        }
                        break;
                    default:
                        break;
                }
            }
            if (metadataRda.isValid()) {
                metadataRDAExportModelList.add(metadataRda);
            }
        }
        return metadataRDAExportModelList;
    }

    /**
     * Builds one security-and-privacy entry per field set, plus one entry for every object found
     * in a {@code dataset.security_and_privacy} array value.
     */
    private List<DatasetSecurityAndPrivacyRDAExportModel> buildSecurityAndPrivacy(JSONObject datasetDescriptionJson, Map<String, Object> templateIdsToValues, String datasetProfileDefinition) {
        List<RdaField> secAndPrFields = answeredFields(datasetDescriptionJson, templateIdsToValues,
                new String[]{"dataset.security_and_privacy.description", "dataset.security_and_privacy.title", "dataset.security_and_privacy"},
                datasetProfileDefinition);

        List<DatasetSecurityAndPrivacyRDAExportModel> securityAndPrivacyRDAExportModelList = new ArrayList<>();
        for (List<RdaField> fieldSet : groupByFieldSet(secAndPrFields).values()) {
            DatasetSecurityAndPrivacyRDAExportModel securityAndPrivacyModel = new DatasetSecurityAndPrivacyRDAExportModel();
            for (RdaField rdaField : fieldSet) {
                String value = rdaField.getRdaValue();
                switch (rdaField.getRdaProperty()) {
                    case "dataset.security_and_privacy.description":
                        securityAndPrivacyModel.setDescription(value);
                        break;
                    case "dataset.security_and_privacy.title":
                        securityAndPrivacyModel.setTitle(value);
                        break;
                    case "dataset.security_and_privacy":
                        for (Map<String, Object> entry : parseObjectArray(value)) {
                            securityAndPrivacyRDAExportModelList.add(
                                    new DatasetSecurityAndPrivacyRDAExportModel(asText(entry.get("label")), asText(entry.get("source"))));
                        }
                        break;
                    default:
                        break;
                }
            }
            securityAndPrivacyRDAExportModelList.add(securityAndPrivacyModel);
        }
        return securityAndPrivacyRDAExportModelList;
    }

    /**
     * Builds one technical-resource entry per field set, plus one entry for every object found in
     * a {@code dataset.technical_resource.technical_resource} array value.
     */
    private List<DatasetTechnicalResourceRDAExportModel> buildTechnicalResource(JSONObject datasetDescriptionJson, Map<String, Object> templateIdsToValues, String datasetProfileDefinition) {
        List<RdaField> technicalResourceFields = answeredFields(datasetDescriptionJson, templateIdsToValues,
                new String[]{"dataset.technical_resource.technical_resource", "dataset.technical_resource.technical_resource.description", "dataset.technical_resource.technical_resource.name"},
                datasetProfileDefinition);

        List<DatasetTechnicalResourceRDAExportModel> technicalResourceList = new ArrayList<>();
        for (List<RdaField> fieldSet : groupByFieldSet(technicalResourceFields).values()) {
            DatasetTechnicalResourceRDAExportModel technicalResourceModel = new DatasetTechnicalResourceRDAExportModel();
            for (RdaField rdaField : fieldSet) {
                String value = rdaField.getRdaValue();
                switch (rdaField.getRdaProperty()) {
                    case "dataset.technical_resource.technical_resource.description":
                        technicalResourceModel.setDescription(value);
                        break;
                    case "dataset.technical_resource.technical_resource.name":
                        technicalResourceModel.setName(value);
                        break;
                    case "dataset.technical_resource.technical_resource":
                        // This branch previously tested "dataset.security_and_privacy", a key that is
                        // never requested above, so it could not run. Values that are not a JSON array
                        // are ignored by parseObjectArray, which keeps plain-text answers harmless.
                        for (Map<String, Object> entry : parseObjectArray(value)) {
                            String label = asText(entry.get("label"));
                            technicalResourceList.add(new DatasetTechnicalResourceRDAExportModel(label, label));
                        }
                        break;
                    default:
                        break;
                }
            }
            technicalResourceList.add(technicalResourceModel);
        }
        return technicalResourceList;
    }

    /**
     * Rebuilds the lookup from field id to the id of the multiplicity item that contains it.
     * The table is cleared first so that reusing this instance never leaks entries between datasets.
     */
    private void setMultiplicityIdToFieldSetId(JSONObject json) {
        this.multiplicityIdToFieldSetId.clear();
        String multiplicityItemsFieldSetIdExp = "$..multiplicityItems[*].id";
        for (String fieldSetId : jsonValueListFromExpression(json, multiplicityItemsFieldSetIdExp)) {
            String fieldsFromFieldSetIdExp = "$..multiplicityItems[*][?(@.id == \"" + fieldSetId + "\")].fields[*].id";
            for (String fieldId : jsonValueListFromExpression(json, fieldsFromFieldSetIdExp)) {
                this.multiplicityIdToFieldSetId.put(fieldId, fieldSetId);
            }
        }
    }

    /**
     * Finds every template field tagged with one of {@code rdaKey} and records its field set id.
     * Fields whose id starts with {@code multiple_} are resolved through the multiplicity table,
     * all others through the XML profile definition. The field set id may be {@code null}.
     */
    private List<RdaField> getRDAFieldsFromJson(JSONObject json, String[] rdaKey, String datasetProfileDefinition) {
        List<RdaField> rdaFields = new ArrayList<>();
        for (String key : rdaKey) {
            String fieldIdExpression = "$..fields[*][?(@.rdaProperty == \"" + key + "\" )].id";
            for (String fieldId : jsonValueListFromExpression(json, fieldIdExpression)) {
                RdaField rdaField = new RdaField();
                rdaField.setRdaProperty(key);
                rdaField.setFieldId(fieldId);
                if (fieldId.startsWith("multiple_")) {
                    rdaField.setFieldSetId(this.multiplicityIdToFieldSetId.get(fieldId));
                } else {
                    rdaField.setFieldSetId(getFieldSetIdForFieldFromXML(datasetProfileDefinition, fieldId));
                }
                rdaFields.add(rdaField);
            }
        }
        return rdaFields;
    }

    /**
     * Evaluates a JsonPath expression and returns the matches as text.
     *
     * @return the matches in document order; empty when {@code json} is {@code null}
     */
    private List<String> jsonValueListFromExpression(JSONObject json, String expression) {
        List<String> valueList = new ArrayList<>();
        if (json == null) {
            return valueList;
        }
        net.minidev.json.JSONArray jsonArray = JsonPath.parse(json.toString()).read(expression);
        for (Object match : jsonArray) {
            if (match != null) {
                valueList.add(match.toString());
            }
        }
        return valueList;
    }

    /**
     * Looks up the id of the {@code fieldSet} element enclosing the given field in the XML definition.
     *
     * @return the field set id, or {@code null} unless exactly one enclosing field set is found
     */
    private String getFieldSetIdForFieldFromXML(String datasetProfileDefinition, String fieldId) {
        String fieldSetIdExpression = "//field[@id ='" + fieldId + "']/ancestor::fieldSet/@id";
        List<String> fieldSetIds = xmlValueListFromExpression(datasetProfileDefinition, fieldSetIdExpression);
        if (fieldSetIds.size() == 1) {
            return fieldSetIds.get(0);
        }
        return null;
    }

    /**
     * Evaluates an XPath expression against an XML string and returns the node values.
     * An invalid expression is logged and yields an empty list.
     */
    private List<String> xmlValueListFromExpression(String xml, String expression) {
        List<String> valuesList = new ArrayList<>();
        Document document = XmlBuilder.fromXml(xml);
        XPath xpath = XPathFactory.newInstance().newXPath();
        try {
            XPathExpression expr = xpath.compile(expression);
            NodeList nodeList = (NodeList) expr.evaluate(document, XPathConstants.NODESET);
            for (int i = 0; i < nodeList.getLength(); i++) {
                Node node = nodeList.item(i);
                valuesList.add(node.getNodeValue());
            }
        } catch (XPathExpressionException e) {
            logger.error(e.getMessage(), e);
        }
        return valuesList;
    }

    /** Returns the answers of all answered fields tagged with {@code rdaKey}. */
    private List<String> collectAnswers(String rdaKey, JSONObject datasetDescriptionJson, Map<String, Object> templateIdsToValues, String datasetProfileDefinition) {
        List<String> answers = new ArrayList<>();
        for (RdaField rdaField : answeredFields(datasetDescriptionJson, templateIdsToValues, new String[]{rdaKey}, datasetProfileDefinition)) {
            answers.add(rdaField.getRdaValue());
        }
        return answers;
    }

    /**
     * Resolves the fields tagged with any of {@code rdaKeys} and attaches their answers.
     * Fields without a stored answer are left out, so callers never see a {@code null} value.
     */
    private List<RdaField> answeredFields(JSONObject datasetDescriptionJson, Map<String, Object> templateIdsToValues, String[] rdaKeys, String datasetProfileDefinition) {
        List<RdaField> answered = new ArrayList<>();
        for (RdaField rdaField : getRDAFieldsFromJson(datasetDescriptionJson, rdaKeys, datasetProfileDefinition)) {
            String answer = asText(templateIdsToValues.get(rdaField.getFieldId()));
            if (answer != null) {
                rdaField.setRdaValue(answer);
                answered.add(rdaField);
            }
        }
        return answered;
    }

    /**
     * Groups fields by field set id, keeping first-seen order so the export is deterministic.
     * A field with no known field set forms a group of its own (keyed by its field id) so that its
     * value cannot overwrite the value of an unrelated field.
     */
    private static Map<String, List<RdaField>> groupByFieldSet(List<RdaField> fields) {
        Map<String, List<RdaField>> groups = new LinkedHashMap<>();
        for (RdaField field : fields) {
            String groupKey = field.getFieldSetId() != null ? field.getFieldSetId() : field.getFieldId();
            groups.computeIfAbsent(groupKey, key -> new ArrayList<>()).add(field);
        }
        return groups;
    }

    /**
     * Parses a field value that is expected to hold a JSON array of objects.
     *
     * @return one map per JSON object in the array; empty when the value is blank or is not a
     *         JSON array (the latter is logged). Array elements that are not objects are skipped.
     */
    private static List<Map<String, Object>> parseObjectArray(String rawValue) {
        List<Map<String, Object>> objects = new ArrayList<>();
        if (rawValue == null || rawValue.trim().isEmpty()) {
            return objects;
        }
        JSONArray jsonArray;
        try {
            jsonArray = new JSONArray(rawValue);
        } catch (JSONException e) {
            logger.warn("Ignoring a field value that is not a JSON array", e);
            return objects;
        }
        for (int i = 0; i < jsonArray.length(); i++) {
            JSONObject jsonObject = jsonArray.optJSONObject(i);
            if (jsonObject != null) {
                objects.add(jsonObject.toMap());
            }
        }
        return objects;
    }

    /** Null-safe {@code toString()}. */
    private static String asText(Object value) {
        return value == null ? null : value.toString();
    }
}