2020-09-30 10:36:30 +02:00
|
|
|
package org.gcube.common.metadataprofilediscovery;
|
|
|
|
|
|
|
|
import java.io.BufferedReader;
|
|
|
|
import java.io.IOException;
|
|
|
|
import java.io.InputStream;
|
|
|
|
import java.io.InputStreamReader;
|
|
|
|
import java.io.StringReader;
|
|
|
|
import java.net.URL;
|
|
|
|
import java.util.ArrayList;
|
|
|
|
import java.util.HashMap;
|
|
|
|
import java.util.List;
|
|
|
|
import java.util.Map;
|
|
|
|
import java.util.stream.Collectors;
|
|
|
|
|
|
|
|
import javax.xml.XMLConstants;
|
2020-09-30 14:10:09 +02:00
|
|
|
import javax.xml.bind.JAXBContext;
|
|
|
|
import javax.xml.bind.JAXBException;
|
|
|
|
import javax.xml.bind.Unmarshaller;
|
2020-09-30 10:36:30 +02:00
|
|
|
import javax.xml.parsers.DocumentBuilder;
|
|
|
|
import javax.xml.parsers.DocumentBuilderFactory;
|
|
|
|
import javax.xml.parsers.ParserConfigurationException;
|
|
|
|
import javax.xml.transform.Source;
|
|
|
|
import javax.xml.transform.dom.DOMSource;
|
|
|
|
import javax.xml.transform.stream.StreamSource;
|
|
|
|
import javax.xml.validation.Schema;
|
|
|
|
import javax.xml.validation.SchemaFactory;
|
|
|
|
import javax.xml.validation.Validator;
|
|
|
|
|
|
|
|
import org.gcube.common.metadataprofilediscovery.bean.MetadataProfile;
|
|
|
|
import org.gcube.common.metadataprofilediscovery.jaxb.MetadataFormat;
|
|
|
|
import org.gcube.common.metadataprofilediscovery.jaxb.NamespaceCategory;
|
|
|
|
import org.gcube.common.metadataprofilediscovery.reader.MetadataFormatDiscovery;
|
|
|
|
import org.gcube.common.metadataprofilediscovery.reader.MetadataFormatReader;
|
|
|
|
import org.gcube.common.metadataprofilediscovery.reader.NamespaceCategoryReader;
|
|
|
|
import org.gcube.common.scope.api.ScopeProvider;
|
|
|
|
import org.gcube.common.scope.impl.ScopeBean;
|
|
|
|
import org.slf4j.Logger;
|
|
|
|
import org.slf4j.LoggerFactory;
|
|
|
|
import org.w3c.dom.Document;
|
|
|
|
import org.xml.sax.InputSource;
|
|
|
|
import org.xml.sax.SAXException;
|
|
|
|
|
|
|
|
/**
|
2020-09-30 14:10:09 +02:00
|
|
|
* The Class MetadataProfileReader.
|
2020-09-30 10:36:30 +02:00
|
|
|
*
|
2022-03-01 16:39:43 +01:00
|
|
|
* @author Francesco Mangiacrapa at ISTI-CNR francesco.mangiacrapa@isti.cnr.it
|
2020-09-30 14:10:09 +02:00
|
|
|
*
|
2022-03-01 16:39:43 +01:00
|
|
|
* Mar 1, 2022
|
2020-09-30 10:36:30 +02:00
|
|
|
*/
|
2020-09-30 14:10:09 +02:00
|
|
|
public class MetadataProfileReader implements MetadataProfileDiscovery {
|
|
|
|
|
|
|
|
public static final String URL_OF_GCDCMETADATAPROFILEV3_XSD = "https://wiki.gcube-system.org/images_gcube/e/e8/Gcdcmetadataprofilev3.xsd";
|
2020-09-30 10:36:30 +02:00
|
|
|
|
|
|
|
private static String SCHEMA_FILENAME = "Gdcmetadataprofilev3.xsd";
|
|
|
|
|
|
|
|
private MetadataFormatDiscovery medataFormatDiscovery;
|
|
|
|
private ScopeBean scope;
|
|
|
|
private Map<String, MetadataFormat> hashMetadataFormats = null;
|
|
|
|
private List<NamespaceCategory> namespaceCategories = null;
|
|
|
|
|
|
|
|
private String profileSchema = null;
|
|
|
|
|
2020-09-30 14:10:09 +02:00
|
|
|
private String grMetadataProfileSecondaryType;
|
2020-09-30 10:36:30 +02:00
|
|
|
|
2022-03-01 16:39:43 +01:00
|
|
|
private String grMetadataProfileResourceName;
|
|
|
|
|
2020-09-30 14:10:09 +02:00
|
|
|
private static Logger logger = LoggerFactory.getLogger(MetadataProfileReader.class);
|
2022-03-01 16:39:43 +01:00
|
|
|
|
2020-09-30 10:36:30 +02:00
|
|
|
/**
|
2020-09-30 14:10:09 +02:00
|
|
|
* Instantiates a new metadata profile reader.
|
2020-09-30 10:36:30 +02:00
|
|
|
*
|
2022-03-01 16:39:43 +01:00
|
|
|
* @param grMetadataProfileSecondaryType the SecondaryType that must be used to
|
|
|
|
* discover the "gCube Metadata Profiles"
|
|
|
|
* from Information System
|
2020-09-30 10:36:30 +02:00
|
|
|
* @throws Exception the exception
|
|
|
|
*/
|
2020-09-30 14:10:09 +02:00
|
|
|
public MetadataProfileReader(String grMetadataProfileSecondaryType) throws Exception {
|
2022-03-01 16:39:43 +01:00
|
|
|
|
|
|
|
if (grMetadataProfileSecondaryType == null || grMetadataProfileSecondaryType.isEmpty())
|
|
|
|
throw new NullPointerException("Invalid input parameter: grMetadataProfileSecondaryType");
|
|
|
|
|
2020-09-30 14:10:09 +02:00
|
|
|
this.grMetadataProfileSecondaryType = grMetadataProfileSecondaryType;
|
2022-03-01 16:39:43 +01:00
|
|
|
|
|
|
|
readNamespacesAndProfiles();
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Instantiates a new metadata profile reader.
|
|
|
|
*
|
|
|
|
* @param grMetadataProfileSecondaryType the gr metadata profile secondary type
|
|
|
|
* @param resourceName the resource name
|
|
|
|
*
|
|
|
|
* the SecondaryType and the ResourceName
|
|
|
|
* that must be used to discover the
|
|
|
|
* "gCube Metadata Profiles" from
|
|
|
|
* Information System
|
|
|
|
* @throws Exception the exception
|
|
|
|
*/
|
|
|
|
public MetadataProfileReader(String grMetadataProfileSecondaryType, String resourceName) throws Exception {
|
|
|
|
|
|
|
|
if (grMetadataProfileSecondaryType == null || grMetadataProfileSecondaryType.isEmpty())
|
|
|
|
throw new NullPointerException("Invalid input parameter: grMetadataProfileSecondaryType");
|
|
|
|
|
|
|
|
if (resourceName == null || resourceName.isEmpty())
|
|
|
|
throw new NullPointerException("Invalid input parameter: resourceName");
|
|
|
|
|
|
|
|
this.grMetadataProfileSecondaryType = grMetadataProfileSecondaryType;
|
|
|
|
this.grMetadataProfileResourceName = resourceName;
|
|
|
|
|
|
|
|
readNamespacesAndProfiles();
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Read namespaces and profiles.
|
|
|
|
*
|
|
|
|
* @throws Exception the exception
|
|
|
|
*/
|
|
|
|
private void readNamespacesAndProfiles() throws Exception {
|
|
|
|
|
2020-09-30 10:36:30 +02:00
|
|
|
String scopeString = ScopeProvider.instance.get();
|
|
|
|
logger.debug("Read scope " + scopeString + " from ScopeProvider");
|
|
|
|
|
|
|
|
if (scopeString == null || scopeString.isEmpty())
|
|
|
|
throw new Exception("Please set a valid scope into ScopeProvider");
|
|
|
|
|
|
|
|
scope = new ScopeBean(scopeString);
|
2022-03-01 16:39:43 +01:00
|
|
|
|
2020-09-30 10:36:30 +02:00
|
|
|
readNamespaces();
|
2020-09-30 14:10:09 +02:00
|
|
|
readMetadataFormats();
|
2020-09-30 10:36:30 +02:00
|
|
|
}
|
2022-03-01 16:39:43 +01:00
|
|
|
|
2020-09-30 10:36:30 +02:00
|
|
|
/**
|
|
|
|
* Read metada formats.
|
|
|
|
*
|
|
|
|
* @throws Exception the exception
|
|
|
|
*/
|
2020-09-30 14:10:09 +02:00
|
|
|
private void readMetadataFormats() throws Exception {
|
2022-03-01 16:39:43 +01:00
|
|
|
|
|
|
|
// reading from Generic Resource for Secondary Type and Resource Name passed in
|
|
|
|
// input
|
|
|
|
medataFormatDiscovery = new MetadataFormatDiscovery(scope, grMetadataProfileSecondaryType,
|
|
|
|
grMetadataProfileResourceName);
|
2020-09-30 14:28:35 +02:00
|
|
|
|
|
|
|
logger.info("MedataFormatDiscovery has retrieved: " + medataFormatDiscovery.getMetadataProfiles().size()
|
|
|
|
+ " metadata type/s");
|
|
|
|
logger.debug("filling cache for MedataFormat");
|
|
|
|
hashMetadataFormats = new HashMap<String, MetadataFormat>(medataFormatDiscovery.getMetadataProfiles().size());
|
|
|
|
for (MetadataProfile mT : medataFormatDiscovery.getMetadataProfiles()) {
|
|
|
|
if (mT == null)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
MetadataFormatReader reader = new MetadataFormatReader(scope, mT.getId());
|
|
|
|
hashMetadataFormats.put(mT.getId(), reader.getMetadataFormat());
|
|
|
|
logger.debug("MetadataType id: " + mT.getId() + " cached as: " + reader.getMetadataFormat());
|
2020-09-30 10:36:30 +02:00
|
|
|
}
|
2022-03-01 16:39:43 +01:00
|
|
|
|
2020-09-30 10:36:30 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Read namespaces.
|
2022-03-01 16:39:43 +01:00
|
|
|
*
|
|
|
|
* @throws Exception the exception
|
2020-09-30 10:36:30 +02:00
|
|
|
*/
|
2020-10-15 13:34:38 +02:00
|
|
|
private void readNamespaces() throws Exception {
|
2020-09-30 10:36:30 +02:00
|
|
|
|
|
|
|
try {
|
|
|
|
if (namespaceCategories == null || namespaceCategories.isEmpty()) {
|
|
|
|
|
|
|
|
if (namespaceCategories == null)
|
|
|
|
namespaceCategories = new ArrayList<NamespaceCategory>();
|
|
|
|
|
|
|
|
NamespaceCategoryReader rd = new NamespaceCategoryReader(scope);
|
|
|
|
namespaceCategories.addAll(rd.getNamespaces().getNamespaceCategories());
|
|
|
|
}
|
|
|
|
} catch (Exception e) {
|
|
|
|
logger.debug("An error occurred during read namespaces for categories: ", e);
|
2020-10-15 13:34:38 +02:00
|
|
|
throw e;
|
2020-09-30 10:36:30 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Gets the profile schema string.
|
|
|
|
*
|
|
|
|
* @return the profile schema string
|
|
|
|
*/
|
|
|
|
public static String getProfileSchemaString() {
|
2020-09-30 14:10:09 +02:00
|
|
|
InputStream inputStream = MetadataProfileReader.getProfileSchemaInputStream();
|
2020-09-30 10:36:30 +02:00
|
|
|
return new BufferedReader(new InputStreamReader(inputStream)).lines().collect(Collectors.joining("\n"));
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Gets the profile schema input stream.
|
|
|
|
*
|
|
|
|
* @return the profile schema input stream
|
|
|
|
*/
|
|
|
|
public static InputStream getProfileSchemaInputStream() {
|
2020-09-30 14:10:09 +02:00
|
|
|
try {
|
2022-03-01 16:39:43 +01:00
|
|
|
logger.info("Trying to return the URL resource: " + URL_OF_GCDCMETADATAPROFILEV3_XSD);
|
2020-09-30 14:10:09 +02:00
|
|
|
return new URL(URL_OF_GCDCMETADATAPROFILEV3_XSD).openStream();
|
2022-03-01 16:39:43 +01:00
|
|
|
|
2020-09-30 14:10:09 +02:00
|
|
|
} catch (Exception e) {
|
2022-03-01 16:39:43 +01:00
|
|
|
logger.warn("Error on reading the URL: " + URL_OF_GCDCMETADATAPROFILEV3_XSD);
|
2020-09-30 14:10:09 +02:00
|
|
|
}
|
2022-03-01 16:39:43 +01:00
|
|
|
logger.info("Returning local resource: " + SCHEMA_FILENAME);
|
2020-09-30 14:10:09 +02:00
|
|
|
return MetadataProfileReader.class.getResourceAsStream(SCHEMA_FILENAME);
|
2020-09-30 10:36:30 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Gets the profile schema URL.
|
|
|
|
*
|
|
|
|
* @return the profile schema URL
|
|
|
|
*/
|
|
|
|
public static URL getProfileSchemaURL() {
|
2020-09-30 14:10:09 +02:00
|
|
|
try {
|
2022-03-01 16:39:43 +01:00
|
|
|
logger.info("Trying to return the URL object: " + URL_OF_GCDCMETADATAPROFILEV3_XSD);
|
2020-09-30 14:10:09 +02:00
|
|
|
return new URL(URL_OF_GCDCMETADATAPROFILEV3_XSD);
|
2022-03-01 16:39:43 +01:00
|
|
|
|
2020-09-30 14:10:09 +02:00
|
|
|
} catch (Exception e) {
|
2022-03-01 16:39:43 +01:00
|
|
|
logger.warn("Error on reading the URL: " + URL_OF_GCDCMETADATAPROFILEV3_XSD);
|
2020-09-30 14:10:09 +02:00
|
|
|
}
|
2022-03-01 16:39:43 +01:00
|
|
|
logger.info("Returning local URL of: " + SCHEMA_FILENAME);
|
2020-09-30 14:10:09 +02:00
|
|
|
return MetadataProfileReader.class.getResource(SCHEMA_FILENAME);
|
2020-09-30 10:36:30 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Validate against XSD.
|
|
|
|
*
|
|
|
|
* @param xml the xml
|
|
|
|
* @param xsd the xsd
|
|
|
|
* @throws SAXException the SAX exception
|
|
|
|
* @throws IOException Signals that an I/O exception has occurred.
|
|
|
|
*/
|
|
|
|
static void validateAgainstXSD(Source xml, URL xsd) throws SAXException, IOException {
|
|
|
|
SchemaFactory factory = SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI);
|
|
|
|
Schema schema = factory.newSchema(xsd);
|
|
|
|
Validator validator = schema.newValidator();
|
|
|
|
validator.validate(xml);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Validate profile.
|
|
|
|
*
|
|
|
|
* @param xml the xml
|
|
|
|
* @throws Exception the exception
|
|
|
|
*/
|
|
|
|
public static void validateProfile(InputStream xml) throws Exception {
|
|
|
|
validateAgainstXSD(new StreamSource(xml), getProfileSchemaURL());
|
|
|
|
}
|
|
|
|
|
2022-03-01 16:39:43 +01:00
|
|
|
/**
|
|
|
|
* Gets the metadata format for metadata profile.
|
|
|
|
*
|
|
|
|
* @param profile the profile
|
|
|
|
* @return the metadata format for metadata profile
|
|
|
|
* @throws Exception the exception
|
|
|
|
*/
|
2020-09-30 10:36:30 +02:00
|
|
|
/*
|
|
|
|
* (non-Javadoc)
|
|
|
|
*
|
|
|
|
* @see
|
|
|
|
* org.gcube.common.metadataprofilediscovery.DataCatalogueMetadataDiscovery#
|
|
|
|
* getMetadataFormatForMetadataProfile(org.gcube.common.metadataprofilediscovery
|
|
|
|
* .bean.MetadataProfile)
|
|
|
|
*/
|
|
|
|
public MetadataFormat getMetadataFormatForMetadataProfile(MetadataProfile profile) throws Exception {
|
|
|
|
|
|
|
|
if (profile == null)
|
|
|
|
throw new Exception("Input " + MetadataProfile.class.getSimpleName() + " is null");
|
|
|
|
|
|
|
|
MetadataFormat format = hashMetadataFormats.get(profile.getId());
|
|
|
|
if (format != null)
|
|
|
|
return format;
|
|
|
|
|
|
|
|
MetadataFormatReader reader = new MetadataFormatReader(scope, profile.getId());
|
|
|
|
return reader.getMetadataFormat();
|
|
|
|
}
|
|
|
|
|
2022-03-01 16:39:43 +01:00
|
|
|
/**
|
|
|
|
* Gets the list of metadata profiles.
|
|
|
|
*
|
|
|
|
* @return the list of metadata profiles
|
|
|
|
* @throws Exception the exception
|
|
|
|
*/
|
2020-09-30 10:36:30 +02:00
|
|
|
/*
|
|
|
|
* (non-Javadoc)
|
|
|
|
*
|
|
|
|
* @see
|
|
|
|
* org.gcube.common.metadataprofilediscovery.DataCatalogueMetadataDiscovery#
|
|
|
|
* getListOfMetadataProfiles()
|
|
|
|
*/
|
|
|
|
public List<MetadataProfile> getListOfMetadataProfiles() throws Exception {
|
|
|
|
|
|
|
|
if (medataFormatDiscovery == null)
|
2020-09-30 14:10:09 +02:00
|
|
|
readMetadataFormats();
|
2020-09-30 10:36:30 +02:00
|
|
|
|
|
|
|
return medataFormatDiscovery.getMetadataProfiles();
|
|
|
|
}
|
|
|
|
|
2022-03-01 16:39:43 +01:00
|
|
|
/**
|
|
|
|
* Gets the list of namespace categories.
|
|
|
|
*
|
|
|
|
* @return the list of namespace categories
|
|
|
|
* @throws Exception the exception
|
|
|
|
*/
|
2020-09-30 10:36:30 +02:00
|
|
|
/*
|
|
|
|
* (non-Javadoc)
|
|
|
|
*
|
|
|
|
* @see
|
|
|
|
* org.gcube.common.metadataprofilediscovery.DataCatalogueMetadataDiscovery#
|
|
|
|
* getListOfNamespaceCategories()
|
|
|
|
*/
|
|
|
|
public List<NamespaceCategory> getListOfNamespaceCategories() throws Exception {
|
|
|
|
|
|
|
|
if (namespaceCategories == null)
|
|
|
|
readNamespaces();
|
|
|
|
|
|
|
|
return namespaceCategories;
|
|
|
|
}
|
|
|
|
|
2022-03-01 16:39:43 +01:00
|
|
|
/**
|
|
|
|
* Reset metadata profile.
|
|
|
|
*/
|
2020-09-30 10:36:30 +02:00
|
|
|
/*
|
|
|
|
* (non-Javadoc)
|
|
|
|
*
|
|
|
|
* @see
|
|
|
|
* org.gcube.common.metadataprofilediscovery.DataCatalogueMetadataDiscovery#
|
|
|
|
* resetMetadataProfile()
|
|
|
|
*/
|
|
|
|
public void resetMetadataProfile() {
|
|
|
|
|
|
|
|
medataFormatDiscovery = null;
|
|
|
|
hashMetadataFormats = null;
|
|
|
|
}
|
|
|
|
|
2022-03-01 16:39:43 +01:00
|
|
|
/**
|
|
|
|
* Reset namespace categories.
|
|
|
|
*/
|
2020-09-30 10:36:30 +02:00
|
|
|
/*
|
|
|
|
* (non-Javadoc)
|
|
|
|
*
|
|
|
|
* @see
|
|
|
|
* org.gcube.common.metadataprofilediscovery.DataCatalogueMetadataDiscovery#
|
|
|
|
* resetNamespaceCategories()
|
|
|
|
*/
|
|
|
|
public void resetNamespaceCategories() {
|
|
|
|
|
|
|
|
namespaceCategories = null;
|
|
|
|
}
|
|
|
|
|
2022-03-01 16:39:43 +01:00
|
|
|
/**
|
|
|
|
* Gets the profile schema.
|
|
|
|
*
|
|
|
|
* @return the profile schema
|
|
|
|
*/
|
2020-09-30 10:36:30 +02:00
|
|
|
/*
|
|
|
|
* (non-Javadoc)
|
|
|
|
*
|
|
|
|
* @see
|
|
|
|
* org.gcube.common.metadataprofilediscovery.DataCatalogueMetadataDiscovery#
|
|
|
|
* getProfileSchema()
|
|
|
|
*/
|
|
|
|
public String getProfileSchema() {
|
|
|
|
if (profileSchema == null) {
|
2020-09-30 14:10:09 +02:00
|
|
|
profileSchema = MetadataProfileReader.getProfileSchemaString();
|
2020-09-30 10:36:30 +02:00
|
|
|
}
|
|
|
|
return profileSchema;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
2022-03-01 16:39:43 +01:00
|
|
|
/**
|
|
|
|
* Validate profile.
|
|
|
|
*
|
|
|
|
* @param xmlProfile the xml profile
|
|
|
|
* @throws ParserConfigurationException the parser configuration exception
|
|
|
|
* @throws SAXException the SAX exception
|
|
|
|
* @throws IOException Signals that an I/O exception has
|
|
|
|
* occurred.
|
|
|
|
*/
|
2020-09-30 10:36:30 +02:00
|
|
|
/*
|
|
|
|
* (non-Javadoc)
|
|
|
|
*
|
|
|
|
* @see
|
|
|
|
* org.gcube.common.metadataprofilediscovery.DataCatalogueMetadataDiscovery#
|
|
|
|
* validateProfile(java.lang.String)
|
|
|
|
*/
|
|
|
|
public void validateProfile(String xmlProfile) throws ParserConfigurationException, SAXException, IOException {
|
|
|
|
DocumentBuilder db = DocumentBuilderFactory.newInstance().newDocumentBuilder();
|
|
|
|
Document doc = db.parse(new InputSource(new StringReader(xmlProfile)));
|
|
|
|
DOMSource source = new DOMSource(doc);
|
|
|
|
validateAgainstXSD(source, getProfileSchemaURL());
|
|
|
|
}
|
2022-03-01 16:39:43 +01:00
|
|
|
|
2020-09-30 14:10:09 +02:00
|
|
|
/**
|
2022-03-01 16:39:43 +01:00
|
|
|
* Utility method to get a metadata format from a "gCube Metadata Profiles"
|
|
|
|
* model (XML-based).
|
2020-09-30 14:10:09 +02:00
|
|
|
*
|
|
|
|
* @param metadataProfileStream the metadata profile stream
|
|
|
|
* @return the metadata format
|
|
|
|
* @throws JAXBException the JAXB exception
|
|
|
|
*/
|
2022-03-01 16:39:43 +01:00
|
|
|
public static MetadataFormat toMetadataFormat(InputStream metadataProfileStream) throws JAXBException {
|
|
|
|
|
|
|
|
if (metadataProfileStream == null)
|
2020-09-30 14:10:09 +02:00
|
|
|
throw new NullPointerException("Invalid input parameter");
|
2022-03-01 16:39:43 +01:00
|
|
|
|
2020-09-30 14:10:09 +02:00
|
|
|
JAXBContext jaxbContext = JAXBContext.newInstance(MetadataFormat.class);
|
2022-03-01 16:39:43 +01:00
|
|
|
Unmarshaller jaxbUnmarshaller = jaxbContext.createUnmarshaller();
|
|
|
|
return (MetadataFormat) jaxbUnmarshaller.unmarshal(metadataProfileStream);
|
2020-09-30 14:10:09 +02:00
|
|
|
}
|
|
|
|
|
2020-09-30 10:36:30 +02:00
|
|
|
}
|