package org.gcube.common.metadataprofilediscovery; import java.io.BufferedReader; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.io.StringReader; import java.net.URL; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.stream.Collectors; import javax.xml.XMLConstants; import javax.xml.bind.JAXBContext; import javax.xml.bind.JAXBException; import javax.xml.bind.Unmarshaller; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; import javax.xml.transform.Source; import javax.xml.transform.dom.DOMSource; import javax.xml.transform.stream.StreamSource; import javax.xml.validation.Schema; import javax.xml.validation.SchemaFactory; import javax.xml.validation.Validator; import org.gcube.common.metadataprofilediscovery.bean.MetadataProfile; import org.gcube.common.metadataprofilediscovery.jaxb.MetadataFormat; import org.gcube.common.metadataprofilediscovery.jaxb.NamespaceCategory; import org.gcube.common.metadataprofilediscovery.reader.MetadataFormatDiscovery; import org.gcube.common.metadataprofilediscovery.reader.MetadataFormatReader; import org.gcube.common.metadataprofilediscovery.reader.NamespaceCategoryReader; import org.gcube.common.scope.api.ScopeProvider; import org.gcube.common.scope.impl.ScopeBean; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.w3c.dom.Document; import org.xml.sax.InputSource; import org.xml.sax.SAXException; /** * The Class MetadataProfileReader. * * @author Francesco Mangiacrapa at ISTI-CNR (francesco.mangiacrapa@isti.cnr.it) * * Sep 30, 2020 */ public class MetadataProfileReader implements MetadataProfileDiscovery { public static final String URL_OF_GCDCMETADATAPROFILEV3_XSD = "https://wiki.gcube-system.org/images_gcube/e/e8/Gcdcmetadataprofilev3.xsd"; private static String SCHEMA_FILENAME = "Gdcmetadataprofilev3.xsd"; private MetadataFormatDiscovery medataFormatDiscovery; private ScopeBean scope; private Map hashMetadataFormats = null; private List namespaceCategories = null; private String profileSchema = null; private String grMetadataProfileSecondaryType; private static Logger logger = LoggerFactory.getLogger(MetadataProfileReader.class); /** * Instantiates a new metadata profile reader. * * @param grMetadataProfileSecondaryType the SecondaryType that must be used to discover the "gCube Metadata Profiles" from Information System * @throws Exception the exception */ public MetadataProfileReader(String grMetadataProfileSecondaryType) throws Exception { if(grMetadataProfileSecondaryType==null || grMetadataProfileSecondaryType.isEmpty()) throw new NullPointerException("Invalid input parameter"); this.grMetadataProfileSecondaryType = grMetadataProfileSecondaryType; String scopeString = ScopeProvider.instance.get(); logger.debug("Read scope " + scopeString + " from ScopeProvider"); if (scopeString == null || scopeString.isEmpty()) throw new Exception("Please set a valid scope into ScopeProvider"); scope = new ScopeBean(scopeString); readNamespaces(); readMetadataFormats(); } /** * Read metada formats. * * @throws Exception the exception */ private void readMetadataFormats() throws Exception { //reading from Generic Resource for Secondary Type passed in input medataFormatDiscovery = new MetadataFormatDiscovery(scope, grMetadataProfileSecondaryType); logger.info("MedataFormatDiscovery has retrieved: " + medataFormatDiscovery.getMetadataProfiles().size() + " metadata type/s"); logger.debug("filling cache for MedataFormat"); hashMetadataFormats = new HashMap(medataFormatDiscovery.getMetadataProfiles().size()); for (MetadataProfile mT : medataFormatDiscovery.getMetadataProfiles()) { if (mT == null) continue; MetadataFormatReader reader = new MetadataFormatReader(scope, mT.getId()); hashMetadataFormats.put(mT.getId(), reader.getMetadataFormat()); logger.debug("MetadataType id: " + mT.getId() + " cached as: " + reader.getMetadataFormat()); } } /** * Read namespaces. * @throws Exception */ private void readNamespaces() throws Exception { try { if (namespaceCategories == null || namespaceCategories.isEmpty()) { if (namespaceCategories == null) namespaceCategories = new ArrayList(); NamespaceCategoryReader rd = new NamespaceCategoryReader(scope); namespaceCategories.addAll(rd.getNamespaces().getNamespaceCategories()); } } catch (Exception e) { logger.debug("An error occurred during read namespaces for categories: ", e); throw e; } } /** * Gets the profile schema string. * * @return the profile schema string */ public static String getProfileSchemaString() { InputStream inputStream = MetadataProfileReader.getProfileSchemaInputStream(); return new BufferedReader(new InputStreamReader(inputStream)).lines().collect(Collectors.joining("\n")); } /** * Gets the profile schema input stream. * * @return the profile schema input stream */ public static InputStream getProfileSchemaInputStream() { try { logger.info("Trying to return the URL resource: "+URL_OF_GCDCMETADATAPROFILEV3_XSD); return new URL(URL_OF_GCDCMETADATAPROFILEV3_XSD).openStream(); } catch (Exception e) { logger.warn("Error on reading the URL: "+URL_OF_GCDCMETADATAPROFILEV3_XSD); } logger.info("Returning local resource: "+SCHEMA_FILENAME); return MetadataProfileReader.class.getResourceAsStream(SCHEMA_FILENAME); } /** * Gets the profile schema URL. * * @return the profile schema URL */ public static URL getProfileSchemaURL() { try { logger.info("Trying to return the URL object: "+URL_OF_GCDCMETADATAPROFILEV3_XSD); return new URL(URL_OF_GCDCMETADATAPROFILEV3_XSD); } catch (Exception e) { logger.warn("Error on reading the URL: "+URL_OF_GCDCMETADATAPROFILEV3_XSD); } logger.info("Returning local URL of: "+SCHEMA_FILENAME); return MetadataProfileReader.class.getResource(SCHEMA_FILENAME); } /** * Validate against XSD. * * @param xml the xml * @param xsd the xsd * @throws SAXException the SAX exception * @throws IOException Signals that an I/O exception has occurred. */ static void validateAgainstXSD(Source xml, URL xsd) throws SAXException, IOException { SchemaFactory factory = SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI); Schema schema = factory.newSchema(xsd); Validator validator = schema.newValidator(); validator.validate(xml); } /** * Validate profile. * * @param xml the xml * @throws Exception the exception */ public static void validateProfile(InputStream xml) throws Exception { validateAgainstXSD(new StreamSource(xml), getProfileSchemaURL()); } /* * (non-Javadoc) * * @see * org.gcube.common.metadataprofilediscovery.DataCatalogueMetadataDiscovery# * getMetadataFormatForMetadataProfile(org.gcube.common.metadataprofilediscovery * .bean.MetadataProfile) */ public MetadataFormat getMetadataFormatForMetadataProfile(MetadataProfile profile) throws Exception { if (profile == null) throw new Exception("Input " + MetadataProfile.class.getSimpleName() + " is null"); MetadataFormat format = hashMetadataFormats.get(profile.getId()); if (format != null) return format; MetadataFormatReader reader = new MetadataFormatReader(scope, profile.getId()); return reader.getMetadataFormat(); } /* * (non-Javadoc) * * @see * org.gcube.common.metadataprofilediscovery.DataCatalogueMetadataDiscovery# * getListOfMetadataProfiles() */ public List getListOfMetadataProfiles() throws Exception { if (medataFormatDiscovery == null) readMetadataFormats(); return medataFormatDiscovery.getMetadataProfiles(); } /* * (non-Javadoc) * * @see * org.gcube.common.metadataprofilediscovery.DataCatalogueMetadataDiscovery# * getListOfNamespaceCategories() */ public List getListOfNamespaceCategories() throws Exception { if (namespaceCategories == null) readNamespaces(); return namespaceCategories; } /* * (non-Javadoc) * * @see * org.gcube.common.metadataprofilediscovery.DataCatalogueMetadataDiscovery# * resetMetadataProfile() */ public void resetMetadataProfile() { medataFormatDiscovery = null; hashMetadataFormats = null; } /* * (non-Javadoc) * * @see * org.gcube.common.metadataprofilediscovery.DataCatalogueMetadataDiscovery# * resetNamespaceCategories() */ public void resetNamespaceCategories() { namespaceCategories = null; } /* * (non-Javadoc) * * @see * org.gcube.common.metadataprofilediscovery.DataCatalogueMetadataDiscovery# * getProfileSchema() */ public String getProfileSchema() { if (profileSchema == null) { profileSchema = MetadataProfileReader.getProfileSchemaString(); } return profileSchema; } /* * (non-Javadoc) * * @see * org.gcube.common.metadataprofilediscovery.DataCatalogueMetadataDiscovery# * validateProfile(java.lang.String) */ public void validateProfile(String xmlProfile) throws ParserConfigurationException, SAXException, IOException { DocumentBuilder db = DocumentBuilderFactory.newInstance().newDocumentBuilder(); Document doc = db.parse(new InputSource(new StringReader(xmlProfile))); DOMSource source = new DOMSource(doc); validateAgainstXSD(source, getProfileSchemaURL()); } /** * Utility method to get a metadata format from a "gCube Metadata Profiles" model (XML-based) * * @param metadataProfileStream the metadata profile stream * @return the metadata format * @throws JAXBException the JAXB exception */ public static MetadataFormat toMetadataFormat(InputStream metadataProfileStream) throws JAXBException{ if(metadataProfileStream==null) throw new NullPointerException("Invalid input parameter"); JAXBContext jaxbContext = JAXBContext.newInstance(MetadataFormat.class); Unmarshaller jaxbUnmarshaller = jaxbContext.createUnmarshaller(); return (MetadataFormat) jaxbUnmarshaller.unmarshal(metadataProfileStream); } }