package eu.dnetlib.ariadneplus.rdf; import java.util.Map; import javax.annotation.PostConstruct; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Component; import com.google.common.collect.Maps; import eu.dnetlib.ariadneplus.publisher.SaxonHelper; import net.sf.saxon.s9api.SaxonApiException; import net.sf.saxon.s9api.Serializer; import net.sf.saxon.s9api.XPathSelector; /** * @author enrico.ottonello * */ @Component public class RecordParserHelper { private static final Log log = LogFactory.getLog(RecordParserHelper.class); public static final String OAI_NAMESPACE_URI = "http://www.openarchives.org/OAI/2.0/"; public static final String DRI_NAMESPACE_URI = "http://www.driver-repository.eu/namespace/dri"; public static final String RDF_NAMESPACE_URI = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"; @Autowired private SaxonHelper saxonHelper; private XPathSelector xpathSelectorObjIdentifier; private XPathSelector xpathSelectorCollectionDate; private XPathSelector xpathSelectorTransformationDate; private XPathSelector xpathSelectorDatasourceName; private XPathSelector xpathSelectorDatasourceApi; private XPathSelector xpathSelectorRDF; @PostConstruct public void init() throws SaxonApiException { prepareXpathSelectors(); } public String getCollectionDate(final String record) { return extractFromRecord(record, xpathSelectorCollectionDate); } public String getTransformationDate(final String record) { return extractFromRecord(record, xpathSelectorTransformationDate); } public String getDatasourceName(final String record) { return extractFromRecord(record, xpathSelectorDatasourceName); } public String getDatasourceApi(final String record) { return extractFromRecord(record, xpathSelectorDatasourceApi); } public String getObjIdentifier(final String record) { return extractFromRecord(record, xpathSelectorObjIdentifier); } public String getRDF(final String record) { return extractFromRecord(record, xpathSelectorRDF); } private String extractFromRecord(final String record, final XPathSelector xPathSelector) { try { return this.saxonHelper.help().setSerializerProperty(Serializer.Property.OMIT_XML_DECLARATION, "yes").evaluateSingleAsString(record, xPathSelector); } catch (SaxonApiException e) { log.error(e); throw new RuntimeException("Cannot extract content from path "+ xPathSelector.toString(), e); } } private void prepareXpathSelectors() throws SaxonApiException { Map namespaces = Maps.newHashMap(); namespaces.put("oai", OAI_NAMESPACE_URI); namespaces.put("dri", DRI_NAMESPACE_URI); namespaces.put("rdf", RDF_NAMESPACE_URI); xpathSelectorObjIdentifier = this.saxonHelper.help().prepareXPathSelector("//oai:header/dri:objIdentifier/text()", namespaces); xpathSelectorCollectionDate = this.saxonHelper.help().prepareXPathSelector("//oai:header/dri:dateOfCollection/text()", namespaces); xpathSelectorTransformationDate = this.saxonHelper.help().prepareXPathSelector("//oai:header/dri:dateOfTransformation/text()", namespaces); xpathSelectorDatasourceName = this.saxonHelper.help().prepareXPathSelector("//oai:header/dri:datasourcename/text()", namespaces); xpathSelectorDatasourceApi = this.saxonHelper.help().prepareXPathSelector("//oai:header/dri:datasourceapi/text()", namespaces); xpathSelectorRDF = this.saxonHelper.help().prepareXPathSelector("//oai:metadata/rdf:RDF", namespaces); } public SaxonHelper getSaxonHelper() { return saxonHelper; } public void setSaxonHelper(final SaxonHelper saxonHelper) { this.saxonHelper = saxonHelper; } }