AriadnePlus/dnet-ariadneplus-graphdb-pu.../src/main/java/eu/dnetlib/ariadneplus/rdf/RecordParserHelper.java

102 lines
3.7 KiB
Java

package eu.dnetlib.ariadneplus.rdf;
import java.util.Map;
import javax.annotation.PostConstruct;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
import com.google.common.collect.Maps;
import eu.dnetlib.ariadneplus.publisher.SaxonHelper;
import net.sf.saxon.s9api.SaxonApiException;
import net.sf.saxon.s9api.Serializer;
import net.sf.saxon.s9api.XPathSelector;
/**
 * Extracts header fields and the embedded RDF element from XML records, using
 * XPath selectors that are prepared once by {@link #init()}.
 *
 * <p>NOTE(review): the prepared selectors are instance fields reused by every call;
 * confirm whether {@code XPathSelector} may be shared between threads before this
 * bean is used concurrently.
 *
 * @author enrico.ottonello
 */
@Component
public class RecordParserHelper {

    private static final Log log = LogFactory.getLog(RecordParserHelper.class);

    public static final String OAI_NAMESPACE_URI = "http://www.openarchives.org/OAI/2.0/";
    public static final String DRI_NAMESPACE_URI = "http://www.driver-repository.eu/namespace/dri";
    public static final String RDF_NAMESPACE_URI = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";

    // XPath expressions are kept as constants so that a failed extraction can name
    // the expression in its error message instead of printing the selector object.
    private static final String XPATH_OBJ_IDENTIFIER = "//oai:header/dri:objIdentifier/text()";
    private static final String XPATH_COLLECTION_DATE = "//oai:header/dri:dateOfCollection/text()";
    private static final String XPATH_TRANSFORMATION_DATE = "//oai:header/dri:dateOfTransformation/text()";
    private static final String XPATH_DATASOURCE_NAME = "//oai:header/dri:datasourcename/text()";
    private static final String XPATH_DATASOURCE_API = "//oai:header/dri:datasourceapi/text()";
    private static final String XPATH_RDF = "//oai:metadata/rdf:RDF";

    @Autowired
    private SaxonHelper saxonHelper;

    private XPathSelector xpathSelectorObjIdentifier;
    private XPathSelector xpathSelectorCollectionDate;
    private XPathSelector xpathSelectorTransformationDate;
    private XPathSelector xpathSelectorDatasourceName;
    private XPathSelector xpathSelectorDatasourceApi;
    private XPathSelector xpathSelectorRDF;

    /**
     * Prepares all XPath selectors; invoked by the container after dependency injection.
     *
     * @throws SaxonApiException if preparing a selector fails
     */
    @PostConstruct
    public void init() throws SaxonApiException {
        prepareXpathSelectors();
    }

    /**
     * Evaluates {@code //oai:header/dri:dateOfCollection/text()} against the record.
     *
     * @param record the XML record, as a string
     * @return the evaluation result as a string
     * @throws RuntimeException if the evaluation raises a {@link SaxonApiException}
     */
    public String getCollectionDate(final String record) {
        return extractFromRecord(record, xpathSelectorCollectionDate, XPATH_COLLECTION_DATE);
    }

    /**
     * Evaluates {@code //oai:header/dri:dateOfTransformation/text()} against the record.
     *
     * @param record the XML record, as a string
     * @return the evaluation result as a string
     * @throws RuntimeException if the evaluation raises a {@link SaxonApiException}
     */
    public String getTransformationDate(final String record) {
        return extractFromRecord(record, xpathSelectorTransformationDate, XPATH_TRANSFORMATION_DATE);
    }

    /**
     * Evaluates {@code //oai:header/dri:datasourcename/text()} against the record.
     *
     * @param record the XML record, as a string
     * @return the evaluation result as a string
     * @throws RuntimeException if the evaluation raises a {@link SaxonApiException}
     */
    public String getDatasourceName(final String record) {
        return extractFromRecord(record, xpathSelectorDatasourceName, XPATH_DATASOURCE_NAME);
    }

    /**
     * Evaluates {@code //oai:header/dri:datasourceapi/text()} against the record.
     *
     * @param record the XML record, as a string
     * @return the evaluation result as a string
     * @throws RuntimeException if the evaluation raises a {@link SaxonApiException}
     */
    public String getDatasourceApi(final String record) {
        return extractFromRecord(record, xpathSelectorDatasourceApi, XPATH_DATASOURCE_API);
    }

    /**
     * Evaluates {@code //oai:header/dri:objIdentifier/text()} against the record.
     *
     * @param record the XML record, as a string
     * @return the evaluation result as a string
     * @throws RuntimeException if the evaluation raises a {@link SaxonApiException}
     */
    public String getObjIdentifier(final String record) {
        return extractFromRecord(record, xpathSelectorObjIdentifier, XPATH_OBJ_IDENTIFIER);
    }

    /**
     * Evaluates {@code //oai:metadata/rdf:RDF} against the record.
     *
     * @param record the XML record, as a string
     * @return the evaluation result as a string
     * @throws RuntimeException if the evaluation raises a {@link SaxonApiException}
     */
    public String getRDF(final String record) {
        return extractFromRecord(record, xpathSelectorRDF, XPATH_RDF);
    }

    /**
     * Runs a prepared selector over the record and returns the single result as a string.
     * The serializer is asked to omit the XML declaration before evaluating.
     *
     * @param record        the XML record, as a string
     * @param xPathSelector the prepared selector to evaluate
     * @param xpath         the expression the selector was prepared from; used only for error reporting
     * @return the evaluation result as a string
     * @throws RuntimeException wrapping the {@link SaxonApiException} raised by the evaluation
     */
    private String extractFromRecord(final String record, final XPathSelector xPathSelector, final String xpath) {
        try {
            return this.saxonHelper.help()
                    .setSerializerProperty(Serializer.Property.OMIT_XML_DECLARATION, "yes")
                    .evaluateSingleAsString(record, xPathSelector);
        } catch (SaxonApiException e) {
            final String message = "Cannot extract content from path " + xpath;
            // Pass the exception as the throwable argument so the stack trace reaches the log.
            log.error(message, e);
            throw new RuntimeException(message, e);
        }
    }

    /**
     * Registers the oai, dri and rdf namespace prefixes and prepares one selector per expression.
     *
     * @throws SaxonApiException if preparing a selector fails
     */
    private void prepareXpathSelectors() throws SaxonApiException {
        final Map<String, String> namespaces = Maps.newHashMap();
        namespaces.put("oai", OAI_NAMESPACE_URI);
        namespaces.put("dri", DRI_NAMESPACE_URI);
        namespaces.put("rdf", RDF_NAMESPACE_URI);

        xpathSelectorObjIdentifier = prepareSelector(XPATH_OBJ_IDENTIFIER, namespaces);
        xpathSelectorCollectionDate = prepareSelector(XPATH_COLLECTION_DATE, namespaces);
        xpathSelectorTransformationDate = prepareSelector(XPATH_TRANSFORMATION_DATE, namespaces);
        xpathSelectorDatasourceName = prepareSelector(XPATH_DATASOURCE_NAME, namespaces);
        xpathSelectorDatasourceApi = prepareSelector(XPATH_DATASOURCE_API, namespaces);
        xpathSelectorRDF = prepareSelector(XPATH_RDF, namespaces);
    }

    /**
     * Prepares a single selector for the given expression through the Saxon helper.
     *
     * @param xpath      the XPath expression
     * @param namespaces prefix-to-URI bindings used by the expression
     * @return the prepared selector
     * @throws SaxonApiException if preparing the selector fails
     */
    private XPathSelector prepareSelector(final String xpath, final Map<String, String> namespaces)
            throws SaxonApiException {
        return this.saxonHelper.help().prepareXPathSelector(xpath, namespaces);
    }

    /**
     * @return the Saxon helper used to prepare and evaluate selectors
     */
    public SaxonHelper getSaxonHelper() {
        return saxonHelper;
    }

    /**
     * Replaces the Saxon helper. Selectors already prepared by {@link #init()} are not rebuilt.
     *
     * @param saxonHelper the helper to use from now on
     */
    public void setSaxonHelper(final SaxonHelper saxonHelper) {
        this.saxonHelper = saxonHelper;
    }
}