102 lines
3.7 KiB
Java
102 lines
3.7 KiB
Java
package eu.dnetlib.ariadneplus.rdf;
|
|
|
|
import java.util.Map;
|
|
|
|
import javax.annotation.PostConstruct;
|
|
|
|
import org.apache.commons.logging.Log;
|
|
import org.apache.commons.logging.LogFactory;
|
|
import org.springframework.beans.factory.annotation.Autowired;
|
|
import org.springframework.stereotype.Component;
|
|
|
|
import com.google.common.collect.Maps;
|
|
|
|
import eu.dnetlib.ariadneplus.publisher.SaxonHelper;
|
|
import net.sf.saxon.s9api.SaxonApiException;
|
|
import net.sf.saxon.s9api.Serializer;
|
|
import net.sf.saxon.s9api.XPathSelector;
|
|
|
|
/**
|
|
* @author enrico.ottonello
|
|
*
|
|
*/
|
|
|
|
@Component
|
|
public class RecordParserHelper {
|
|
|
|
private static final Log log = LogFactory.getLog(RecordParserHelper.class);
|
|
|
|
public static final String OAI_NAMESPACE_URI = "http://www.openarchives.org/OAI/2.0/";
|
|
public static final String DRI_NAMESPACE_URI = "http://www.driver-repository.eu/namespace/dri";
|
|
public static final String RDF_NAMESPACE_URI = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
|
|
|
|
@Autowired
|
|
private SaxonHelper saxonHelper;
|
|
|
|
private XPathSelector xpathSelectorObjIdentifier;
|
|
private XPathSelector xpathSelectorCollectionDate;
|
|
private XPathSelector xpathSelectorTransformationDate;
|
|
private XPathSelector xpathSelectorDatasourceName;
|
|
private XPathSelector xpathSelectorDatasourceApi;
|
|
private XPathSelector xpathSelectorRDF;
|
|
|
|
@PostConstruct
|
|
public void init() throws SaxonApiException {
|
|
prepareXpathSelectors();
|
|
}
|
|
|
|
public String getCollectionDate(final String record) {
|
|
return extractFromRecord(record, xpathSelectorCollectionDate);
|
|
}
|
|
|
|
public String getTransformationDate(final String record) {
|
|
return extractFromRecord(record, xpathSelectorTransformationDate);
|
|
}
|
|
|
|
public String getDatasourceName(final String record) {
|
|
return extractFromRecord(record, xpathSelectorDatasourceName);
|
|
}
|
|
|
|
public String getDatasourceApi(final String record) {
|
|
return extractFromRecord(record, xpathSelectorDatasourceApi);
|
|
}
|
|
|
|
public String getObjIdentifier(final String record) {
|
|
return extractFromRecord(record, xpathSelectorObjIdentifier);
|
|
}
|
|
|
|
public String getRDF(final String record) {
|
|
return extractFromRecord(record, xpathSelectorRDF);
|
|
}
|
|
|
|
private String extractFromRecord(final String record, final XPathSelector xPathSelector) {
|
|
try {
|
|
return this.saxonHelper.help().setSerializerProperty(Serializer.Property.OMIT_XML_DECLARATION, "yes").evaluateSingleAsString(record, xPathSelector);
|
|
} catch (SaxonApiException e) {
|
|
log.error(e);
|
|
throw new RuntimeException("Cannot extract content from path "+ xPathSelector.toString(), e);
|
|
}
|
|
}
|
|
|
|
private void prepareXpathSelectors() throws SaxonApiException {
|
|
Map<String, String> namespaces = Maps.newHashMap();
|
|
namespaces.put("oai", OAI_NAMESPACE_URI);
|
|
namespaces.put("dri", DRI_NAMESPACE_URI);
|
|
namespaces.put("rdf", RDF_NAMESPACE_URI);
|
|
xpathSelectorObjIdentifier = this.saxonHelper.help().prepareXPathSelector("//oai:header/dri:objIdentifier/text()", namespaces);
|
|
xpathSelectorCollectionDate = this.saxonHelper.help().prepareXPathSelector("//oai:header/dri:dateOfCollection/text()", namespaces);
|
|
xpathSelectorTransformationDate = this.saxonHelper.help().prepareXPathSelector("//oai:header/dri:dateOfTransformation/text()", namespaces);
|
|
xpathSelectorDatasourceName = this.saxonHelper.help().prepareXPathSelector("//oai:header/dri:datasourcename/text()", namespaces);
|
|
xpathSelectorDatasourceApi = this.saxonHelper.help().prepareXPathSelector("//oai:header/dri:datasourceapi/text()", namespaces);
|
|
xpathSelectorRDF = this.saxonHelper.help().prepareXPathSelector("//oai:metadata/rdf:RDF", namespaces);
|
|
}
|
|
|
|
public SaxonHelper getSaxonHelper() {
|
|
return saxonHelper;
|
|
}
|
|
|
|
public void setSaxonHelper(final SaxonHelper saxonHelper) {
|
|
this.saxonHelper = saxonHelper;
|
|
}
|
|
}
|