forked from D-Net/dnet-hadoop
added Saxon-HE extension functions and Transformer factory class
This commit is contained in:
parent
d3b96f102b
commit
956da2f923
|
@ -42,6 +42,10 @@
|
|||
<groupId>com.rabbitmq</groupId>
|
||||
<artifactId>amqp-client</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>net.sf.saxon</groupId>
|
||||
<artifactId>Saxon-HE</artifactId>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
|
||||
</project>
|
||||
|
|
|
@ -0,0 +1,32 @@
|
|||
package eu.dnetlib.dhp.utils.saxon;
|
||||
|
||||
import net.sf.saxon.expr.XPathContext;
|
||||
import net.sf.saxon.lib.ExtensionFunctionCall;
|
||||
import net.sf.saxon.lib.ExtensionFunctionDefinition;
|
||||
import net.sf.saxon.om.Sequence;
|
||||
import net.sf.saxon.om.StructuredQName;
|
||||
import net.sf.saxon.trans.XPathException;
|
||||
|
||||
public abstract class AbstractExtensionFunction extends ExtensionFunctionDefinition {
|
||||
|
||||
public static String DEFAULT_SAXON_EXT_NS_URI = "http://www.d-net.research-infrastructures.eu/saxon-extension";
|
||||
|
||||
public abstract String getName();
|
||||
public abstract Sequence doCall(XPathContext context, Sequence[] arguments) throws XPathException;
|
||||
|
||||
@Override
|
||||
public StructuredQName getFunctionQName() {
|
||||
return new StructuredQName("dnet", DEFAULT_SAXON_EXT_NS_URI, getName());
|
||||
}
|
||||
|
||||
@Override
|
||||
public ExtensionFunctionCall makeCallExpression() {
|
||||
return new ExtensionFunctionCall() {
|
||||
@Override
|
||||
public Sequence call(XPathContext context, Sequence[] arguments) throws XPathException {
|
||||
return doCall(context, arguments);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,67 @@
|
|||
package eu.dnetlib.dhp.utils.saxon;
|
||||
|
||||
import net.sf.saxon.expr.XPathContext;
|
||||
import net.sf.saxon.om.Item;
|
||||
import net.sf.saxon.om.Sequence;
|
||||
import net.sf.saxon.trans.XPathException;
|
||||
import net.sf.saxon.value.SequenceType;
|
||||
import net.sf.saxon.value.StringValue;
|
||||
|
||||
import java.text.ParseException;
|
||||
import java.text.SimpleDateFormat;
|
||||
import java.util.Calendar;
|
||||
import java.util.GregorianCalendar;
|
||||
|
||||
public class ExtractYear extends AbstractExtensionFunction {
|
||||
|
||||
private static final String[] dateFormats = { "yyyy-MM-dd", "yyyy/MM/dd" };
|
||||
|
||||
@Override
|
||||
public String getName() {
|
||||
return "extractYear";
|
||||
}
|
||||
|
||||
@Override
|
||||
public Sequence doCall(XPathContext context, Sequence[] arguments) throws XPathException {
|
||||
if (arguments == null | arguments.length == 0) {
|
||||
return new StringValue("");
|
||||
}
|
||||
final Item item = arguments[0].head();
|
||||
if (item == null) {
|
||||
return new StringValue("");
|
||||
}
|
||||
return new StringValue(_year(item.getStringValue()));
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getMinimumNumberOfArguments() {
|
||||
return 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getMaximumNumberOfArguments() {
|
||||
return 1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public SequenceType[] getArgumentTypes() {
|
||||
return new SequenceType[] { SequenceType.OPTIONAL_ITEM };
|
||||
}
|
||||
|
||||
@Override
|
||||
public SequenceType getResultType(SequenceType[] suppliedArgumentTypes) {
|
||||
return SequenceType.SINGLE_STRING;
|
||||
}
|
||||
|
||||
private String _year(String s) {
|
||||
Calendar c = new GregorianCalendar();
|
||||
for (String format : dateFormats) {
|
||||
try {
|
||||
c.setTime(new SimpleDateFormat(format).parse(s));
|
||||
String year = String.valueOf(c.get(Calendar.YEAR));
|
||||
return year;
|
||||
} catch (ParseException e) {}
|
||||
}
|
||||
return "";
|
||||
}
|
||||
}
|
|
@ -0,0 +1,66 @@
|
|||
package eu.dnetlib.dhp.utils.saxon;
|
||||
|
||||
import net.sf.saxon.expr.XPathContext;
|
||||
import net.sf.saxon.om.Sequence;
|
||||
import net.sf.saxon.trans.XPathException;
|
||||
import net.sf.saxon.value.SequenceType;
|
||||
import net.sf.saxon.value.StringValue;
|
||||
|
||||
import java.text.ParseException;
|
||||
import java.text.SimpleDateFormat;
|
||||
import java.util.Date;
|
||||
|
||||
public class NormalizeDate extends AbstractExtensionFunction {
|
||||
|
||||
private static final String[] normalizeDateFormats = { "yyyy-MM-dd'T'hh:mm:ss", "yyyy-MM-dd", "yyyy/MM/dd", "yyyy" };
|
||||
|
||||
private static final String normalizeOutFormat = new String("yyyy-MM-dd'T'hh:mm:ss'Z'");
|
||||
|
||||
@Override
|
||||
public String getName() {
|
||||
return "normalizeDate";
|
||||
}
|
||||
|
||||
@Override
|
||||
public Sequence doCall(XPathContext context, Sequence[] arguments) throws XPathException {
|
||||
if (arguments == null | arguments.length == 0) {
|
||||
return new StringValue("");
|
||||
}
|
||||
String s = arguments[0].head().getStringValue();
|
||||
return new StringValue(_year(s));
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getMinimumNumberOfArguments() {
|
||||
return 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getMaximumNumberOfArguments() {
|
||||
return 1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public SequenceType[] getArgumentTypes() {
|
||||
return new SequenceType[] { SequenceType.OPTIONAL_ITEM };
|
||||
}
|
||||
|
||||
@Override
|
||||
public SequenceType getResultType(SequenceType[] suppliedArgumentTypes) {
|
||||
return SequenceType.SINGLE_STRING;
|
||||
}
|
||||
|
||||
private String _year(String s) {
|
||||
final String date = s != null ? s.trim() : "";
|
||||
|
||||
for (String format : normalizeDateFormats) {
|
||||
try {
|
||||
Date parse = new SimpleDateFormat(format).parse(date);
|
||||
String res = new SimpleDateFormat(normalizeOutFormat).format(parse);
|
||||
return res;
|
||||
} catch (ParseException e) {}
|
||||
}
|
||||
return "";
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,53 @@
|
|||
package eu.dnetlib.dhp.utils.saxon;
|
||||
|
||||
import net.sf.saxon.expr.XPathContext;
|
||||
import net.sf.saxon.om.Sequence;
|
||||
import net.sf.saxon.trans.XPathException;
|
||||
import net.sf.saxon.value.SequenceType;
|
||||
import net.sf.saxon.value.StringValue;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
|
||||
public class PickFirst extends AbstractExtensionFunction {
|
||||
|
||||
@Override
|
||||
public String getName() {
|
||||
return "pickFirst";
|
||||
}
|
||||
|
||||
@Override
|
||||
public Sequence doCall(XPathContext context, Sequence[] arguments) throws XPathException {
|
||||
if (arguments == null | arguments.length == 0) {
|
||||
return new StringValue("");
|
||||
}
|
||||
String s1 = arguments[0].head().getStringValue();
|
||||
|
||||
if (arguments.length > 1) {
|
||||
String s2 = arguments[1].head().getStringValue();
|
||||
|
||||
return new StringValue(StringUtils.isNotBlank(s1) ? s1 : StringUtils.isNotBlank(s2) ? s2 : "");
|
||||
} else {
|
||||
return new StringValue(StringUtils.isNotBlank(s1) ? s1 : "");
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getMinimumNumberOfArguments() {
|
||||
return 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getMaximumNumberOfArguments() {
|
||||
return 2;
|
||||
}
|
||||
|
||||
@Override
|
||||
public SequenceType[] getArgumentTypes() {
|
||||
return new SequenceType[] { SequenceType.OPTIONAL_ITEM };
|
||||
}
|
||||
|
||||
@Override
|
||||
public SequenceType getResultType(SequenceType[] suppliedArgumentTypes) {
|
||||
return SequenceType.SINGLE_STRING;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,30 @@
|
|||
package eu.dnetlib.dhp.utils.saxon;
|
||||
|
||||
import net.sf.saxon.Configuration;
|
||||
import net.sf.saxon.TransformerFactoryImpl;
|
||||
|
||||
import javax.xml.transform.Transformer;
|
||||
import javax.xml.transform.TransformerException;
|
||||
import javax.xml.transform.stream.StreamSource;
|
||||
import java.io.StringReader;
|
||||
|
||||
public class SaxonTransformerFactory {
|
||||
|
||||
/**
|
||||
* Creates the index record transformer from the given XSLT
|
||||
* @param xslt
|
||||
* @return
|
||||
* @throws TransformerException
|
||||
*/
|
||||
public static Transformer newInstance(final String xslt) throws TransformerException {
|
||||
|
||||
final TransformerFactoryImpl factory = new TransformerFactoryImpl();
|
||||
final Configuration conf = factory.getConfiguration();
|
||||
conf.registerExtensionFunction(new ExtractYear());
|
||||
conf.registerExtensionFunction(new NormalizeDate());
|
||||
conf.registerExtensionFunction(new PickFirst());
|
||||
|
||||
return factory.newTransformer(new StreamSource(new StringReader(xslt)));
|
||||
}
|
||||
|
||||
}
|
Loading…
Reference in New Issue