fixed PersonCleaner extension functions

This commit is contained in:
Claudio Atzori 2021-04-27 10:10:06 +02:00
parent ef4bfd82e2
commit fa42026590
3 changed files with 15 additions and 35 deletions

View File

@@ -156,7 +156,7 @@ public class TransformSparkJobNode {
* @return * @return
*/ */
private static int getRepartitionNumber(long totalInput, Integer rpt) { private static int getRepartitionNumber(long totalInput, Integer rpt) {
return (int) (totalInput / rpt); return Math.max(1, (int) (totalInput / rpt));
} }
} }

View File

@@ -4,7 +4,6 @@ package eu.dnetlib.dhp.transformation.xslt;
import static eu.dnetlib.dhp.transformation.xslt.XSLTTransformationFunction.QNAME_BASE_URI; import static eu.dnetlib.dhp.transformation.xslt.XSLTTransformationFunction.QNAME_BASE_URI;
import java.io.Serializable; import java.io.Serializable;
// import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets; import java.nio.charset.StandardCharsets;
import java.text.Normalizer; import java.text.Normalizer;
import java.util.List; import java.util.List;
@@ -18,22 +17,10 @@ import com.google.common.hash.Hashing;
import eu.dnetlib.dhp.transformation.xslt.utils.Capitalize; import eu.dnetlib.dhp.transformation.xslt.utils.Capitalize;
import eu.dnetlib.dhp.transformation.xslt.utils.DotAbbreviations; import eu.dnetlib.dhp.transformation.xslt.utils.DotAbbreviations;
import net.sf.saxon.s9api.ExtensionFunction; import net.sf.saxon.s9api.*;
import net.sf.saxon.s9api.ItemType;
import net.sf.saxon.s9api.OccurrenceIndicator;
import net.sf.saxon.s9api.QName;
import net.sf.saxon.s9api.SaxonApiException;
import net.sf.saxon.s9api.SequenceType;
import net.sf.saxon.s9api.XdmValue;
//import eu.dnetlib.pace.clustering.NGramUtils;
//import eu.dnetlib.pace.util.Capitalise;
//import eu.dnetlib.pace.util.DotAbbreviations;
public class PersonCleaner implements ExtensionFunction, Serializable { public class PersonCleaner implements ExtensionFunction, Serializable {
/**
*
*/
private static final long serialVersionUID = 1L; private static final long serialVersionUID = 1L;
private List<String> firstname = Lists.newArrayList(); private List<String> firstname = Lists.newArrayList();
private List<String> surname = Lists.newArrayList(); private List<String> surname = Lists.newArrayList();
@@ -45,7 +32,7 @@ public class PersonCleaner implements ExtensionFunction, Serializable {
} }
public String normalize(String s) { private String normalize(String s) {
s = Normalizer.normalize(s, Normalizer.Form.NFD); // was NFD s = Normalizer.normalize(s, Normalizer.Form.NFD); // was NFD
s = s.replaceAll("\\(.+\\)", ""); s = s.replaceAll("\\(.+\\)", "");
s = s.replaceAll("\\[.+\\]", ""); s = s.replaceAll("\\[.+\\]", "");
@@ -184,7 +171,7 @@ public class PersonCleaner implements ExtensionFunction, Serializable {
@Override @Override
public QName getName() { public QName getName() {
return new QName(QNAME_BASE_URI + "/person", "person"); return new QName(QNAME_BASE_URI + "/person", "normalize");
} }
@Override @Override
@@ -194,13 +181,18 @@ public class PersonCleaner implements ExtensionFunction, Serializable {
@Override @Override
public SequenceType[] getArgumentTypes() { public SequenceType[] getArgumentTypes() {
// TODO Auto-generated method stub return new SequenceType[] {
return null; SequenceType.makeSequenceType(ItemType.STRING, OccurrenceIndicator.ZERO_OR_ONE)
};
} }
@Override @Override
public XdmValue call(XdmValue[] arguments) throws SaxonApiException { public XdmValue call(XdmValue[] xdmValues) throws SaxonApiException {
// TODO Auto-generated method stub XdmValue r = xdmValues[0];
return null; if (r.size() == 0) {
return new XdmAtomicValue("");
}
final String currentValue = xdmValues[0].itemAt(0).getStringValue();
return new XdmAtomicValue(normalize(currentValue));
} }
} }

View File

@@ -68,12 +68,6 @@
<xsl:call-template name="validRecord" /> <xsl:call-template name="validRecord" />
</xsl:template> </xsl:template>
<xsl:template name="terminate">
<xsl:message terminate="yes">
record is not compliant, transformation is interrupted.
</xsl:message>
</xsl:template>
<xsl:template name="validRecord"> <xsl:template name="validRecord">
<record> <record>
<xsl:apply-templates select="//*[local-name() = 'header']" /> <xsl:apply-templates select="//*[local-name() = 'header']" />
@@ -282,9 +276,6 @@
<xsl:value-of select="$varEmbargoEndDate"/> <xsl:value-of select="$varEmbargoEndDate"/>
</oaf:embargoenddate> </oaf:embargoenddate>
</xsl:when> </xsl:when>
<xsl:otherwise>
<xsl:call-template name="terminate"/>
</xsl:otherwise>
</xsl:choose> </xsl:choose>
</xsl:if> </xsl:if>
@@ -310,9 +301,6 @@
</dr:CobjCategory> </dr:CobjCategory>
--> -->
</xsl:when> </xsl:when>
<xsl:otherwise>
<xsl:call-template name="terminate"/>
</xsl:otherwise>
</xsl:choose> </xsl:choose>
<!-- review status --> <!-- review status -->