dnet-docker/dnet-app/libs/dnet-common-mapping/src/main/java/eu/dnetlib/common/mapping/xslt/functions/XsltVocabularyCleaner.java

70 lines
2.2 KiB
Java

package eu.dnetlib.common.mapping.xslt.functions;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import org.apache.commons.lang3.StringUtils;
import eu.dnetlib.common.clients.VocabularyClient;
import eu.dnetlib.common.mapping.xslt.XsltTransformerFactory;
import eu.dnetlib.domain.vocabulary.Synonym;
import net.sf.saxon.s9api.ExtensionFunction;
import net.sf.saxon.s9api.ItemType;
import net.sf.saxon.s9api.OccurrenceIndicator;
import net.sf.saxon.s9api.QName;
import net.sf.saxon.s9api.SaxonApiException;
import net.sf.saxon.s9api.SequenceType;
import net.sf.saxon.s9api.XdmAtomicValue;
import net.sf.saxon.s9api.XdmValue;
/**
 * Saxon extension function {@code clean} that maps a raw value to the code of the vocabulary
 * term for which that value is registered as a synonym.
 *
 * <p>All synonyms of all vocabularies are loaded once, at construction time, into an in-memory
 * lookup table. Lookups are case-insensitive: keys are lower-cased with {@link Locale#ROOT} so
 * that matching does not depend on the default locale of the JVM.
 */
public class XsltVocabularyCleaner implements ExtensionFunction {

    /** Separator placed between the vocabulary id and the synonym when building a lookup key. */
    private static final String SEPARATOR = "@#@";

    /** Lower-cased {@code vocabularyId + SEPARATOR + synonym} mapped to the term code. */
    private final Map<String, String> termsMap = new HashMap<>();

    /**
     * Builds the lookup table by listing every vocabulary, every term of each vocabulary and
     * every synonym of each term through the given client.
     *
     * @param vocabularyClient client used to list the vocabularies and their terms
     */
    public XsltVocabularyCleaner(final VocabularyClient vocabularyClient) {
        vocabularyClient.listVocs().forEach(voc -> {
            vocabularyClient.listTerms(voc.getId()).forEach(term -> {
                for (final Synonym s : term.getSynonyms()) {
                    // Locale.ROOT: the default locale (e.g. Turkish) must not change how keys are folded.
                    final String k = (voc.getId() + SEPARATOR + s.getTerm()).toLowerCase(Locale.ROOT);
                    final String v = term.getCode();
                    // When the same key occurs more than once, the last term listed wins.
                    this.termsMap.put(k, v);
                }
            });
        });
    }

    /**
     * @return the qualified name of the function: local name {@code clean} in the namespace
     *         {@code XsltTransformerFactory.QNAME_BASE_URI + "/vocs"}
     */
    @Override
    public QName getName() {
        return new QName(XsltTransformerFactory.QNAME_BASE_URI + "/vocs", "clean");
    }

    /**
     * @return the declared result type, a sequence of one or more strings; {@link #call} always
     *         returns exactly one string
     */
    @Override
    public SequenceType getResultType() {
        return SequenceType.makeSequenceType(ItemType.STRING, OccurrenceIndicator.ONE_OR_MORE);
    }

    /**
     * @return the argument types: first the value(s) to clean (zero or more strings), then the
     *         vocabulary id (exactly one string)
     */
    @Override
    public SequenceType[] getArgumentTypes() {
        return new SequenceType[] {
            SequenceType.makeSequenceType(ItemType.STRING, OccurrenceIndicator.ZERO_OR_MORE),
            SequenceType.makeSequenceType(ItemType.STRING, OccurrenceIndicator.ONE)
        };
    }

    /**
     * Cleans the first item of the first argument against the vocabulary named by the second
     * argument.
     *
     * @param xdmValues {@code xdmValues[0]} is the sequence holding the value to clean (only its
     *                  first item is used); {@code xdmValues[1]} holds the vocabulary id
     * @return an empty string when the first argument is an empty sequence; otherwise the code
     *         of the matching term, or the input value unchanged when no synonym matches or the
     *         matching code is blank
     * @throws SaxonApiException declared by the {@link ExtensionFunction} contract
     */
    @Override
    public XdmValue call(final XdmValue[] xdmValues) throws SaxonApiException {
        final XdmValue input = xdmValues[0];
        if (input.size() == 0) {
            return new XdmAtomicValue("");
        }

        final String currentValue = input.itemAt(0).getStringValue();
        final String vocId = xdmValues[1].itemAt(0).getStringValue();

        // Must fold case exactly as the constructor does, otherwise lookups silently miss.
        final String key = (vocId + SEPARATOR + currentValue).toLowerCase(Locale.ROOT);
        final String cleanedValue = this.termsMap.getOrDefault(key, currentValue);

        // A blank term code is treated as "no mapping": keep the original value.
        return new XdmAtomicValue(StringUtils.isNotBlank(cleanedValue) ? cleanedValue : currentValue);
    }
}