70 lines
2.2 KiB
Java
70 lines
2.2 KiB
Java
package eu.dnetlib.common.mapping.xslt.functions;
|
|
|
|
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;

import org.apache.commons.lang3.StringUtils;

import eu.dnetlib.common.clients.VocabularyClient;
import eu.dnetlib.common.mapping.xslt.XsltTransformerFactory;
import eu.dnetlib.domain.vocabulary.Synonym;
import net.sf.saxon.s9api.ExtensionFunction;
import net.sf.saxon.s9api.ItemType;
import net.sf.saxon.s9api.OccurrenceIndicator;
import net.sf.saxon.s9api.QName;
import net.sf.saxon.s9api.SaxonApiException;
import net.sf.saxon.s9api.SequenceType;
import net.sf.saxon.s9api.XdmAtomicValue;
import net.sf.saxon.s9api.XdmValue;
|
|
|
|
public class XsltVocabularyCleaner implements ExtensionFunction {
|
|
|
|
private static final String SEPARATOR = "@#@";
|
|
|
|
private final Map<String, String> termsMap = new HashMap<>();
|
|
|
|
public XsltVocabularyCleaner(final VocabularyClient vocabularyClient) {
|
|
vocabularyClient.listVocs().forEach(voc -> {
|
|
vocabularyClient.listTerms(voc.getId()).forEach(term -> {
|
|
for (final Synonym s : term.getSynonyms()) {
|
|
final String k = (voc.getId() + SEPARATOR + s.getTerm()).toLowerCase();
|
|
final String v = term.getCode();
|
|
this.termsMap.put(k, v);
|
|
}
|
|
});
|
|
});
|
|
}
|
|
|
|
@Override
|
|
public QName getName() {
|
|
return new QName(XsltTransformerFactory.QNAME_BASE_URI + "/vocs", "clean");
|
|
}
|
|
|
|
@Override
|
|
public SequenceType getResultType() {
|
|
return SequenceType.makeSequenceType(ItemType.STRING, OccurrenceIndicator.ONE_OR_MORE);
|
|
}
|
|
|
|
@Override
|
|
public SequenceType[] getArgumentTypes() {
|
|
return new SequenceType[] {
|
|
SequenceType.makeSequenceType(ItemType.STRING, OccurrenceIndicator.ZERO_OR_MORE),
|
|
SequenceType.makeSequenceType(ItemType.STRING, OccurrenceIndicator.ONE)
|
|
};
|
|
}
|
|
|
|
@Override
|
|
public XdmValue call(final XdmValue[] xdmValues) throws SaxonApiException {
|
|
final XdmValue r = xdmValues[0];
|
|
if (r.size() == 0) { return new XdmAtomicValue(""); }
|
|
final String currentValue = xdmValues[0].itemAt(0).getStringValue();
|
|
final String vocId = xdmValues[1].itemAt(0).getStringValue();
|
|
|
|
final String key = (vocId + SEPARATOR + currentValue).toLowerCase();
|
|
final String cleanedValue = this.termsMap.getOrDefault(key, currentValue);
|
|
|
|
return new XdmAtomicValue(StringUtils.isNotBlank(cleanedValue) ? cleanedValue : currentValue);
|
|
}
|
|
|
|
}
|