refactoring

This commit is contained in:
Michele Artini 2024-02-08 15:26:53 +01:00
parent 98d8499eec
commit 7fbffa4afa
15 changed files with 218 additions and 282 deletions

View File

@ -3,12 +3,15 @@ package eu.dnetlib.services.oai;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
import org.springframework.boot.autoconfigure.domain.EntityScan;
import org.springframework.context.annotation.Bean;
import eu.dnetlib.common.app.AbstractDnetApp;
import eu.dnetlib.domain.oai.OaiConfiguration;
import eu.dnetlib.domain.oai.OaiMetadataFormat;
import eu.dnetlib.common.clients.DnetServiceClientFactory;
import eu.dnetlib.common.mapping.xslt.XsltTransformerFactory;
import eu.dnetlib.domain.oai.ExportedOaiRecord;
import eu.dnetlib.domain.oai.ExportedOaiSet;
import eu.dnetlib.domain.oai.OaiConfiguration;
import eu.dnetlib.domain.oai.OaiMetadataFormat;
import eu.dnetlib.domain.service.ServiceType;
@SpringBootApplication
@ -23,4 +26,9 @@ public class OaiApplication extends AbstractDnetApp {
// Reports the kind of service implemented by this application: always ServiceType.oai_manager.
protected ServiceType serviceType() {
return ServiceType.oai_manager;
}
/**
 * Declares the {@link XsltTransformerFactory} bean of this application.
 *
 * @param clientFactory
 *            the client factory handed to the new {@link XsltTransformerFactory}
 * @return a new factory built on top of {@code clientFactory}
 */
@Bean
public XsltTransformerFactory xsltTransformFactory(final DnetServiceClientFactory clientFactory) {
return new XsltTransformerFactory(clientFactory);
}
}

View File

@ -13,7 +13,8 @@ import org.springframework.data.domain.PageRequest;
import org.springframework.data.domain.Sort;
import org.springframework.stereotype.Service;
import eu.dnetlib.common.mapping.xslt.XsltUtils;
import eu.dnetlib.common.mapping.RecordTransformer;
import eu.dnetlib.common.mapping.xslt.XsltTransformerFactory;
import eu.dnetlib.domain.oai.ExportedOaiMetadataFormat;
import eu.dnetlib.domain.oai.ExportedOaiRecord;
import eu.dnetlib.domain.oai.ExportedOaiSet;
@ -42,6 +43,9 @@ public class OaiService {
@Autowired
private OaiRecordRepository oaiRecordRepository;
@Autowired
private XsltTransformerFactory xsltTransformerFactory;
private static final int CONFIGURATION_ID = 0;
private static final int OAI_PAGE_SIZE = 100;
@ -111,7 +115,8 @@ public class OaiService {
.orElseThrow(() -> new DnetRuntimeException("Invalid metadata format: " + metadataPrefix));
try {
record.setBody(XsltUtils.applyXslt(record.getBody(), xslt));
final RecordTransformer<String, String> transformer = this.xsltTransformerFactory.getTransformerByXSLT(xslt, null);
record.setBody(transformer.transform(record.getBody()));
} catch (final Exception e) {
throw new DnetRuntimeException("Error processing record", e);
}

View File

@ -1,9 +1,5 @@
package eu.dnetlib.wfs;
import java.util.ArrayList;
import java.util.List;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
@ -15,8 +11,7 @@ import eu.dnetlib.common.app.AbstractDnetApp;
import eu.dnetlib.common.clients.DnetServiceClientFactory;
import eu.dnetlib.common.index.solr.SolrService;
import eu.dnetlib.common.mapping.cleaner.CleanerFactory;
import eu.dnetlib.common.mapping.xslt.DnetXsltFunction;
import eu.dnetlib.common.mapping.xslt.XsltTransformFactory;
import eu.dnetlib.common.mapping.xslt.XsltTransformerFactory;
import eu.dnetlib.common.mdstores.backends.sql.MDStoreSqlBackend;
import eu.dnetlib.domain.service.ServiceType;
import eu.dnetlib.domain.wfs.conf.WfConfiguration;
@ -34,9 +29,6 @@ import eu.dnetlib.domain.wfs.subscriptions.WfSubscription;
@EnableScheduling
public class WfExecutorApplication extends AbstractDnetApp {
@Autowired(required = false)
private final List<DnetXsltFunction> xsltFunctions = new ArrayList<>();
@Value("${mdstores.data.datasource.url}")
private String databaseUrl;
@ -67,8 +59,8 @@ public class WfExecutorApplication extends AbstractDnetApp {
}
@Bean
public XsltTransformFactory xsltTransformFactory(final DnetServiceClientFactory clientFactory) {
return new XsltTransformFactory(clientFactory, this.xsltFunctions);
public XsltTransformerFactory xsltTransformFactory(final DnetServiceClientFactory clientFactory) {
return new XsltTransformerFactory(clientFactory);
}
@Bean

View File

@ -17,7 +17,7 @@ import eu.dnetlib.common.clients.DnetServiceClientFactory;
import eu.dnetlib.common.clients.DsmClient;
import eu.dnetlib.common.clients.MDStoreManagerClient;
import eu.dnetlib.common.mapping.RecordTransformer;
import eu.dnetlib.common.mapping.xslt.XsltTransformFactory;
import eu.dnetlib.common.mapping.xslt.XsltTransformerFactory;
import eu.dnetlib.common.mdstores.backends.sql.MDStoreSqlBackend;
import eu.dnetlib.domain.dsm.Api;
import eu.dnetlib.domain.dsm.Datasource;
@ -49,7 +49,7 @@ public class MdTransformJobNode extends ProcessNode {
private String ruleId;
@Autowired
private XsltTransformFactory xsltTransformFactory;
private XsltTransformerFactory xsltTransformFactory;
@Autowired
private DnetServiceClientFactory clientFactory;
@ -64,7 +64,7 @@ public class MdTransformJobNode extends ProcessNode {
final Map<String, Object> params = new HashMap<>();
// TODO (LOW PRIORITY): which params ?
final RecordTransformer<String, String> xslt = this.xsltTransformFactory.getTransformer(this.ruleId, params);
final RecordTransformer<String, String> xslt = this.xsltTransformFactory.getTransformerById(this.ruleId, params);
final MDStoreManagerClient mdstoreManager = this.clientFactory.getClient(MDStoreManagerClient.class);
final MDStoreVersion inputVersion = mdstoreManager.startReading(this.inputMdId);

View File

@ -1,9 +1,5 @@
package eu.dnetlib.wfs;
import java.util.ArrayList;
import java.util.List;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
import org.springframework.boot.autoconfigure.domain.EntityScan;
@ -12,8 +8,7 @@ import org.springframework.context.annotation.Bean;
import eu.dnetlib.common.app.AbstractDnetApp;
import eu.dnetlib.common.clients.DnetServiceClientFactory;
import eu.dnetlib.common.mapping.cleaner.CleanerFactory;
import eu.dnetlib.common.mapping.xslt.DnetXsltFunction;
import eu.dnetlib.common.mapping.xslt.XsltTransformFactory;
import eu.dnetlib.common.mapping.xslt.XsltTransformerFactory;
import eu.dnetlib.domain.service.ServiceType;
import eu.dnetlib.domain.wfs.conf.WfConfiguration;
import eu.dnetlib.domain.wfs.conf.WfSection;
@ -29,9 +24,6 @@ import eu.dnetlib.domain.wfs.subscriptions.WfSubscription;
})
public class WfManagerApplication extends AbstractDnetApp {
@Autowired(required = false)
private final List<DnetXsltFunction> xsltFunctions = new ArrayList<>();
/**
 * Application entry point: delegates to {@link SpringApplication#run(Class, String...)} using this class as primary source.
 *
 * @param args
 *            the command line arguments, passed through unchanged
 */
public static void main(final String[] args) {
SpringApplication.run(WfManagerApplication.class, args);
}
@ -42,8 +34,8 @@ public class WfManagerApplication extends AbstractDnetApp {
}
@Bean
public XsltTransformFactory xsltTransformFactory(final DnetServiceClientFactory clientFactory) {
return new XsltTransformFactory(clientFactory, this.xsltFunctions);
public XsltTransformerFactory xsltTransformFactory(final DnetServiceClientFactory clientFactory) {
return new XsltTransformerFactory(clientFactory);
}
@Bean

View File

@ -17,7 +17,7 @@ import org.springframework.web.bind.annotation.RestController;
import eu.dnetlib.common.controller.DnetRestController;
import eu.dnetlib.common.mapping.RecordTransformer;
import eu.dnetlib.common.mapping.cleaner.CleanerFactory;
import eu.dnetlib.common.mapping.xslt.XsltTransformFactory;
import eu.dnetlib.common.mapping.xslt.XsltTransformerFactory;
import jakarta.servlet.http.HttpServletRequest;
import jakarta.servlet.http.HttpServletResponse;
@ -29,7 +29,7 @@ public class MappingAjaxController extends DnetRestController {
private CleanerFactory cleanerFactory;
@Autowired
private XsltTransformFactory xsltFactory;
private XsltTransformerFactory xsltFactory;
@PostMapping(value = "/clean", consumes = {
MediaType.TEXT_PLAIN_VALUE, MediaType.APPLICATION_XML_VALUE,
@ -41,7 +41,7 @@ public class MappingAjaxController extends DnetRestController {
@PostMapping(value = "/xsltTransform", consumes = { MediaType.TEXT_PLAIN_VALUE,
MediaType.APPLICATION_XML_VALUE, }, produces = MediaType.APPLICATION_XML_VALUE)
public void xsltTransform(@RequestParam final String rule, final HttpServletRequest req, final HttpServletResponse res) throws Exception {
transform(req, res, this.xsltFactory.getTransformer(rule, new HashMap<>()));
transform(req, res, this.xsltFactory.getTransformerById(rule, new HashMap<>()));
}
private void transform(final HttpServletRequest req, final HttpServletResponse res, final RecordTransformer<String, String> transformer)

View File

@ -13,7 +13,7 @@ public class VocabularyClient extends DnetServiceClient {
@Cacheable("vocabulary_list")
public List<Vocabulary> listVocs() {
return Arrays.asList(httpGet("/api/vocs", Vocabulary[].class, Map.of()));
return Arrays.asList(httpGet("/api/vocs/", Vocabulary[].class, Map.of()));
}
@Cacheable("vocabulary_terms")

View File

@ -1,8 +0,0 @@
package eu.dnetlib.common.mapping.xslt;
import net.sf.saxon.s9api.ExtensionFunction;
/**
 * Contract for objects that can be exposed as a Saxon {@link ExtensionFunction}.
 */
public interface DnetXsltFunction {
/**
 * @return the Saxon {@link ExtensionFunction} view of this object
 */
ExtensionFunction asExtensionFunction();
}

View File

@ -1,73 +0,0 @@
package eu.dnetlib.common.mapping.xslt;
import java.time.LocalDate;
import java.time.ZoneId;
import java.time.format.DateTimeFormatter;
import java.time.format.DateTimeParseException;
import org.apache.commons.lang3.StringUtils;
import org.springframework.stereotype.Component;
import com.github.sisyphsu.dateparser.DateParserUtils;
import net.sf.saxon.s9api.ExtensionFunction;
import net.sf.saxon.s9api.ItemType;
import net.sf.saxon.s9api.OccurrenceIndicator;
import net.sf.saxon.s9api.QName;
import net.sf.saxon.s9api.SaxonApiException;
import net.sf.saxon.s9api.SequenceType;
import net.sf.saxon.s9api.XdmAtomicValue;
import net.sf.saxon.s9api.XdmValue;
/**
 * Provides a Saxon extension function, named {@code dateISO}, that rewrites a date string using the
 * {@link #DATE_FORMAT} pattern.
 */
@Component
public class XsltDateCleaner implements DnetXsltFunction {

	public static final String DATE_FORMAT = "yyyy-MM-dd";

	// DateTimeFormatter is immutable and thread-safe: build it once instead of on every invocation.
	private static final DateTimeFormatter OUTPUT_FORMATTER = DateTimeFormatter.ofPattern(DATE_FORMAT);

	/**
	 * @return a new {@link ExtensionFunction} taking an optional string and returning an optional string
	 */
	@Override
	public ExtensionFunction asExtensionFunction() {
		return new ExtensionFunction() {

			@Override
			public QName getName() {
				return new QName(XsltTransformFactory.QNAME_BASE_URI + "/dateISO", "dateISO");
			}

			@Override
			public SequenceType getResultType() {
				return SequenceType.makeSequenceType(ItemType.STRING, OccurrenceIndicator.ZERO_OR_ONE);
			}

			@Override
			public SequenceType[] getArgumentTypes() {
				return new SequenceType[] {
						SequenceType.makeSequenceType(ItemType.STRING, OccurrenceIndicator.ZERO_OR_ONE)
				};
			}

			/**
			 * Reformats the first item of the first argument.
			 *
			 * @return the reformatted date, or an empty string when the argument is empty, blank or not parseable
			 */
			@Override
			public XdmValue call(final XdmValue[] xdmValues) throws SaxonApiException {
				final XdmValue arg = xdmValues[0];
				if (arg.size() == 0) { return new XdmAtomicValue(""); }

				final String cleaned = clean(arg.itemAt(0).getStringValue());

				// clean() signals "no usable date" with null: make the fallback explicit
				// rather than depending on how XdmAtomicValue treats a null String.
				return new XdmAtomicValue(cleaned != null ? cleaned : "");
			}

			/**
			 * @param inputDate
			 *            the raw date text
			 * @return the date formatted as {@code yyyy-MM-dd}, or {@code null} when the input is blank or cannot be parsed
			 */
			public String clean(final String inputDate) {
				if (StringUtils.isBlank(inputDate)) { return null; }

				try {
					final LocalDate date = DateParserUtils
							.parseDate(inputDate.trim())
							.toInstant()
							.atZone(ZoneId.systemDefault())
							.toLocalDate();
					return OUTPUT_FORMATTER.format(date);
				} catch (final DateTimeParseException e) {
					return null;
				}
			}
		};
	}
}

View File

@ -1,33 +0,0 @@
package eu.dnetlib.common.mapping.xslt;
import java.util.List;
import java.util.Map;
import eu.dnetlib.common.clients.DnetServiceClientFactory;
import eu.dnetlib.common.clients.SimpleResourceClient;
import eu.dnetlib.common.mapping.RecordTransformer;
import eu.dnetlib.errors.TransformationException;
/**
 * Builds XSLT-based record transformers; the stylesheet text is fetched through a {@link SimpleResourceClient}
 * using the id of a transformation rule.
 */
public class XsltTransformFactory {

	public static final String QNAME_BASE_URI = "http://eu/dnetlib/transform";

	private final DnetServiceClientFactory clientFactory;

	private final List<DnetXsltFunction> xsltFunctions;

	/**
	 * @param clientFactory
	 *            used to obtain the client that resolves the XSLT text
	 * @param xsltFunctions
	 *            the functions handed to every transformer created by this factory
	 */
	public XsltTransformFactory(final DnetServiceClientFactory clientFactory, final List<DnetXsltFunction> xsltFunctions) {
		this.clientFactory = clientFactory;
		this.xsltFunctions = xsltFunctions;
	}

	/**
	 * @param ruleId
	 *            the id of the {@code transformation_rule_xslt} resource to load
	 * @param initialParams
	 *            the parameters forwarded to {@code XsltUtils.newTransformer}
	 * @return the transformer created by {@code XsltUtils.newTransformer} for the loaded stylesheet
	 * @throws TransformationException
	 *             declared by the transformer creation
	 */
	public RecordTransformer<String, String> getTransformer(final String ruleId, final Map<String, Object> initialParams) throws TransformationException {
		final SimpleResourceClient resources = this.clientFactory.getClient(SimpleResourceClient.class);
		final String stylesheet = resources.findResourceContent(SimpleResourceClient.ResourceType.transformation_rule_xslt, ruleId, String.class);
		return XsltUtils.newTransformer(stylesheet, initialParams, this.xsltFunctions);
	}
}

View File

@ -3,19 +3,21 @@ package eu.dnetlib.common.mapping.xslt;
import java.io.StringWriter;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import javax.xml.transform.stream.StreamSource;
import org.apache.commons.io.IOUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import eu.dnetlib.common.clients.DnetServiceClientFactory;
import eu.dnetlib.common.clients.SimpleResourceClient;
import eu.dnetlib.common.clients.VocabularyClient;
import eu.dnetlib.common.mapping.RecordTransformer;
import eu.dnetlib.common.mapping.xslt.functions.XsltDateCleaner;
import eu.dnetlib.common.mapping.xslt.functions.XsltPersonCleaner;
import eu.dnetlib.common.mapping.xslt.functions.XsltVocabularyCleaner;
import eu.dnetlib.errors.TransformationException;
import net.sf.saxon.s9api.ExtensionFunction;
import net.sf.saxon.s9api.Processor;
import net.sf.saxon.s9api.QName;
import net.sf.saxon.s9api.SaxonApiException;
@ -27,31 +29,29 @@ import net.sf.saxon.s9api.XsltCompiler;
import net.sf.saxon.s9api.XsltExecutable;
import net.sf.saxon.s9api.XsltTransformer;
public class XsltUtils {
public class XsltTransformerFactory {
private static final Log log = LogFactory.getLog(XsltUtils.class);
private final DnetServiceClientFactory clientFactory;
public static String applyXslt(final String xml, final String xslt) throws Exception {
return applyXslt(xml, xslt, new HashMap<>(), new ArrayList<>());
public static final String QNAME_BASE_URI = "http://eu/dnetlib/transform";
public XsltTransformerFactory(final DnetServiceClientFactory clientFactory) {
this.clientFactory = clientFactory;
}
public static String applyXslt(final String xml, final String xslt, final Map<String, Object> params, final List<DnetXsltFunction> xsltFunctions)
throws Exception {
final RecordTransformer<String, String> transformer = newTransformer(xslt, params, xsltFunctions);
return transformer.transform(xml);
public RecordTransformer<String, String> getTransformerById(final String ruleId, final Map<String, Object> params) throws TransformationException {
final String xsltText =
this.clientFactory.getClient(SimpleResourceClient.class)
.findResourceContent(SimpleResourceClient.ResourceType.transformation_rule_xslt, ruleId, String.class);
return getTransformerByXSLT(xsltText, params);
}
public static RecordTransformer<String, String> newTransformer(final String xsltText,
final Map<String, Object> params,
final List<DnetXsltFunction> xsltFunctions) throws TransformationException {
public RecordTransformer<String, String> getTransformerByXSLT(final String xsltText, final Map<String, Object> params) throws TransformationException {
final Processor processor = new Processor(false);
for (final DnetXsltFunction f : xsltFunctions) {
final ExtensionFunction extFunction = f.asExtensionFunction();
processor.registerExtensionFunction(extFunction);
log.info("New XSLT function registered: " + extFunction.getName());
}
processor.registerExtensionFunction(new XsltDateCleaner());
processor.registerExtensionFunction(new XsltPersonCleaner());
processor.registerExtensionFunction(new XsltVocabularyCleaner(this.clientFactory.getClient(VocabularyClient.class)));
final List<XmlProcessingError> errorList = new ArrayList<>();
@ -60,9 +60,7 @@ public class XsltUtils {
params.forEach((k, v) -> comp.setParameter(new QName(k), XdmAtomicValue.makeAtomicValue(v)));
try {
final XsltExecutable xslt = comp
.compile(new StreamSource(IOUtils.toInputStream(xsltText, StandardCharsets.UTF_8)));
final XsltExecutable xslt = comp.compile(new StreamSource(IOUtils.toInputStream(xsltText, StandardCharsets.UTF_8)));
return xml -> {
try {
final XdmNode source = processor

View File

@ -1,77 +0,0 @@
package eu.dnetlib.common.mapping.xslt;
import java.util.HashMap;
import java.util.Map;
import org.apache.commons.lang3.StringUtils;
import org.springframework.beans.factory.annotation.Autowired;
import eu.dnetlib.common.clients.VocabularyClient;
import eu.dnetlib.domain.vocabulary.Synonym;
import net.sf.saxon.s9api.ExtensionFunction;
import net.sf.saxon.s9api.ItemType;
import net.sf.saxon.s9api.OccurrenceIndicator;
import net.sf.saxon.s9api.QName;
import net.sf.saxon.s9api.SaxonApiException;
import net.sf.saxon.s9api.SequenceType;
import net.sf.saxon.s9api.XdmAtomicValue;
import net.sf.saxon.s9api.XdmValue;
/**
 * Provides a Saxon extension function, named {@code clean}, that replaces a value with the code of the vocabulary
 * term listing that value among its synonyms.
 */
public class XsltVocabularyCleaner implements DnetXsltFunction {

	@Autowired
	private VocabularyClient vocabularyClient;

	private static final String SEPARATOR = "@#@";

	/**
	 * Loads all the synonyms of all the vocabularies through the {@link VocabularyClient} and returns a function
	 * that looks values up in that snapshot.
	 *
	 * @return a new {@link ExtensionFunction} with arguments (value, vocabulary id)
	 */
	@Override
	public ExtensionFunction asExtensionFunction() {

		final Map<String, String> termsMap = new HashMap<>();

		this.vocabularyClient.listVocs().forEach(voc -> {
			this.vocabularyClient.listTerms(voc.getId()).forEach(term -> {
				for (final Synonym s : term.getSynonyms()) {
					// Locale.ROOT keeps the keys independent from the JVM default locale
					// (e.g. the Turkish dotless i), so that lookups always match.
					final String k = (voc.getId() + SEPARATOR + s.getTerm()).toLowerCase(java.util.Locale.ROOT);
					termsMap.put(k, term.getCode());
				}
			});
		});

		return new ExtensionFunction() {

			@Override
			public QName getName() {
				return new QName(XsltTransformFactory.QNAME_BASE_URI + "/clean", "clean");
			}

			@Override
			public SequenceType getResultType() {
				return SequenceType.makeSequenceType(ItemType.STRING, OccurrenceIndicator.ONE_OR_MORE);
			}

			@Override
			public SequenceType[] getArgumentTypes() {
				return new SequenceType[] {
						SequenceType.makeSequenceType(ItemType.STRING, OccurrenceIndicator.ZERO_OR_MORE),
						SequenceType.makeSequenceType(ItemType.STRING, OccurrenceIndicator.ONE)
				};
			}

			/**
			 * @return the code mapped to (vocabulary id, value), or the value itself when there is no mapping or the
			 *         mapped code is blank; an empty string when the first argument is empty
			 */
			@Override
			public XdmValue call(final XdmValue[] xdmValues) throws SaxonApiException {
				final XdmValue values = xdmValues[0];
				if (values.size() == 0) { return new XdmAtomicValue(""); }

				final String currentValue = values.itemAt(0).getStringValue();
				final String vocId = xdmValues[1].itemAt(0).getStringValue();

				// Same normalization used when the map was populated
				final String key = (vocId + SEPARATOR + currentValue).toLowerCase(java.util.Locale.ROOT);
				final String cleanedValue = termsMap.getOrDefault(key, currentValue);

				return new XdmAtomicValue(StringUtils.isNotBlank(cleanedValue) ? cleanedValue : currentValue);
			}
		};
	}
}

View File

@ -0,0 +1,68 @@
package eu.dnetlib.common.mapping.xslt.functions;
import java.time.LocalDate;
import java.time.ZoneId;
import java.time.format.DateTimeFormatter;
import java.time.format.DateTimeParseException;
import org.apache.commons.lang3.StringUtils;
import org.springframework.stereotype.Component;
import com.github.sisyphsu.dateparser.DateParserUtils;
import eu.dnetlib.common.mapping.xslt.XsltTransformerFactory;
import net.sf.saxon.s9api.ExtensionFunction;
import net.sf.saxon.s9api.ItemType;
import net.sf.saxon.s9api.OccurrenceIndicator;
import net.sf.saxon.s9api.QName;
import net.sf.saxon.s9api.SaxonApiException;
import net.sf.saxon.s9api.SequenceType;
import net.sf.saxon.s9api.XdmAtomicValue;
import net.sf.saxon.s9api.XdmValue;
/**
 * Saxon extension function, named {@code dateISO}, that rewrites a date string using the {@link #DATE_FORMAT} pattern.
 */
@Component
public class XsltDateCleaner implements ExtensionFunction {

	public static final String DATE_FORMAT = "yyyy-MM-dd";

	// DateTimeFormatter is immutable and thread-safe: build it once instead of on every invocation.
	private static final DateTimeFormatter OUTPUT_FORMATTER = DateTimeFormatter.ofPattern(DATE_FORMAT);

	@Override
	public QName getName() {
		return new QName(XsltTransformerFactory.QNAME_BASE_URI + "/dateISO", "dateISO");
	}

	@Override
	public SequenceType getResultType() {
		return SequenceType.makeSequenceType(ItemType.STRING, OccurrenceIndicator.ZERO_OR_ONE);
	}

	@Override
	public SequenceType[] getArgumentTypes() {
		return new SequenceType[] {
				SequenceType.makeSequenceType(ItemType.STRING, OccurrenceIndicator.ZERO_OR_ONE)
		};
	}

	/**
	 * Reformats the first item of the first argument.
	 *
	 * @return the reformatted date, or an empty string when the argument is empty, blank or not parseable
	 */
	@Override
	public XdmValue call(final XdmValue[] xdmValues) throws SaxonApiException {
		final XdmValue arg = xdmValues[0];
		if (arg.size() == 0) { return new XdmAtomicValue(""); }

		final String cleaned = clean(arg.itemAt(0).getStringValue());

		// clean() signals "no usable date" with null: make the fallback explicit
		// rather than depending on how XdmAtomicValue treats a null String.
		return new XdmAtomicValue(cleaned != null ? cleaned : "");
	}

	/**
	 * @param inputDate
	 *            the raw date text
	 * @return the date formatted as {@code yyyy-MM-dd}, or {@code null} when the input is blank or cannot be parsed
	 */
	public String clean(final String inputDate) {
		if (StringUtils.isBlank(inputDate)) { return null; }

		try {
			final LocalDate date = DateParserUtils
					.parseDate(inputDate.trim())
					.toInstant()
					.atZone(ZoneId.systemDefault())
					.toLocalDate();
			return OUTPUT_FORMATTER.format(date);
		} catch (final DateTimeParseException e) {
			return null;
		}
	}
}

View File

@ -1,4 +1,4 @@
package eu.dnetlib.common.mapping.xslt;
package eu.dnetlib.common.mapping.xslt.functions;
import java.text.Normalizer;
import java.util.ArrayList;
@ -9,6 +9,7 @@ import java.util.stream.Collectors;
import org.apache.commons.lang3.StringUtils;
import org.springframework.stereotype.Component;
import eu.dnetlib.common.mapping.xslt.XsltTransformerFactory;
import net.sf.saxon.s9api.ExtensionFunction;
import net.sf.saxon.s9api.ItemType;
import net.sf.saxon.s9api.OccurrenceIndicator;
@ -19,7 +20,32 @@ import net.sf.saxon.s9api.XdmAtomicValue;
import net.sf.saxon.s9api.XdmValue;
@Component
public class XsltPersonCleaner implements DnetXsltFunction {
public class XsltPersonCleaner implements ExtensionFunction {
/**
 * @return the name of this function: a {@link QName} built from {@code QNAME_BASE_URI + "/person"} and {@code "normalize"}
 */
@Override
public QName getName() {
return new QName(XsltTransformerFactory.QNAME_BASE_URI + "/person", "normalize");
}
/**
 * @return the declared result type: zero or one string
 */
@Override
public SequenceType getResultType() {
return SequenceType.makeSequenceType(ItemType.STRING, OccurrenceIndicator.ZERO_OR_ONE);
}
/**
 * @return the declared argument types: a single argument holding zero or one string
 */
@Override
public SequenceType[] getArgumentTypes() {
return new SequenceType[] {
SequenceType.makeSequenceType(ItemType.STRING, OccurrenceIndicator.ZERO_OR_ONE)
};
}
/**
 * Applies {@code normalize} to the first item of the first argument.
 *
 * @return the normalized text, or an empty string when the argument is an empty sequence
 */
@Override
public XdmValue call(final XdmValue[] xdmValues) throws SaxonApiException {
	final XdmValue arg = xdmValues[0];
	if (arg.size() == 0) { return new XdmAtomicValue(""); }
	return new XdmAtomicValue(normalize(arg.itemAt(0).getStringValue()));
}
public static String normalize(String s) {
final List<String> firstname = new ArrayList<>();
@ -109,35 +135,4 @@ public class XsltPersonCleaner implements DnetXsltFunction {
.collect(Collectors.toList());
}
/**
 * @return a new {@link ExtensionFunction}, named {@code normalize}, that applies {@code normalize} to its
 *         optional string argument
 */
@Override
public ExtensionFunction asExtensionFunction() {
	return new ExtensionFunction() {

		@Override
		public QName getName() {
			return new QName(XsltTransformFactory.QNAME_BASE_URI + "/person", "normalize");
		}

		@Override
		public SequenceType getResultType() {
			return SequenceType.makeSequenceType(ItemType.STRING, OccurrenceIndicator.ZERO_OR_ONE);
		}

		@Override
		public SequenceType[] getArgumentTypes() {
			final SequenceType optionalString = SequenceType.makeSequenceType(ItemType.STRING, OccurrenceIndicator.ZERO_OR_ONE);
			return new SequenceType[] { optionalString };
		}

		@Override
		public XdmValue call(final XdmValue[] xdmValues) throws SaxonApiException {
			final XdmValue arg = xdmValues[0];
			// An empty sequence is answered with an empty string
			if (arg.size() == 0) { return new XdmAtomicValue(""); }
			return new XdmAtomicValue(normalize(arg.itemAt(0).getStringValue()));
		}
	};
}
}

View File

@ -0,0 +1,69 @@
package eu.dnetlib.common.mapping.xslt.functions;
import java.util.HashMap;
import java.util.Map;
import org.apache.commons.lang3.StringUtils;
import eu.dnetlib.common.clients.VocabularyClient;
import eu.dnetlib.common.mapping.xslt.XsltTransformerFactory;
import eu.dnetlib.domain.vocabulary.Synonym;
import net.sf.saxon.s9api.ExtensionFunction;
import net.sf.saxon.s9api.ItemType;
import net.sf.saxon.s9api.OccurrenceIndicator;
import net.sf.saxon.s9api.QName;
import net.sf.saxon.s9api.SaxonApiException;
import net.sf.saxon.s9api.SequenceType;
import net.sf.saxon.s9api.XdmAtomicValue;
import net.sf.saxon.s9api.XdmValue;
/**
 * Saxon extension function, named {@code clean}, that replaces a value with the code of the vocabulary term listing
 * that value among its synonyms.
 * <p>
 * The synonym table is filled once, by the constructor, and never refreshed afterwards.
 */
public class XsltVocabularyCleaner implements ExtensionFunction {

	private static final String SEPARATOR = "@#@";

	private final Map<String, String> termsMap = new HashMap<>();

	/**
	 * Loads all the synonyms of all the vocabularies returned by the given client.
	 *
	 * @param vocabularyClient
	 *            the client used to list vocabularies and their terms
	 */
	public XsltVocabularyCleaner(final VocabularyClient vocabularyClient) {
		vocabularyClient.listVocs().forEach(voc -> {
			vocabularyClient.listTerms(voc.getId()).forEach(term -> {
				for (final Synonym s : term.getSynonyms()) {
					// Locale.ROOT keeps the keys independent from the JVM default locale
					// (e.g. the Turkish dotless i), so that lookups always match.
					final String k = (voc.getId() + SEPARATOR + s.getTerm()).toLowerCase(java.util.Locale.ROOT);
					this.termsMap.put(k, term.getCode());
				}
			});
		});
	}

	@Override
	public QName getName() {
		return new QName(XsltTransformerFactory.QNAME_BASE_URI + "/clean", "clean");
	}

	@Override
	public SequenceType getResultType() {
		return SequenceType.makeSequenceType(ItemType.STRING, OccurrenceIndicator.ONE_OR_MORE);
	}

	@Override
	public SequenceType[] getArgumentTypes() {
		return new SequenceType[] {
				SequenceType.makeSequenceType(ItemType.STRING, OccurrenceIndicator.ZERO_OR_MORE),
				SequenceType.makeSequenceType(ItemType.STRING, OccurrenceIndicator.ONE)
		};
	}

	/**
	 * @param xdmValues
	 *            the value to clean (only its first item is used) and the vocabulary id
	 * @return the code mapped to (vocabulary id, value), or the value itself when there is no mapping or the mapped
	 *         code is blank; an empty string when the first argument is empty
	 */
	@Override
	public XdmValue call(final XdmValue[] xdmValues) throws SaxonApiException {
		final XdmValue values = xdmValues[0];
		if (values.size() == 0) { return new XdmAtomicValue(""); }

		final String currentValue = values.itemAt(0).getStringValue();
		final String vocId = xdmValues[1].itemAt(0).getStringValue();

		// Same normalization used when the map was populated
		final String key = (vocId + SEPARATOR + currentValue).toLowerCase(java.util.Locale.ROOT);
		final String cleanedValue = this.termsMap.getOrDefault(key, currentValue);

		return new XdmAtomicValue(StringUtils.isNotBlank(cleanedValue) ? cleanedValue : currentValue);
	}
}