refactoring
This commit is contained in:
parent
98d8499eec
commit
7fbffa4afa
|
@ -3,12 +3,15 @@ package eu.dnetlib.services.oai;
|
|||
import org.springframework.boot.SpringApplication;
|
||||
import org.springframework.boot.autoconfigure.SpringBootApplication;
|
||||
import org.springframework.boot.autoconfigure.domain.EntityScan;
|
||||
import org.springframework.context.annotation.Bean;
|
||||
|
||||
import eu.dnetlib.common.app.AbstractDnetApp;
|
||||
import eu.dnetlib.domain.oai.OaiConfiguration;
|
||||
import eu.dnetlib.domain.oai.OaiMetadataFormat;
|
||||
import eu.dnetlib.common.clients.DnetServiceClientFactory;
|
||||
import eu.dnetlib.common.mapping.xslt.XsltTransformerFactory;
|
||||
import eu.dnetlib.domain.oai.ExportedOaiRecord;
|
||||
import eu.dnetlib.domain.oai.ExportedOaiSet;
|
||||
import eu.dnetlib.domain.oai.OaiConfiguration;
|
||||
import eu.dnetlib.domain.oai.OaiMetadataFormat;
|
||||
import eu.dnetlib.domain.service.ServiceType;
|
||||
|
||||
@SpringBootApplication
|
||||
|
@ -23,4 +26,9 @@ public class OaiApplication extends AbstractDnetApp {
|
|||
protected ServiceType serviceType() {
|
||||
return ServiceType.oai_manager;
|
||||
}
|
||||
|
||||
@Bean
|
||||
public XsltTransformerFactory xsltTransformFactory(final DnetServiceClientFactory clientFactory) {
|
||||
return new XsltTransformerFactory(clientFactory);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -13,7 +13,8 @@ import org.springframework.data.domain.PageRequest;
|
|||
import org.springframework.data.domain.Sort;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import eu.dnetlib.common.mapping.xslt.XsltUtils;
|
||||
import eu.dnetlib.common.mapping.RecordTransformer;
|
||||
import eu.dnetlib.common.mapping.xslt.XsltTransformerFactory;
|
||||
import eu.dnetlib.domain.oai.ExportedOaiMetadataFormat;
|
||||
import eu.dnetlib.domain.oai.ExportedOaiRecord;
|
||||
import eu.dnetlib.domain.oai.ExportedOaiSet;
|
||||
|
@ -42,6 +43,9 @@ public class OaiService {
|
|||
@Autowired
|
||||
private OaiRecordRepository oaiRecordRepository;
|
||||
|
||||
@Autowired
|
||||
private XsltTransformerFactory xsltTransformerFactory;
|
||||
|
||||
private static final int CONFIGURATION_ID = 0;
|
||||
|
||||
private static final int OAI_PAGE_SIZE = 100;
|
||||
|
@ -111,7 +115,8 @@ public class OaiService {
|
|||
.orElseThrow(() -> new DnetRuntimeException("Invalid metadata format: " + metadataPrefix));
|
||||
|
||||
try {
|
||||
record.setBody(XsltUtils.applyXslt(record.getBody(), xslt));
|
||||
final RecordTransformer<String, String> transformer = this.xsltTransformerFactory.getTransformerByXSLT(xslt, null);
|
||||
record.setBody(transformer.transform(record.getBody()));
|
||||
} catch (final Exception e) {
|
||||
throw new DnetRuntimeException("Error processing record", e);
|
||||
}
|
||||
|
|
|
@ -1,9 +1,5 @@
|
|||
package eu.dnetlib.wfs;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.beans.factory.annotation.Value;
|
||||
import org.springframework.boot.SpringApplication;
|
||||
import org.springframework.boot.autoconfigure.SpringBootApplication;
|
||||
|
@ -15,8 +11,7 @@ import eu.dnetlib.common.app.AbstractDnetApp;
|
|||
import eu.dnetlib.common.clients.DnetServiceClientFactory;
|
||||
import eu.dnetlib.common.index.solr.SolrService;
|
||||
import eu.dnetlib.common.mapping.cleaner.CleanerFactory;
|
||||
import eu.dnetlib.common.mapping.xslt.DnetXsltFunction;
|
||||
import eu.dnetlib.common.mapping.xslt.XsltTransformFactory;
|
||||
import eu.dnetlib.common.mapping.xslt.XsltTransformerFactory;
|
||||
import eu.dnetlib.common.mdstores.backends.sql.MDStoreSqlBackend;
|
||||
import eu.dnetlib.domain.service.ServiceType;
|
||||
import eu.dnetlib.domain.wfs.conf.WfConfiguration;
|
||||
|
@ -34,9 +29,6 @@ import eu.dnetlib.domain.wfs.subscriptions.WfSubscription;
|
|||
@EnableScheduling
|
||||
public class WfExecutorApplication extends AbstractDnetApp {
|
||||
|
||||
@Autowired(required = false)
|
||||
private final List<DnetXsltFunction> xsltFunctions = new ArrayList<>();
|
||||
|
||||
@Value("${mdstores.data.datasource.url}")
|
||||
private String databaseUrl;
|
||||
|
||||
|
@ -67,8 +59,8 @@ public class WfExecutorApplication extends AbstractDnetApp {
|
|||
}
|
||||
|
||||
@Bean
|
||||
public XsltTransformFactory xsltTransformFactory(final DnetServiceClientFactory clientFactory) {
|
||||
return new XsltTransformFactory(clientFactory, this.xsltFunctions);
|
||||
public XsltTransformerFactory xsltTransformFactory(final DnetServiceClientFactory clientFactory) {
|
||||
return new XsltTransformerFactory(clientFactory);
|
||||
}
|
||||
|
||||
@Bean
|
||||
|
|
|
@ -17,7 +17,7 @@ import eu.dnetlib.common.clients.DnetServiceClientFactory;
|
|||
import eu.dnetlib.common.clients.DsmClient;
|
||||
import eu.dnetlib.common.clients.MDStoreManagerClient;
|
||||
import eu.dnetlib.common.mapping.RecordTransformer;
|
||||
import eu.dnetlib.common.mapping.xslt.XsltTransformFactory;
|
||||
import eu.dnetlib.common.mapping.xslt.XsltTransformerFactory;
|
||||
import eu.dnetlib.common.mdstores.backends.sql.MDStoreSqlBackend;
|
||||
import eu.dnetlib.domain.dsm.Api;
|
||||
import eu.dnetlib.domain.dsm.Datasource;
|
||||
|
@ -49,7 +49,7 @@ public class MdTransformJobNode extends ProcessNode {
|
|||
private String ruleId;
|
||||
|
||||
@Autowired
|
||||
private XsltTransformFactory xsltTransformFactory;
|
||||
private XsltTransformerFactory xsltTransformFactory;
|
||||
|
||||
@Autowired
|
||||
private DnetServiceClientFactory clientFactory;
|
||||
|
@ -64,7 +64,7 @@ public class MdTransformJobNode extends ProcessNode {
|
|||
|
||||
final Map<String, Object> params = new HashMap<>();
|
||||
// TODO (LOW PRIORITY): which params ?
|
||||
final RecordTransformer<String, String> xslt = this.xsltTransformFactory.getTransformer(this.ruleId, params);
|
||||
final RecordTransformer<String, String> xslt = this.xsltTransformFactory.getTransformerById(this.ruleId, params);
|
||||
final MDStoreManagerClient mdstoreManager = this.clientFactory.getClient(MDStoreManagerClient.class);
|
||||
|
||||
final MDStoreVersion inputVersion = mdstoreManager.startReading(this.inputMdId);
|
||||
|
|
|
@ -1,9 +1,5 @@
|
|||
package eu.dnetlib.wfs;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.boot.SpringApplication;
|
||||
import org.springframework.boot.autoconfigure.SpringBootApplication;
|
||||
import org.springframework.boot.autoconfigure.domain.EntityScan;
|
||||
|
@ -12,8 +8,7 @@ import org.springframework.context.annotation.Bean;
|
|||
import eu.dnetlib.common.app.AbstractDnetApp;
|
||||
import eu.dnetlib.common.clients.DnetServiceClientFactory;
|
||||
import eu.dnetlib.common.mapping.cleaner.CleanerFactory;
|
||||
import eu.dnetlib.common.mapping.xslt.DnetXsltFunction;
|
||||
import eu.dnetlib.common.mapping.xslt.XsltTransformFactory;
|
||||
import eu.dnetlib.common.mapping.xslt.XsltTransformerFactory;
|
||||
import eu.dnetlib.domain.service.ServiceType;
|
||||
import eu.dnetlib.domain.wfs.conf.WfConfiguration;
|
||||
import eu.dnetlib.domain.wfs.conf.WfSection;
|
||||
|
@ -29,9 +24,6 @@ import eu.dnetlib.domain.wfs.subscriptions.WfSubscription;
|
|||
})
|
||||
public class WfManagerApplication extends AbstractDnetApp {
|
||||
|
||||
@Autowired(required = false)
|
||||
private final List<DnetXsltFunction> xsltFunctions = new ArrayList<>();
|
||||
|
||||
public static void main(final String[] args) {
|
||||
SpringApplication.run(WfManagerApplication.class, args);
|
||||
}
|
||||
|
@ -42,8 +34,8 @@ public class WfManagerApplication extends AbstractDnetApp {
|
|||
}
|
||||
|
||||
@Bean
|
||||
public XsltTransformFactory xsltTransformFactory(final DnetServiceClientFactory clientFactory) {
|
||||
return new XsltTransformFactory(clientFactory, this.xsltFunctions);
|
||||
public XsltTransformerFactory xsltTransformFactory(final DnetServiceClientFactory clientFactory) {
|
||||
return new XsltTransformerFactory(clientFactory);
|
||||
}
|
||||
|
||||
@Bean
|
||||
|
|
|
@ -17,7 +17,7 @@ import org.springframework.web.bind.annotation.RestController;
|
|||
import eu.dnetlib.common.controller.DnetRestController;
|
||||
import eu.dnetlib.common.mapping.RecordTransformer;
|
||||
import eu.dnetlib.common.mapping.cleaner.CleanerFactory;
|
||||
import eu.dnetlib.common.mapping.xslt.XsltTransformFactory;
|
||||
import eu.dnetlib.common.mapping.xslt.XsltTransformerFactory;
|
||||
import jakarta.servlet.http.HttpServletRequest;
|
||||
import jakarta.servlet.http.HttpServletResponse;
|
||||
|
||||
|
@ -29,7 +29,7 @@ public class MappingAjaxController extends DnetRestController {
|
|||
private CleanerFactory cleanerFactory;
|
||||
|
||||
@Autowired
|
||||
private XsltTransformFactory xsltFactory;
|
||||
private XsltTransformerFactory xsltFactory;
|
||||
|
||||
@PostMapping(value = "/clean", consumes = {
|
||||
MediaType.TEXT_PLAIN_VALUE, MediaType.APPLICATION_XML_VALUE,
|
||||
|
@ -41,7 +41,7 @@ public class MappingAjaxController extends DnetRestController {
|
|||
@PostMapping(value = "/xsltTransform", consumes = { MediaType.TEXT_PLAIN_VALUE,
|
||||
MediaType.APPLICATION_XML_VALUE, }, produces = MediaType.APPLICATION_XML_VALUE)
|
||||
public void xsltTransform(@RequestParam final String rule, final HttpServletRequest req, final HttpServletResponse res) throws Exception {
|
||||
transform(req, res, this.xsltFactory.getTransformer(rule, new HashMap<>()));
|
||||
transform(req, res, this.xsltFactory.getTransformerById(rule, new HashMap<>()));
|
||||
}
|
||||
|
||||
private void transform(final HttpServletRequest req, final HttpServletResponse res, final RecordTransformer<String, String> transformer)
|
||||
|
|
|
@ -13,7 +13,7 @@ public class VocabularyClient extends DnetServiceClient {
|
|||
|
||||
@Cacheable("vocabulary_list")
|
||||
public List<Vocabulary> listVocs() {
|
||||
return Arrays.asList(httpGet("/api/vocs", Vocabulary[].class, Map.of()));
|
||||
return Arrays.asList(httpGet("/api/vocs/", Vocabulary[].class, Map.of()));
|
||||
}
|
||||
|
||||
@Cacheable("vocabulary_terms")
|
||||
|
|
|
@ -1,8 +0,0 @@
|
|||
package eu.dnetlib.common.mapping.xslt;
|
||||
|
||||
import net.sf.saxon.s9api.ExtensionFunction;
|
||||
|
||||
public interface DnetXsltFunction {
|
||||
|
||||
ExtensionFunction asExtensionFunction();
|
||||
}
|
|
@ -1,73 +0,0 @@
|
|||
package eu.dnetlib.common.mapping.xslt;
|
||||
|
||||
import java.time.LocalDate;
|
||||
import java.time.ZoneId;
|
||||
import java.time.format.DateTimeFormatter;
|
||||
import java.time.format.DateTimeParseException;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import com.github.sisyphsu.dateparser.DateParserUtils;
|
||||
|
||||
import net.sf.saxon.s9api.ExtensionFunction;
|
||||
import net.sf.saxon.s9api.ItemType;
|
||||
import net.sf.saxon.s9api.OccurrenceIndicator;
|
||||
import net.sf.saxon.s9api.QName;
|
||||
import net.sf.saxon.s9api.SaxonApiException;
|
||||
import net.sf.saxon.s9api.SequenceType;
|
||||
import net.sf.saxon.s9api.XdmAtomicValue;
|
||||
import net.sf.saxon.s9api.XdmValue;
|
||||
|
||||
@Component
|
||||
public class XsltDateCleaner implements DnetXsltFunction {
|
||||
|
||||
public static final String DATE_FORMAT = "yyyy-MM-dd";
|
||||
|
||||
@Override
|
||||
public ExtensionFunction asExtensionFunction() {
|
||||
return new ExtensionFunction() {
|
||||
|
||||
@Override
|
||||
public QName getName() {
|
||||
return new QName(XsltTransformFactory.QNAME_BASE_URI + "/dateISO", "dateISO");
|
||||
}
|
||||
|
||||
@Override
|
||||
public SequenceType getResultType() {
|
||||
return SequenceType.makeSequenceType(ItemType.STRING, OccurrenceIndicator.ZERO_OR_ONE);
|
||||
}
|
||||
|
||||
@Override
|
||||
public SequenceType[] getArgumentTypes() {
|
||||
return new SequenceType[] {
|
||||
SequenceType.makeSequenceType(ItemType.STRING, OccurrenceIndicator.ZERO_OR_ONE)
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
public XdmValue call(final XdmValue[] xdmValues) throws SaxonApiException {
|
||||
final XdmValue r = xdmValues[0];
|
||||
if (r.size() == 0) { return new XdmAtomicValue(""); }
|
||||
final String currentValue = xdmValues[0].itemAt(0).getStringValue();
|
||||
return new XdmAtomicValue(clean(currentValue));
|
||||
}
|
||||
|
||||
public String clean(final String inputDate) {
|
||||
if (StringUtils.isBlank(inputDate)) { return null; }
|
||||
|
||||
try {
|
||||
final LocalDate date = DateParserUtils
|
||||
.parseDate(inputDate.trim())
|
||||
.toInstant()
|
||||
.atZone(ZoneId.systemDefault())
|
||||
.toLocalDate();
|
||||
return DateTimeFormatter.ofPattern(DATE_FORMAT).format(date);
|
||||
} catch (final DateTimeParseException e) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
};
|
||||
}
|
||||
}
|
|
@ -1,33 +0,0 @@
|
|||
package eu.dnetlib.common.mapping.xslt;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import eu.dnetlib.common.clients.DnetServiceClientFactory;
|
||||
import eu.dnetlib.common.clients.SimpleResourceClient;
|
||||
import eu.dnetlib.common.mapping.RecordTransformer;
|
||||
import eu.dnetlib.errors.TransformationException;
|
||||
|
||||
public class XsltTransformFactory {
|
||||
|
||||
public static final String QNAME_BASE_URI = "http://eu/dnetlib/transform";
|
||||
|
||||
private final DnetServiceClientFactory clientFactory;
|
||||
|
||||
private final List<DnetXsltFunction> xsltFunctions;
|
||||
|
||||
public XsltTransformFactory(final DnetServiceClientFactory clientFactory, final List<DnetXsltFunction> xsltFunctions) {
|
||||
this.clientFactory = clientFactory;
|
||||
this.xsltFunctions = xsltFunctions;
|
||||
}
|
||||
|
||||
public RecordTransformer<String, String> getTransformer(final String ruleId, final Map<String, Object> initialParams) throws TransformationException {
|
||||
final String xsltText =
|
||||
this.clientFactory.getClient(SimpleResourceClient.class)
|
||||
.findResourceContent(SimpleResourceClient.ResourceType.transformation_rule_xslt, ruleId, String.class);
|
||||
|
||||
return XsltUtils.newTransformer(xsltText, initialParams, this.xsltFunctions);
|
||||
|
||||
}
|
||||
|
||||
}
|
|
@ -3,19 +3,21 @@ package eu.dnetlib.common.mapping.xslt;
|
|||
import java.io.StringWriter;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import javax.xml.transform.stream.StreamSource;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
|
||||
import eu.dnetlib.common.clients.DnetServiceClientFactory;
|
||||
import eu.dnetlib.common.clients.SimpleResourceClient;
|
||||
import eu.dnetlib.common.clients.VocabularyClient;
|
||||
import eu.dnetlib.common.mapping.RecordTransformer;
|
||||
import eu.dnetlib.common.mapping.xslt.functions.XsltDateCleaner;
|
||||
import eu.dnetlib.common.mapping.xslt.functions.XsltPersonCleaner;
|
||||
import eu.dnetlib.common.mapping.xslt.functions.XsltVocabularyCleaner;
|
||||
import eu.dnetlib.errors.TransformationException;
|
||||
import net.sf.saxon.s9api.ExtensionFunction;
|
||||
import net.sf.saxon.s9api.Processor;
|
||||
import net.sf.saxon.s9api.QName;
|
||||
import net.sf.saxon.s9api.SaxonApiException;
|
||||
|
@ -27,31 +29,29 @@ import net.sf.saxon.s9api.XsltCompiler;
|
|||
import net.sf.saxon.s9api.XsltExecutable;
|
||||
import net.sf.saxon.s9api.XsltTransformer;
|
||||
|
||||
public class XsltUtils {
|
||||
public class XsltTransformerFactory {
|
||||
|
||||
private static final Log log = LogFactory.getLog(XsltUtils.class);
|
||||
private final DnetServiceClientFactory clientFactory;
|
||||
|
||||
public static String applyXslt(final String xml, final String xslt) throws Exception {
|
||||
return applyXslt(xml, xslt, new HashMap<>(), new ArrayList<>());
|
||||
public static final String QNAME_BASE_URI = "http://eu/dnetlib/transform";
|
||||
|
||||
public XsltTransformerFactory(final DnetServiceClientFactory clientFactory) {
|
||||
this.clientFactory = clientFactory;
|
||||
}
|
||||
|
||||
public static String applyXslt(final String xml, final String xslt, final Map<String, Object> params, final List<DnetXsltFunction> xsltFunctions)
|
||||
throws Exception {
|
||||
final RecordTransformer<String, String> transformer = newTransformer(xslt, params, xsltFunctions);
|
||||
return transformer.transform(xml);
|
||||
public RecordTransformer<String, String> getTransformerById(final String ruleId, final Map<String, Object> params) throws TransformationException {
|
||||
final String xsltText =
|
||||
this.clientFactory.getClient(SimpleResourceClient.class)
|
||||
.findResourceContent(SimpleResourceClient.ResourceType.transformation_rule_xslt, ruleId, String.class);
|
||||
|
||||
return getTransformerByXSLT(xsltText, params);
|
||||
}
|
||||
|
||||
public static RecordTransformer<String, String> newTransformer(final String xsltText,
|
||||
final Map<String, Object> params,
|
||||
final List<DnetXsltFunction> xsltFunctions) throws TransformationException {
|
||||
|
||||
public RecordTransformer<String, String> getTransformerByXSLT(final String xsltText, final Map<String, Object> params) throws TransformationException {
|
||||
final Processor processor = new Processor(false);
|
||||
|
||||
for (final DnetXsltFunction f : xsltFunctions) {
|
||||
final ExtensionFunction extFunction = f.asExtensionFunction();
|
||||
processor.registerExtensionFunction(extFunction);
|
||||
log.info("New XSLT function registered: " + extFunction.getName());
|
||||
}
|
||||
processor.registerExtensionFunction(new XsltDateCleaner());
|
||||
processor.registerExtensionFunction(new XsltPersonCleaner());
|
||||
processor.registerExtensionFunction(new XsltVocabularyCleaner(this.clientFactory.getClient(VocabularyClient.class)));
|
||||
|
||||
final List<XmlProcessingError> errorList = new ArrayList<>();
|
||||
|
||||
|
@ -60,9 +60,7 @@ public class XsltUtils {
|
|||
params.forEach((k, v) -> comp.setParameter(new QName(k), XdmAtomicValue.makeAtomicValue(v)));
|
||||
|
||||
try {
|
||||
final XsltExecutable xslt = comp
|
||||
.compile(new StreamSource(IOUtils.toInputStream(xsltText, StandardCharsets.UTF_8)));
|
||||
|
||||
final XsltExecutable xslt = comp.compile(new StreamSource(IOUtils.toInputStream(xsltText, StandardCharsets.UTF_8)));
|
||||
return xml -> {
|
||||
try {
|
||||
final XdmNode source = processor
|
|
@ -1,77 +0,0 @@
|
|||
package eu.dnetlib.common.mapping.xslt;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
|
||||
import eu.dnetlib.common.clients.VocabularyClient;
|
||||
import eu.dnetlib.domain.vocabulary.Synonym;
|
||||
import net.sf.saxon.s9api.ExtensionFunction;
|
||||
import net.sf.saxon.s9api.ItemType;
|
||||
import net.sf.saxon.s9api.OccurrenceIndicator;
|
||||
import net.sf.saxon.s9api.QName;
|
||||
import net.sf.saxon.s9api.SaxonApiException;
|
||||
import net.sf.saxon.s9api.SequenceType;
|
||||
import net.sf.saxon.s9api.XdmAtomicValue;
|
||||
import net.sf.saxon.s9api.XdmValue;
|
||||
|
||||
public class XsltVocabularyCleaner implements DnetXsltFunction {
|
||||
|
||||
@Autowired
|
||||
private VocabularyClient vocabularyClient;
|
||||
|
||||
private static final String SEPARATOR = "@#@";
|
||||
|
||||
@Override
|
||||
public ExtensionFunction asExtensionFunction() {
|
||||
|
||||
final Map<String, String> termsMap = new HashMap<>();
|
||||
|
||||
vocabularyClient.listVocs().forEach(voc -> {
|
||||
vocabularyClient.listTerms(voc.getId()).forEach(term -> {
|
||||
for (final Synonym s : term.getSynonyms()) {
|
||||
final String k = (voc.getId() + SEPARATOR + s.getTerm()).toLowerCase();
|
||||
final String v = term.getCode();
|
||||
termsMap.put(k, v);
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
return new ExtensionFunction() {
|
||||
|
||||
@Override
|
||||
public QName getName() {
|
||||
return new QName(XsltTransformFactory.QNAME_BASE_URI + "/clean", "clean");
|
||||
}
|
||||
|
||||
@Override
|
||||
public SequenceType getResultType() {
|
||||
return SequenceType.makeSequenceType(ItemType.STRING, OccurrenceIndicator.ONE_OR_MORE);
|
||||
}
|
||||
|
||||
@Override
|
||||
public SequenceType[] getArgumentTypes() {
|
||||
return new SequenceType[] {
|
||||
SequenceType.makeSequenceType(ItemType.STRING, OccurrenceIndicator.ZERO_OR_MORE),
|
||||
SequenceType.makeSequenceType(ItemType.STRING, OccurrenceIndicator.ONE)
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
public XdmValue call(final XdmValue[] xdmValues) throws SaxonApiException {
|
||||
final XdmValue r = xdmValues[0];
|
||||
if (r.size() == 0) { return new XdmAtomicValue(""); }
|
||||
final String currentValue = xdmValues[0].itemAt(0).getStringValue();
|
||||
final String vocId = xdmValues[1].itemAt(0).getStringValue();
|
||||
|
||||
final String key = (vocId + SEPARATOR + currentValue).toLowerCase();
|
||||
final String cleanedValue = termsMap.getOrDefault(key, currentValue);
|
||||
|
||||
return new XdmAtomicValue(StringUtils.isNotBlank(cleanedValue) ? cleanedValue : currentValue);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,68 @@
|
|||
package eu.dnetlib.common.mapping.xslt.functions;
|
||||
|
||||
import java.time.LocalDate;
|
||||
import java.time.ZoneId;
|
||||
import java.time.format.DateTimeFormatter;
|
||||
import java.time.format.DateTimeParseException;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import com.github.sisyphsu.dateparser.DateParserUtils;
|
||||
|
||||
import eu.dnetlib.common.mapping.xslt.XsltTransformerFactory;
|
||||
import net.sf.saxon.s9api.ExtensionFunction;
|
||||
import net.sf.saxon.s9api.ItemType;
|
||||
import net.sf.saxon.s9api.OccurrenceIndicator;
|
||||
import net.sf.saxon.s9api.QName;
|
||||
import net.sf.saxon.s9api.SaxonApiException;
|
||||
import net.sf.saxon.s9api.SequenceType;
|
||||
import net.sf.saxon.s9api.XdmAtomicValue;
|
||||
import net.sf.saxon.s9api.XdmValue;
|
||||
|
||||
@Component
|
||||
public class XsltDateCleaner implements ExtensionFunction {
|
||||
|
||||
public static final String DATE_FORMAT = "yyyy-MM-dd";
|
||||
|
||||
@Override
|
||||
public QName getName() {
|
||||
return new QName(XsltTransformerFactory.QNAME_BASE_URI + "/dateISO", "dateISO");
|
||||
}
|
||||
|
||||
@Override
|
||||
public SequenceType getResultType() {
|
||||
return SequenceType.makeSequenceType(ItemType.STRING, OccurrenceIndicator.ZERO_OR_ONE);
|
||||
}
|
||||
|
||||
@Override
|
||||
public SequenceType[] getArgumentTypes() {
|
||||
return new SequenceType[] {
|
||||
SequenceType.makeSequenceType(ItemType.STRING, OccurrenceIndicator.ZERO_OR_ONE)
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
public XdmValue call(final XdmValue[] xdmValues) throws SaxonApiException {
|
||||
final XdmValue r = xdmValues[0];
|
||||
if (r.size() == 0) { return new XdmAtomicValue(""); }
|
||||
final String currentValue = xdmValues[0].itemAt(0).getStringValue();
|
||||
return new XdmAtomicValue(clean(currentValue));
|
||||
}
|
||||
|
||||
public String clean(final String inputDate) {
|
||||
if (StringUtils.isBlank(inputDate)) { return null; }
|
||||
|
||||
try {
|
||||
final LocalDate date = DateParserUtils
|
||||
.parseDate(inputDate.trim())
|
||||
.toInstant()
|
||||
.atZone(ZoneId.systemDefault())
|
||||
.toLocalDate();
|
||||
return DateTimeFormatter.ofPattern(DATE_FORMAT).format(date);
|
||||
} catch (final DateTimeParseException e) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
|
@ -1,4 +1,4 @@
|
|||
package eu.dnetlib.common.mapping.xslt;
|
||||
package eu.dnetlib.common.mapping.xslt.functions;
|
||||
|
||||
import java.text.Normalizer;
|
||||
import java.util.ArrayList;
|
||||
|
@ -9,6 +9,7 @@ import java.util.stream.Collectors;
|
|||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import eu.dnetlib.common.mapping.xslt.XsltTransformerFactory;
|
||||
import net.sf.saxon.s9api.ExtensionFunction;
|
||||
import net.sf.saxon.s9api.ItemType;
|
||||
import net.sf.saxon.s9api.OccurrenceIndicator;
|
||||
|
@ -19,7 +20,32 @@ import net.sf.saxon.s9api.XdmAtomicValue;
|
|||
import net.sf.saxon.s9api.XdmValue;
|
||||
|
||||
@Component
|
||||
public class XsltPersonCleaner implements DnetXsltFunction {
|
||||
public class XsltPersonCleaner implements ExtensionFunction {
|
||||
|
||||
@Override
|
||||
public QName getName() {
|
||||
return new QName(XsltTransformerFactory.QNAME_BASE_URI + "/person", "normalize");
|
||||
}
|
||||
|
||||
@Override
|
||||
public SequenceType getResultType() {
|
||||
return SequenceType.makeSequenceType(ItemType.STRING, OccurrenceIndicator.ZERO_OR_ONE);
|
||||
}
|
||||
|
||||
@Override
|
||||
public SequenceType[] getArgumentTypes() {
|
||||
return new SequenceType[] {
|
||||
SequenceType.makeSequenceType(ItemType.STRING, OccurrenceIndicator.ZERO_OR_ONE)
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
public XdmValue call(final XdmValue[] xdmValues) throws SaxonApiException {
|
||||
final XdmValue r = xdmValues[0];
|
||||
if (r.size() == 0) { return new XdmAtomicValue(""); }
|
||||
final String currentValue = xdmValues[0].itemAt(0).getStringValue();
|
||||
return new XdmAtomicValue(normalize(currentValue));
|
||||
}
|
||||
|
||||
public static String normalize(String s) {
|
||||
final List<String> firstname = new ArrayList<>();
|
||||
|
@ -109,35 +135,4 @@ public class XsltPersonCleaner implements DnetXsltFunction {
|
|||
.collect(Collectors.toList());
|
||||
}
|
||||
|
||||
@Override
|
||||
public ExtensionFunction asExtensionFunction() {
|
||||
return new ExtensionFunction() {
|
||||
|
||||
@Override
|
||||
public QName getName() {
|
||||
return new QName(XsltTransformFactory.QNAME_BASE_URI + "/person", "normalize");
|
||||
}
|
||||
|
||||
@Override
|
||||
public SequenceType getResultType() {
|
||||
return SequenceType.makeSequenceType(ItemType.STRING, OccurrenceIndicator.ZERO_OR_ONE);
|
||||
}
|
||||
|
||||
@Override
|
||||
public SequenceType[] getArgumentTypes() {
|
||||
return new SequenceType[] {
|
||||
SequenceType.makeSequenceType(ItemType.STRING, OccurrenceIndicator.ZERO_OR_ONE)
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
public XdmValue call(final XdmValue[] xdmValues) throws SaxonApiException {
|
||||
final XdmValue r = xdmValues[0];
|
||||
if (r.size() == 0) { return new XdmAtomicValue(""); }
|
||||
final String currentValue = xdmValues[0].itemAt(0).getStringValue();
|
||||
return new XdmAtomicValue(normalize(currentValue));
|
||||
}
|
||||
|
||||
};
|
||||
}
|
||||
}
|
|
@ -0,0 +1,69 @@
|
|||
package eu.dnetlib.common.mapping.xslt.functions;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
|
||||
import eu.dnetlib.common.clients.VocabularyClient;
|
||||
import eu.dnetlib.common.mapping.xslt.XsltTransformerFactory;
|
||||
import eu.dnetlib.domain.vocabulary.Synonym;
|
||||
import net.sf.saxon.s9api.ExtensionFunction;
|
||||
import net.sf.saxon.s9api.ItemType;
|
||||
import net.sf.saxon.s9api.OccurrenceIndicator;
|
||||
import net.sf.saxon.s9api.QName;
|
||||
import net.sf.saxon.s9api.SaxonApiException;
|
||||
import net.sf.saxon.s9api.SequenceType;
|
||||
import net.sf.saxon.s9api.XdmAtomicValue;
|
||||
import net.sf.saxon.s9api.XdmValue;
|
||||
|
||||
public class XsltVocabularyCleaner implements ExtensionFunction {
|
||||
|
||||
private static final String SEPARATOR = "@#@";
|
||||
|
||||
private final Map<String, String> termsMap = new HashMap<>();
|
||||
|
||||
public XsltVocabularyCleaner(final VocabularyClient vocabularyClient) {
|
||||
vocabularyClient.listVocs().forEach(voc -> {
|
||||
vocabularyClient.listTerms(voc.getId()).forEach(term -> {
|
||||
for (final Synonym s : term.getSynonyms()) {
|
||||
final String k = (voc.getId() + SEPARATOR + s.getTerm()).toLowerCase();
|
||||
final String v = term.getCode();
|
||||
this.termsMap.put(k, v);
|
||||
}
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
@Override
|
||||
public QName getName() {
|
||||
return new QName(XsltTransformerFactory.QNAME_BASE_URI + "/clean", "clean");
|
||||
}
|
||||
|
||||
@Override
|
||||
public SequenceType getResultType() {
|
||||
return SequenceType.makeSequenceType(ItemType.STRING, OccurrenceIndicator.ONE_OR_MORE);
|
||||
}
|
||||
|
||||
@Override
|
||||
public SequenceType[] getArgumentTypes() {
|
||||
return new SequenceType[] {
|
||||
SequenceType.makeSequenceType(ItemType.STRING, OccurrenceIndicator.ZERO_OR_MORE),
|
||||
SequenceType.makeSequenceType(ItemType.STRING, OccurrenceIndicator.ONE)
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
public XdmValue call(final XdmValue[] xdmValues) throws SaxonApiException {
|
||||
final XdmValue r = xdmValues[0];
|
||||
if (r.size() == 0) { return new XdmAtomicValue(""); }
|
||||
final String currentValue = xdmValues[0].itemAt(0).getStringValue();
|
||||
final String vocId = xdmValues[1].itemAt(0).getStringValue();
|
||||
|
||||
final String key = (vocId + SEPARATOR + currentValue).toLowerCase();
|
||||
final String cleanedValue = this.termsMap.getOrDefault(key, currentValue);
|
||||
|
||||
return new XdmAtomicValue(StringUtils.isNotBlank(cleanedValue) ? cleanedValue : currentValue);
|
||||
}
|
||||
|
||||
}
|
Loading…
Reference in New Issue