105 lines
3.1 KiB
Java
105 lines
3.1 KiB
Java
package eu.dnetlib.wfs.nodes;
|
|
|
|
import java.time.LocalDateTime;
|
|
import java.util.function.Predicate;
|
|
import java.util.stream.Stream;
|
|
|
|
import org.dom4j.Document;
|
|
import org.dom4j.DocumentException;
|
|
import org.dom4j.DocumentHelper;
|
|
import org.springframework.beans.BeanUtils;
|
|
import org.springframework.beans.factory.annotation.Autowired;
|
|
|
|
import eu.dnetlib.common.clients.DnetServiceClientFactory;
|
|
import eu.dnetlib.common.clients.DsmClient;
|
|
import eu.dnetlib.common.clients.MDStoreManagerClient;
|
|
import eu.dnetlib.common.mapping.cleaner.Cleaner;
|
|
import eu.dnetlib.common.mapping.cleaner.CleanerFactory;
|
|
import eu.dnetlib.common.mdstores.backends.sql.MDStoreSqlBackend;
|
|
import eu.dnetlib.domain.dsm.Api;
|
|
import eu.dnetlib.domain.dsm.Datasource;
|
|
import eu.dnetlib.domain.mdstore.MDStoreVersion;
|
|
import eu.dnetlib.domain.mdstore.records.MetadataRecord;
|
|
import eu.dnetlib.wfs.annotations.WfInputParam;
|
|
import eu.dnetlib.wfs.annotations.WfNode;
|
|
import eu.dnetlib.wfs.utils.XpathFilterFactory;
|
|
import jakarta.transaction.Transactional;
|
|
|
|
@WfNode("md_clean")
|
|
public class MdCleanerJobNode extends ProcessNode {
|
|
|
|
@WfInputParam
|
|
private Datasource ds;
|
|
|
|
@WfInputParam
|
|
private Api api;
|
|
|
|
@WfInputParam
|
|
private String inputMdId;
|
|
|
|
@WfInputParam
|
|
private String outputMdId;
|
|
|
|
@WfInputParam
|
|
private String filterXpath;
|
|
|
|
@WfInputParam
|
|
private String ruleId;
|
|
|
|
@Autowired
|
|
private CleanerFactory cleanerFactory;
|
|
|
|
@Autowired
|
|
private DnetServiceClientFactory clientFactory;
|
|
|
|
@Autowired
|
|
private MDStoreSqlBackend mdStoreSqlBackend;
|
|
|
|
@Override
|
|
@Transactional
|
|
protected void execute() throws Exception {
|
|
|
|
final Predicate<Document> filter = XpathFilterFactory.createFilter(this.filterXpath);
|
|
|
|
final Cleaner cleaner = this.cleanerFactory.newCleaner(this.ruleId);
|
|
|
|
final MDStoreManagerClient mdstoreManager = this.clientFactory.getClient(MDStoreManagerClient.class);
|
|
|
|
final MDStoreVersion inputVersion = mdstoreManager.startReading(this.inputMdId);
|
|
final MDStoreVersion outputVersion = mdstoreManager.newVersion(this.outputMdId);
|
|
|
|
try {
|
|
final Stream<MetadataRecord> stream = this.mdStoreSqlBackend.streamEntries(inputVersion)
|
|
.filter(record -> {
|
|
try {
|
|
final Document doc = DocumentHelper.parseText(record.getBody());
|
|
return filter.test(doc);
|
|
} catch (final DocumentException e) {
|
|
throw new RuntimeException("Invalid record: " + record.getBody());
|
|
}
|
|
})
|
|
.map(input -> {
|
|
final MetadataRecord output = new MetadataRecord();
|
|
BeanUtils.copyProperties(input, output);
|
|
output.setBody(cleaner.transform(input.getBody()));
|
|
output.setDateOfTransformation(LocalDateTime.now());
|
|
return output;
|
|
});
|
|
|
|
this.mdStoreSqlBackend.saveRecords(outputVersion, stream);
|
|
|
|
final long size = this.mdStoreSqlBackend.countRecords(outputVersion.getId());
|
|
|
|
mdstoreManager.commitVersion(outputVersion.getId(), size);
|
|
|
|
this.clientFactory.getClient(DsmClient.class).updateApiAggregationInfo(this.api.getId(), this.outputMdId, size);
|
|
} catch (final Throwable e) {
|
|
mdstoreManager.abortVersion(outputVersion);
|
|
throw e;
|
|
} finally {
|
|
mdstoreManager.endReading(inputVersion);
|
|
}
|
|
}
|
|
|
|
}
|