dnet-docker/dnet-app/apps/dnet-wf-aggregation-postgres/src/main/java/eu/dnetlib/wfs/nodes/MdCleanerJobNode.java

105 lines
3.1 KiB
Java

package eu.dnetlib.wfs.nodes;
import java.time.LocalDateTime;
import java.util.function.Predicate;
import java.util.stream.Stream;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.DocumentHelper;
import org.springframework.beans.BeanUtils;
import org.springframework.beans.factory.annotation.Autowired;
import eu.dnetlib.common.clients.DnetServiceClientFactory;
import eu.dnetlib.common.clients.DsmClient;
import eu.dnetlib.common.clients.MDStoreManagerClient;
import eu.dnetlib.common.mapping.cleaner.Cleaner;
import eu.dnetlib.common.mapping.cleaner.CleanerFactory;
import eu.dnetlib.common.mdstores.backends.sql.MDStoreSqlBackend;
import eu.dnetlib.domain.dsm.Api;
import eu.dnetlib.domain.dsm.Datasource;
import eu.dnetlib.domain.mdstore.MDStoreVersion;
import eu.dnetlib.domain.mdstore.records.MetadataRecord;
import eu.dnetlib.wfs.annotations.WfInputParam;
import eu.dnetlib.wfs.annotations.WfNode;
import eu.dnetlib.wfs.utils.XpathFilterFactory;
import jakarta.transaction.Transactional;
/**
 * Workflow node, registered under the name {@code md_clean}, that reads the records of an input
 * mdstore version, keeps those accepted by an XPath-based filter, runs each kept record body
 * through a {@link Cleaner}, and stores the results in a new version of the output mdstore.
 *
 * <p>
 * On success the new output version is committed together with its record count and the
 * aggregation info of the API is updated through the {@link DsmClient}. On failure the new
 * output version is aborted. The read handle on the input version is released in every case.
 */
@WfNode("md_clean")
public class MdCleanerJobNode extends ProcessNode {

	/** Datasource input parameter; not referenced by the code of this class. */
	@WfInputParam
	private Datasource ds;

	/** API whose id is reported to {@link DsmClient#updateApiAggregationInfo} after the commit. */
	@WfInputParam
	private Api api;

	/** Identifier handed to {@link MDStoreManagerClient#startReading} to obtain the input version. */
	@WfInputParam
	private String inputMdId;

	/** Identifier handed to {@link MDStoreManagerClient#newVersion} to create the output version. */
	@WfInputParam
	private String outputMdId;

	/** Expression handed to {@link XpathFilterFactory#createFilter} to build the record filter. */
	@WfInputParam
	private String filterXpath;

	/** Identifier handed to {@link CleanerFactory#newCleaner} to build the cleaner. */
	@WfInputParam
	private String ruleId;

	@Autowired
	private CleanerFactory cleanerFactory;

	@Autowired
	private DnetServiceClientFactory clientFactory;

	@Autowired
	private MDStoreSqlBackend mdStoreSqlBackend;

	/**
	 * Filters and cleans the records of the input mdstore into a new output version.
	 *
	 * @throws RuntimeException
	 *             if the body of a record cannot be parsed by dom4j; the original
	 *             {@link DocumentException} is attached as the cause
	 * @throws Exception
	 *             any other failure raised by the mdstore manager, the SQL backend, the cleaner
	 *             or the DSM client; if aborting the output version fails as well, that failure
	 *             is attached to the rethrown exception as a suppressed exception
	 */
	@Override
	@Transactional
	protected void execute() throws Exception {
		final Predicate<Document> filter = XpathFilterFactory.createFilter(this.filterXpath);
		final Cleaner cleaner = this.cleanerFactory.newCleaner(this.ruleId);
		final MDStoreManagerClient mdstoreManager = this.clientFactory.getClient(MDStoreManagerClient.class);

		final MDStoreVersion inputVersion = mdstoreManager.startReading(this.inputMdId);
		try {
			// Created inside the try so that a failure here still releases the read handle below.
			final MDStoreVersion outputVersion = mdstoreManager.newVersion(this.outputMdId);

			// try-with-resources: the stream comes from the SQL backend and may hold resources
			// (NOTE(review): presumably a DB cursor - confirm); Stream.close() is idempotent, so
			// this is safe even if saveRecords already closes it.
			try (Stream<MetadataRecord> stream = this.mdStoreSqlBackend.streamEntries(inputVersion)
					.filter(record -> matchesFilter(filter, record))
					.map(input -> cleanRecord(cleaner, input))) {

				this.mdStoreSqlBackend.saveRecords(outputVersion, stream);

				final long size = this.mdStoreSqlBackend.countRecords(outputVersion.getId());
				mdstoreManager.commitVersion(outputVersion.getId(), size);
				this.clientFactory.getClient(DsmClient.class).updateApiAggregationInfo(this.api.getId(), this.outputMdId, size);
			} catch (final Throwable e) {
				try {
					mdstoreManager.abortVersion(outputVersion);
				} catch (final Exception abortFailure) {
					// Keep the original failure as the primary exception instead of masking it.
					e.addSuppressed(abortFailure);
				}
				throw e;
			}
		} finally {
			mdstoreManager.endReading(inputVersion);
		}
	}

	/** Parses the record body as XML and applies the filter to the resulting document. */
	private static boolean matchesFilter(final Predicate<Document> filter, final MetadataRecord record) {
		try {
			return filter.test(DocumentHelper.parseText(record.getBody()));
		} catch (final DocumentException e) {
			throw new RuntimeException("Invalid record: " + record.getBody(), e);
		}
	}

	/** Copies the input record, replaces its body with the cleaned one and stamps the transformation date. */
	private static MetadataRecord cleanRecord(final Cleaner cleaner, final MetadataRecord input) {
		final MetadataRecord output = new MetadataRecord();
		BeanUtils.copyProperties(input, output);
		output.setBody(cleaner.transform(input.getBody()));
		output.setDateOfTransformation(LocalDateTime.now());
		return output;
	}
}