1
0
Fork 0

[ActionManagement] reduced number of xqueries used to access ActionSet info

This commit is contained in:
Claudio Atzori 2021-05-07 17:31:32 +02:00
parent d82071ba6c
commit 8a0de2fc18
1 changed file with 39 additions and 63 deletions

View File

@ -3,20 +3,23 @@ package eu.dnetlib.dhp.actionmanager;
import java.io.Serializable; import java.io.Serializable;
import java.io.StringReader; import java.io.StringReader;
import java.util.ArrayList; import java.util.*;
import java.util.List;
import java.util.NoSuchElementException;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import org.apache.commons.lang3.tuple.Triple;
import org.dom4j.Document; import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.Element; import org.dom4j.Element;
import org.dom4j.io.SAXReader; import org.dom4j.io.SAXReader;
import org.jetbrains.annotations.NotNull;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import com.google.common.base.Joiner;
import com.google.common.base.Splitter; import com.google.common.base.Splitter;
import com.google.common.collect.Iterables; import com.google.common.collect.Iterables;
import com.google.common.collect.Lists; import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import eu.dnetlib.actionmanager.rmi.ActionManagerException; import eu.dnetlib.actionmanager.rmi.ActionManagerException;
import eu.dnetlib.actionmanager.set.ActionManagerSet; import eu.dnetlib.actionmanager.set.ActionManagerSet;
@ -25,6 +28,7 @@ import eu.dnetlib.dhp.actionmanager.partition.PartitionActionSetsByPayloadTypeJo
import eu.dnetlib.dhp.utils.ISLookupClientFactory; import eu.dnetlib.dhp.utils.ISLookupClientFactory;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
import scala.Tuple2;
public class ISClient implements Serializable { public class ISClient implements Serializable {
@ -40,80 +44,52 @@ public class ISClient implements Serializable {
public List<String> getLatestRawsetPaths(String setIds) { public List<String> getLatestRawsetPaths(String setIds) {
List<String> ids = Lists final Set<String> ids = Sets
.newArrayList( .newHashSet(
Splitter Splitter
.on(INPUT_ACTION_SET_ID_SEPARATOR) .on(INPUT_ACTION_SET_ID_SEPARATOR)
.omitEmptyStrings() .omitEmptyStrings()
.trimResults() .trimResults()
.split(setIds)); .split(setIds));
return ids
.stream()
.map(id -> getSet(isLookup, id))
.map(as -> as.getPathToLatest())
.collect(Collectors.toCollection(ArrayList::new));
}
private ActionManagerSet getSet(ISLookUpService isLookup, final String setId) {
final String q = "for $x in collection('/db/DRIVER/ActionManagerSetDSResources/ActionManagerSetDSResourceType') "
+ "where $x//SET/@id = '"
+ setId
+ "' return $x";
try { try {
final String basePath = getBasePathHDFS(isLookup); final String basePath = getBasePathHDFS(isLookup);
final String setProfile = isLookup.getResourceProfileByQuery(q);
return getActionManagerSet(basePath, setProfile); // <SET id="..." directory="..." latest="xxx"/>
} catch (ISLookUpException | ActionManagerException e) { final String xquery = "for $x in collection('/db/DRIVER/ActionManagerSetDSResources/ActionManagerSetDSResourceType') "
throw new RuntimeException("Error accessing Sets, using query: " + q); +
"return <SET id='{$x//SET/@id/string()}' directory='{$x//SET/@directory/string()}' latest='{$x//LATEST/@id/string()}'/>";
return Optional
.ofNullable(isLookup.quickSearchProfile(xquery))
.map(
sets -> sets
.stream()
.map(set -> parseSetInfo(set))
.filter(t -> ids.contains(t.getLeft()))
.map(t -> buildDirectory(basePath, t))
.collect(Collectors.toList()))
.orElseThrow(() -> new IllegalStateException("empty set list"));
} catch (ActionManagerException | ISLookUpException e) {
throw new IllegalStateException("unable to query ActionSets info from the IS");
} }
} }
private ActionManagerSet getActionManagerSet(final String basePath, final String profile) private Triple<String, String, String> parseSetInfo(String set) {
throws ActionManagerException {
final SAXReader reader = new SAXReader();
final ActionManagerSet set = new ActionManagerSet();
try { try {
final Document doc = reader.read(new StringReader(profile)); Document doc = new SAXReader().read(new StringReader(set));
return Triple
set.setId(doc.valueOf("//SET/@id").trim()); .of(
set.setName(doc.valueOf("//SET").trim()); doc.valueOf("//SET/@id"),
set.setImpact(ImpactTypes.valueOf(doc.valueOf("//IMPACT").trim())); doc.valueOf("//SET/@directory"),
set doc.valueOf("//SET/@latest"));
.setLatest( } catch (DocumentException e) {
doc.valueOf("//RAW_SETS/LATEST/@id"), throw new IllegalStateException(e);
doc.valueOf("//RAW_SETS/LATEST/@creationDate"),
doc.valueOf("//RAW_SETS/LATEST/@lastUpdate"));
set.setDirectory(doc.valueOf("//SET/@directory"));
final List expiredNodes = doc.selectNodes("//RAW_SETS/EXPIRED");
if (expiredNodes != null) {
for (int i = 0; i < expiredNodes.size(); i++) {
Element ex = (Element) expiredNodes.get(i);
set
.addExpired(
ex.attributeValue("id"),
ex.attributeValue("creationDate"),
ex.attributeValue("lastUpdate"));
}
}
final StringBuilder sb = new StringBuilder();
sb.append(basePath);
sb.append("/");
sb.append(doc.valueOf("//SET/@directory"));
sb.append("/");
sb.append(doc.valueOf("//RAW_SETS/LATEST/@id"));
set.setPathToLatest(sb.toString());
return set;
} catch (Exception e) {
throw new ActionManagerException("Error creating set from profile: " + profile, e);
} }
} }
private String buildDirectory(String basePath, Triple<String, String, String> t) {
return Joiner.on("/").join(basePath, t.getMiddle(), t.getRight());
}
private String getBasePathHDFS(ISLookUpService isLookup) throws ActionManagerException { private String getBasePathHDFS(ISLookUpService isLookup) throws ActionManagerException {
return queryServiceProperty(isLookup, "basePath"); return queryServiceProperty(isLookup, "basePath");
} }