partial implementation

This commit is contained in:
Michele Artini 2024-11-21 12:09:09 +01:00
parent e4faf7c227
commit b8e264404f
2 changed files with 198 additions and 129 deletions

View File

@ -1,23 +1,21 @@
package eu.dnetlib.app.directindex.clients;
import java.io.Serializable;
import java.io.StringWriter;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.stream.Collectors;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.cache.annotation.CacheEvict;
import org.springframework.cache.annotation.Cacheable;
import org.springframework.stereotype.Component;
import org.springframework.web.client.RestClientException;
import org.springframework.web.client.RestTemplate;
import eu.dnetlib.app.directindex.errors.DirectIndexApiException;
@Component
public class CommunityClient {
@ -25,93 +23,161 @@ public class CommunityClient {
private static final Log log = LogFactory.getLog(CommunityClient.class);
@Value("${dnet.directindex.context.url}")
private String contextApiUrl;
@Value("${dnet.directindex.community.url}")
private String communityApiUrl;
/**
 * Collects the context nodes for a list of context paths.
 * <p>
 * An id that does not contain {@code ZENODO_COMMUNITY} is looked up directly through
 * {@link #filterNodes(String)}. Otherwise the segment after the last {@code '/'} of the id
 * is used to query {@code <communityApiUrl>/<segment>/openairecommunities}, and every path
 * in the returned list is looked up in the same way.
 *
 * @param paths
 *            the context paths (or Zenodo community ids) to resolve
 * @return a map, keyed by node id, with all the nodes found for the given paths
 */
public Map<String, ContextInfo> findContexts(final List<String> paths) {
	final Map<String, ContextInfo> res = new HashMap<>();

	for (final String id : paths) {
		if (!id.contains(ZENODO_COMMUNITY)) {
			res.putAll(filterNodes(id));
			continue;
		}

		final String context = id.substring(id.lastIndexOf("/") + 1);
		final String url = communityApiUrl + "/" + context + "/openairecommunities";

		// getForObject returns null when the response has no body: skip the id instead of failing with a NullPointerException
		final ZenodoContextList zenodo = new RestTemplate().getForObject(url, ZenodoContextList.class);
		if (zenodo == null || zenodo.getOpenAirecommunitylist() == null) {
			log.warn("No OpenAIRE communities returned by url: " + url);
			continue;
		}

		for (final String path : zenodo.getOpenAirecommunitylist()) {
			res.putAll(filterNodes(path));
		}
	}

	return res;
}
/**
 * Loads the nodes of the root context of the given path and keeps only the node identified
 * by the path itself and the nodes whose id is a "::"-delimited prefix of it.
 *
 * @param path
 *            a context path, with levels separated by "::"
 * @return the matching nodes, keyed by node id
 */
private Map<String, ContextInfo> filterNodes(final String path) {
	// the first "::"-separated segment identifies the root context
	final Map<String, ContextInfo> tree = findNodes(path.split("::")[0]);

	return tree.entrySet()
		.stream()
		.filter(node -> {
			final String nodeId = node.getKey();
			return path.equals(nodeId) || path.startsWith(nodeId + "::");
		})
		.collect(Collectors.toMap(node -> node.getKey(), node -> node.getValue()));
}
@Cacheable("contexts")
public Collection<ContextInfo> findContexts(final String id) throws DirectIndexApiException {
private Map<String, ContextInfo> findNodes(final String root) {
// TODO follow the example in
// https://code-repo.d4science.org/D-Net/dnet-hadoop/src/commit/4d3aef3a095dfbc194983b024ae4ed688eda2b73/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/ProvisionModelSupport.java#L587
// https://code-repo.d4science.org/D-Net/dnet-hadoop/src/branch/main/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/ContextMapper.java
log.info("Preparing context: " + root);
if (!id.contains(ZENODO_COMMUNITY)) { return Arrays.asList(createContextInfo(id)); }
final Map<String, ContextInfo> res = new HashMap<>();
final String context = id.substring(id.lastIndexOf("/") + 1);
final RestTemplate rt = new RestTemplate();
final Community c = rt.getForObject(communityApiUrl + "/" + root, Community.class);
final String url = communityApiUrl + "/" + context + "/openairecommunities";
try {
return new RestTemplate().getForObject(url, ZenodoContextList.class)
.getOpenAirecommunitylist()
.stream()
.map(this::createContextInfo)
.toList();
} catch (final RestClientException e) {
log.error("Unable to get object for url: " + url, e);
throw new DirectIndexApiException("Unable to get object for url: " + url, e);
}
}
res.put(c.getId(), new ContextInfo(StringUtils.firstNonBlank(c.getDisplayName(), c.getName()), c.getType()));
@Cacheable("context-labels")
public String findContextLabel(final String id) {
// TODO
return null;
}
@Cacheable("category-labels")
public String findCategoryLabel(final String id) {
// TODO
return null;
}
@Cacheable("category-labels")
public String findConceptLabel(final String id) {
// TODO
return null;
}
private ContextInfo createContextInfo(final String community) {
return createContextInfo(community.split("::"), 0);
}
private ContextInfo createContextInfo(final String[] arr, final int pos) {
final StringWriter id = new StringWriter();
id.write(arr[0]);
for (int i = 0; i < pos; i++) {
id.write("::");
id.write(arr[i + 1]);
for (final SubCommunity sc : rt.getForObject(communityApiUrl + "/" + root + "/subcommunities?all=true", SubCommunity[].class)) {
final String[] arr = sc.getSubCommunityId().split("::");
if (arr.length > 2) {
res.put(arr[0] + "::" + arr[1], new ContextInfo(sc.getCategory(), ""));
res.put(sc.getSubCommunityId(), new ContextInfo(sc.getLabel(), ""));
}
}
final ContextInfo info = new ContextInfo();
info.setId(id.toString());
if (pos == 0) {
info.setElem("context");
info.setLabel(findContextLabel(id.toString()));
} else if (pos == 1) {
info.setElem("category");
info.setLabel(findCategoryLabel(id.toString()));
} else {
info.setElem("concept");
info.setLabel(findConceptLabel(id.toString()));
}
if (pos + 1 < arr.length) {
info.getChildren().add(createContextInfo(arr, pos + 1));
}
return info;
return res;
}
/**
 * Evicts all the entries ({@code allEntries = true}) of the caches named in the annotation.
 * The body is intentionally empty: the method only carries the cache-eviction annotation.
 * NOTE(review): two {@code @CacheEvict} declarations are listed here, one of them also
 * naming "context-labels" - confirm which one is the current one.
 */
@CacheEvict(value = { "contexts", "context-labels" }, allEntries = true)
@CacheEvict(value = { "contexts" }, allEntries = true)
public void clearCache() {}
public class Community implements Serializable {
private static final long serialVersionUID = 6566834038680683536L;
private String id;
private String type;
private String name;
private String shortName;
private String displayName;
private String displayShortName;
public String getId() {
return id;
}
public void setId(final String id) {
this.id = id;
}
public String getType() {
return type;
}
public void setType(final String type) {
this.type = type;
}
public String getName() {
return name;
}
public void setName(final String name) {
this.name = name;
}
public String getShortName() {
return shortName;
}
public void setShortName(final String shortName) {
this.shortName = shortName;
}
public String getDisplayName() {
return displayName;
}
public void setDisplayName(final String displayName) {
this.displayName = displayName;
}
public String getDisplayShortName() {
return displayShortName;
}
public void setDisplayShortName(final String displayShortName) {
this.displayShortName = displayShortName;
}
}
public class SubCommunity implements Serializable {
private static final long serialVersionUID = 6363561947231890039L;
private String subCommunityId;
private String label;
private String category;
public String getSubCommunityId() {
return subCommunityId;
}
public void setSubCommunityId(final String subCommunityId) {
this.subCommunityId = subCommunityId;
}
public String getLabel() {
return label;
}
public void setLabel(final String label) {
this.label = label;
}
public String getCategory() {
return category;
}
public void setCategory(final String category) {
this.category = category;
}
}
public class ZenodoContextList implements Serializable {
private static final long serialVersionUID = -8575901008472098218L;
@ -144,45 +210,20 @@ public class CommunityClient {
private static final long serialVersionUID = 96456546778111904L;
private String elem;
private String id;
private String label;
private List<ContextInfo> children = new ArrayList<>();
private final String label;
private final String type;
public String getElem() {
return elem;
}
public void setElem(final String elem) {
this.elem = elem;
}
public String getId() {
return id;
}
public void setId(final String id) {
this.id = id;
}
public List<ContextInfo> getChildren() {
return children;
}
public void setChildren(final List<ContextInfo> children) {
this.children = children;
}
public boolean isRoot() {
return "context".equals(elem);
public ContextInfo(final String label, final String type) {
this.label = label;
this.type = type;
}
public String getLabel() {
return label;
}
public void setLabel(final String label) {
this.label = label;
public String getType() {
return type;
}
}

View File

@ -2,11 +2,11 @@ package eu.dnetlib.app.directindex.mapping;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Map.Entry;
import java.util.stream.Collectors;
import org.apache.commons.codec.digest.DigestUtils;
import org.apache.commons.lang3.StringUtils;
@ -16,6 +16,7 @@ import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
import eu.dnetlib.app.directindex.clients.CommunityClient;
import eu.dnetlib.app.directindex.clients.CommunityClient.ContextInfo;
import eu.dnetlib.app.directindex.clients.DatasourceManagerClient;
import eu.dnetlib.app.directindex.clients.VocabularyClient;
import eu.dnetlib.app.directindex.errors.DirectIndexApiException;
@ -25,6 +26,8 @@ import eu.dnetlib.app.directindex.input.ResultEntry;
import eu.dnetlib.dhp.schema.solr.AccessRight;
import eu.dnetlib.dhp.schema.solr.Author;
import eu.dnetlib.dhp.schema.solr.BestAccessRight;
import eu.dnetlib.dhp.schema.solr.Category;
import eu.dnetlib.dhp.schema.solr.Concept;
import eu.dnetlib.dhp.schema.solr.Context;
import eu.dnetlib.dhp.schema.solr.Country;
import eu.dnetlib.dhp.schema.solr.Funder;
@ -500,24 +503,49 @@ public class SolrRecordMapper {
}
/**
 * Builds the list of contexts (with their categories and concepts) for the given context paths.
 * <p>
 * The nodes are resolved once through the community client, then every root node (level 0)
 * becomes a {@link Context} whose categories are built by {@link #prepareDnetCategories(Map, String)}.
 *
 * @param list
 *            the context paths of the record
 * @return the contexts to store in the record
 */
private List<Context> prepareDnetContext(final List<String> list) {
	final Map<String, ContextInfo> nodes = communityClient.findContexts(list);

	final List<Context> res = new ArrayList<>();
	for (final Entry<String, ContextInfo> node : filterContextInfoByLevel(nodes, 0, null).entrySet()) {
		final String id = node.getKey();
		final ContextInfo info = node.getValue();
		res.add(Context.newInstance(id, info.getLabel(), info.getType(), prepareDnetCategories(nodes, id)));
	}
	return res;
}
/**
 * Builds the categories (level 1 nodes) that belong to the given parent context, each one
 * filled with its concepts.
 *
 * @param nodes
 *            all the known nodes, keyed by id
 * @param parent
 *            the id of the parent context
 * @return the categories of the parent context
 */
private List<Category> prepareDnetCategories(final Map<String, ContextInfo> nodes, final String parent) {
	final List<Category> categories = new ArrayList<>();
	for (final Entry<String, ContextInfo> node : filterContextInfoByLevel(nodes, 1, parent).entrySet()) {
		final String categoryId = node.getKey();
		final Category category = Category.newInstance(categoryId, node.getValue().getLabel());
		category.setConcept(prepareDnetConcepts(nodes, categoryId, 2));
		categories.add(category);
	}
	return categories;
}
/**
 * Builds the concepts found at the given level under the given parent node.
 *
 * @param nodes
 *            all the known nodes, keyed by id
 * @param parent
 *            the id of the parent node
 * @param level
 *            the level of the nodes to select
 * @return the concepts of the parent node
 */
private List<Concept> prepareDnetConcepts(final Map<String, ContextInfo> nodes, final String parent, final int level) {
	final List<Concept> concepts = new ArrayList<>();
	for (final Entry<String, ContextInfo> node : filterContextInfoByLevel(nodes, level, parent).entrySet()) {
		final Concept concept = Concept.newInstance(node.getKey(), node.getValue().getLabel());
		// TODO the Concept class supports only one level of concepts, so nested concepts are not added:
		// concept.setConcept(prepareDnetConcepts(nodes, node.getKey(), level + 1));
		concepts.add(concept);
	}
	return concepts;
}
/**
 * Selects the nodes of a given level, optionally restricted to the descendants of a parent.
 * <p>
 * Levels are zero-based: level 0 is a root id made of a single segment ("ctx"), level 1 has
 * two "::"-separated segments ("ctx::cat"), level 2 has three ("ctx::cat::concept"), and so on.
 *
 * @param nodes
 *            all the known nodes, keyed by id
 * @param level
 *            the zero-based level of the nodes to keep
 * @param parent
 *            the id of the parent node, or {@code null} to accept any node of the level
 * @return the selected nodes, keyed by id
 */
private Map<String, ContextInfo> filterContextInfoByLevel(final Map<String, ContextInfo> nodes, final int level, final String parent) {
	// an id at level N is made of N + 1 segments: comparing the segment count with the bare level would never match
	final int segments = level + 1;

	return nodes.entrySet()
		.stream()
		.filter(e -> parent == null || e.getKey().startsWith(parent + "::"))
		.filter(e -> e.getKey().split("::").length == segments)
		.collect(Collectors.toMap(Entry::getKey, Entry::getValue));
}
public String calculateOpenaireId(final String originalId, final String collectedFromId) throws DirectIndexApiException {