2020-04-30 11:05:17 +02:00
|
|
|
|
2020-05-11 17:32:06 +02:00
|
|
|
package eu.dnetlib.dhp.bulktag.community;
|
2020-04-21 16:03:51 +02:00
|
|
|
|
2020-05-11 17:38:08 +02:00
|
|
|
import static eu.dnetlib.dhp.bulktag.community.TaggingConstants.*;
|
|
|
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
|
2020-04-21 16:03:51 +02:00
|
|
|
|
|
|
|
import java.io.Serializable;
|
2020-03-03 16:38:50 +01:00
|
|
|
import java.util.*;
|
|
|
|
import java.util.stream.Collectors;
|
2020-04-30 11:05:17 +02:00
|
|
|
|
2020-05-11 17:38:08 +02:00
|
|
|
import org.apache.commons.lang3.StringUtils;
|
2023-04-05 12:18:39 +02:00
|
|
|
import org.slf4j.Logger;
|
|
|
|
import org.slf4j.LoggerFactory;
|
2020-05-11 17:38:08 +02:00
|
|
|
|
|
|
|
import com.google.gson.Gson;
|
|
|
|
import com.jayway.jsonpath.DocumentContext;
|
|
|
|
import com.jayway.jsonpath.JsonPath;
|
|
|
|
|
2023-04-18 17:39:31 +02:00
|
|
|
import eu.dnetlib.dhp.bulktag.eosc.EoscIFTag;
|
2020-05-11 17:38:08 +02:00
|
|
|
import eu.dnetlib.dhp.schema.oaf.*;
|
2022-12-07 10:45:38 +01:00
|
|
|
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
|
2020-04-30 11:05:17 +02:00
|
|
|
|
2020-04-21 16:03:51 +02:00
|
|
|
/** Created by miriam on 02/08/2018. */
|
|
|
|
public class ResultTagger implements Serializable {
|
2023-04-05 12:18:39 +02:00
|
|
|
private static final Logger log = LoggerFactory.getLogger(ResultTagger.class);
|
2020-03-03 16:38:50 +01:00
|
|
|
|
2020-04-30 11:05:17 +02:00
|
|
|
private boolean clearContext(Result result) {
|
|
|
|
int tmp = result.getContext().size();
|
|
|
|
List<Context> clist = result
|
|
|
|
.getContext()
|
|
|
|
.stream()
|
|
|
|
.filter(c -> (!c.getId().contains(ZENODO_COMMUNITY_INDICATOR)))
|
|
|
|
.collect(Collectors.toList());
|
|
|
|
result.setContext(clist);
|
|
|
|
return (tmp != clist.size());
|
|
|
|
}
|
|
|
|
|
|
|
|
private Map<String, List<String>> getParamMap(final Result result, Map<String, String> params) {
|
|
|
|
Map<String, List<String>> param = new HashMap<>();
|
|
|
|
String json = new Gson().toJson(result, Result.class);
|
|
|
|
DocumentContext jsonContext = JsonPath.parse(json);
|
|
|
|
if (params == null) {
|
|
|
|
params = new HashMap<>();
|
|
|
|
}
|
|
|
|
for (String key : params.keySet()) {
|
|
|
|
try {
|
|
|
|
param.put(key, jsonContext.read(params.get(key)));
|
|
|
|
} catch (com.jayway.jsonpath.PathNotFoundException e) {
|
|
|
|
param.put(key, new ArrayList<>());
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return param;
|
|
|
|
}
|
|
|
|
|
|
|
|
public <R extends Result> R enrichContextCriteria(
|
2020-05-11 17:38:08 +02:00
|
|
|
final R result, final CommunityConfiguration conf, final Map<String, String> criteria) {
|
2020-04-30 11:05:17 +02:00
|
|
|
|
|
|
|
final Map<String, List<String>> param = getParamMap(result, criteria);
|
|
|
|
|
|
|
|
// Verify if the entity is deletedbyinference. In case verify if to clean the context list
|
|
|
|
// from all the zenodo communities
|
|
|
|
if (result.getDataInfo().getDeletedbyinference()) {
|
|
|
|
clearContext(result);
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
2023-04-18 17:39:31 +02:00
|
|
|
// Execute the EOSCTag for the services
|
|
|
|
switch (result.getResulttype().getClassid()) {
|
2023-04-18 09:53:11 +02:00
|
|
|
case PUBLICATION_RESULTTYPE_CLASSID:
|
|
|
|
break;
|
|
|
|
case SOFTWARE_RESULTTYPE_CLASSID:
|
2023-04-18 17:39:31 +02:00
|
|
|
EoscIFTag.tagForSoftware(result);
|
2023-04-18 09:53:11 +02:00
|
|
|
break;
|
|
|
|
case DATASET_RESULTTYPE_CLASSID:
|
2023-04-18 17:39:31 +02:00
|
|
|
EoscIFTag.tagForDataset(result);
|
2023-04-18 09:53:11 +02:00
|
|
|
break;
|
|
|
|
case ORP_RESULTTYPE_CLASSID:
|
2023-04-18 17:39:31 +02:00
|
|
|
EoscIFTag.tagForOther(result);
|
2023-04-18 09:53:11 +02:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2023-05-24 09:56:23 +02:00
|
|
|
// communities contains all the communities to be not added to the context
|
|
|
|
final Set<String> removeCommunities = new HashSet<>();
|
|
|
|
|
|
|
|
conf
|
2023-05-24 16:20:05 +02:00
|
|
|
.getRemoveConstraintsMap()
|
|
|
|
.keySet()
|
|
|
|
.forEach(communityId -> {
|
|
|
|
if (conf.getRemoveConstraintsMap().get(communityId).getCriteria() != null &&
|
|
|
|
conf
|
|
|
|
.getRemoveConstraintsMap()
|
|
|
|
.get(communityId)
|
|
|
|
.getCriteria()
|
|
|
|
.stream()
|
|
|
|
.anyMatch(crit -> crit.verifyCriteria(param)))
|
|
|
|
removeCommunities.add(communityId);
|
|
|
|
});
|
2023-05-24 09:56:23 +02:00
|
|
|
|
2020-04-30 11:05:17 +02:00
|
|
|
// communities contains all the communities to be added as context for the result
|
|
|
|
final Set<String> communities = new HashSet<>();
|
|
|
|
|
|
|
|
// tagging for Subject
|
|
|
|
final Set<String> subjects = new HashSet<>();
|
2020-07-09 14:05:21 +02:00
|
|
|
|
2020-07-17 14:03:23 +02:00
|
|
|
if (Objects.nonNull(result.getSubject())) {
|
|
|
|
result
|
|
|
|
.getSubject()
|
2020-04-30 11:05:17 +02:00
|
|
|
.stream()
|
2021-08-11 12:13:22 +02:00
|
|
|
.map(StructuredProperty::getValue)
|
2020-04-30 11:05:17 +02:00
|
|
|
.filter(StringUtils::isNotBlank)
|
|
|
|
.map(String::toLowerCase)
|
|
|
|
.map(String::trim)
|
|
|
|
.collect(Collectors.toCollection(HashSet::new))
|
|
|
|
.forEach(s -> subjects.addAll(conf.getCommunityForSubjectValue(s)));
|
|
|
|
}
|
|
|
|
|
|
|
|
communities.addAll(subjects);
|
|
|
|
|
|
|
|
// Tagging for datasource
|
|
|
|
final Set<String> datasources = new HashSet<>();
|
2023-04-18 17:39:31 +02:00
|
|
|
final Set<String> collfrom = new HashSet<>();
|
|
|
|
final Set<String> hostdby = new HashSet<>();
|
2020-04-30 11:05:17 +02:00
|
|
|
|
2020-07-09 14:05:21 +02:00
|
|
|
if (Objects.nonNull(result.getInstance())) {
|
|
|
|
for (Instance i : result.getInstance()) {
|
2021-08-11 12:13:22 +02:00
|
|
|
if (Objects.nonNull(i.getCollectedfrom()) && Objects.nonNull(i.getCollectedfrom().getKey())) {
|
2023-04-18 17:39:31 +02:00
|
|
|
collfrom.add(StringUtils.substringAfter(i.getCollectedfrom().getKey(), "|"));
|
2020-07-09 14:05:21 +02:00
|
|
|
}
|
2021-08-11 12:13:22 +02:00
|
|
|
if (Objects.nonNull(i.getHostedby()) && Objects.nonNull(i.getHostedby().getKey())) {
|
2023-04-18 17:39:31 +02:00
|
|
|
hostdby.add(StringUtils.substringAfter(i.getHostedby().getKey(), "|"));
|
2020-07-09 14:05:21 +02:00
|
|
|
}
|
|
|
|
|
2020-04-30 11:05:17 +02:00
|
|
|
}
|
|
|
|
|
2023-04-18 17:39:31 +02:00
|
|
|
collfrom
|
2022-02-02 12:46:31 +01:00
|
|
|
.forEach(
|
2020-04-30 11:05:17 +02:00
|
|
|
dsId -> datasources
|
|
|
|
.addAll(
|
|
|
|
conf.getCommunityForDatasource(dsId, param)));
|
2023-04-18 17:39:31 +02:00
|
|
|
hostdby.forEach(dsId -> {
|
|
|
|
datasources
|
|
|
|
.addAll(
|
|
|
|
conf.getCommunityForDatasource(dsId, param));
|
|
|
|
if (conf.isEoscDatasource(dsId)) {
|
|
|
|
datasources.add("eosc");
|
|
|
|
}
|
|
|
|
|
|
|
|
});
|
2020-04-30 11:05:17 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
communities.addAll(datasources);
|
|
|
|
|
|
|
|
/* Tagging for Zenodo Communities */
|
|
|
|
final Set<String> czenodo = new HashSet<>();
|
|
|
|
|
|
|
|
Optional<List<Context>> oresultcontext = Optional.ofNullable(result.getContext());
|
|
|
|
if (oresultcontext.isPresent()) {
|
|
|
|
oresultcontext
|
|
|
|
.get()
|
|
|
|
.stream()
|
|
|
|
.filter(c -> c.getId().contains(ZENODO_COMMUNITY_INDICATOR))
|
|
|
|
.collect(Collectors.toList())
|
|
|
|
.forEach(
|
|
|
|
c -> czenodo
|
|
|
|
.addAll(
|
|
|
|
conf
|
|
|
|
.getCommunityForZenodoCommunityValue(
|
|
|
|
c
|
|
|
|
.getId()
|
|
|
|
.substring(
|
|
|
|
c.getId().lastIndexOf("/") + 1)
|
|
|
|
.trim())));
|
|
|
|
}
|
|
|
|
|
|
|
|
communities.addAll(czenodo);
|
|
|
|
|
2022-09-23 16:02:19 +02:00
|
|
|
/* Tagging for Advanced Constraints */
|
|
|
|
final Set<String> aconstraints = new HashSet<>();
|
|
|
|
|
2022-09-27 14:55:10 +02:00
|
|
|
conf
|
|
|
|
.getSelectionConstraintsMap()
|
|
|
|
.keySet()
|
|
|
|
.forEach(communityId -> {
|
2023-05-24 09:56:23 +02:00
|
|
|
if (!removeCommunities.contains(communityId) &&
|
2023-05-24 16:20:05 +02:00
|
|
|
conf.getSelectionConstraintsMap().get(communityId).getCriteria() != null &&
|
2022-09-27 14:55:10 +02:00
|
|
|
conf
|
|
|
|
.getSelectionConstraintsMap()
|
|
|
|
.get(communityId)
|
|
|
|
.getCriteria()
|
|
|
|
.stream()
|
|
|
|
.anyMatch(crit -> crit.verifyCriteria(param)))
|
|
|
|
aconstraints.add(communityId);
|
|
|
|
});
|
2022-09-23 16:02:19 +02:00
|
|
|
|
|
|
|
communities.addAll(aconstraints);
|
2023-05-24 09:56:23 +02:00
|
|
|
|
|
|
|
communities.removeAll(removeCommunities);
|
|
|
|
|
2023-04-05 12:18:39 +02:00
|
|
|
if (aconstraints.size() > 0)
|
|
|
|
log.info("Found {} for advancedConstraints ", aconstraints.size());
|
2022-09-23 16:02:19 +02:00
|
|
|
|
2020-04-30 11:05:17 +02:00
|
|
|
clearContext(result);
|
|
|
|
|
|
|
|
/* Verify if there is something to bulktag */
|
|
|
|
if (communities.isEmpty()) {
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
2021-08-11 12:13:22 +02:00
|
|
|
result.getContext().forEach(c -> {
|
2022-09-28 12:01:43 +02:00
|
|
|
final String cId = c.getId();
|
2022-09-28 11:44:55 +02:00
|
|
|
if (communities.contains(cId)) {
|
2021-08-11 12:13:22 +02:00
|
|
|
Optional<List<DataInfo>> opt_dataInfoList = Optional.ofNullable(c.getDataInfo());
|
|
|
|
List<DataInfo> dataInfoList;
|
|
|
|
if (opt_dataInfoList.isPresent())
|
|
|
|
dataInfoList = opt_dataInfoList.get();
|
|
|
|
else {
|
|
|
|
dataInfoList = new ArrayList<>();
|
|
|
|
c.setDataInfo(dataInfoList);
|
|
|
|
}
|
2022-09-28 11:44:55 +02:00
|
|
|
if (subjects.contains(cId))
|
2022-09-23 16:02:19 +02:00
|
|
|
dataInfoList
|
2022-09-27 14:55:10 +02:00
|
|
|
.add(
|
|
|
|
OafMapperUtils
|
|
|
|
.dataInfo(
|
|
|
|
false, BULKTAG_DATA_INFO_TYPE, true, false,
|
|
|
|
OafMapperUtils
|
|
|
|
.qualifier(
|
|
|
|
CLASS_ID_SUBJECT, CLASS_NAME_BULKTAG_SUBJECT, DNET_PROVENANCE_ACTIONS,
|
|
|
|
DNET_PROVENANCE_ACTIONS),
|
|
|
|
TAGGING_TRUST));
|
2022-09-28 11:44:55 +02:00
|
|
|
if (datasources.contains(cId))
|
2021-08-11 12:13:22 +02:00
|
|
|
dataInfoList
|
2022-09-27 14:55:10 +02:00
|
|
|
.add(
|
|
|
|
OafMapperUtils
|
|
|
|
.dataInfo(
|
|
|
|
false, BULKTAG_DATA_INFO_TYPE, true, false,
|
|
|
|
OafMapperUtils
|
|
|
|
.qualifier(
|
|
|
|
CLASS_ID_DATASOURCE, CLASS_NAME_BULKTAG_DATASOURCE, DNET_PROVENANCE_ACTIONS,
|
|
|
|
DNET_PROVENANCE_ACTIONS),
|
|
|
|
TAGGING_TRUST));
|
2022-09-28 11:44:55 +02:00
|
|
|
if (czenodo.contains(cId))
|
2021-08-11 12:13:22 +02:00
|
|
|
dataInfoList
|
2022-09-27 14:55:10 +02:00
|
|
|
.add(
|
|
|
|
OafMapperUtils
|
|
|
|
.dataInfo(
|
|
|
|
false, BULKTAG_DATA_INFO_TYPE, true, false,
|
|
|
|
OafMapperUtils
|
|
|
|
.qualifier(
|
|
|
|
CLASS_ID_CZENODO, CLASS_NAME_BULKTAG_ZENODO, DNET_PROVENANCE_ACTIONS,
|
|
|
|
DNET_PROVENANCE_ACTIONS),
|
|
|
|
TAGGING_TRUST));
|
2022-09-28 11:44:55 +02:00
|
|
|
if (aconstraints.contains(cId))
|
2021-08-11 12:13:22 +02:00
|
|
|
dataInfoList
|
2022-09-27 14:55:10 +02:00
|
|
|
.add(
|
|
|
|
OafMapperUtils
|
|
|
|
.dataInfo(
|
|
|
|
false, BULKTAG_DATA_INFO_TYPE, true, false,
|
|
|
|
OafMapperUtils
|
|
|
|
.qualifier(
|
|
|
|
CLASS_ID_ADVANCED_CONSTRAINT, CLASS_NAME_BULKTAG_ADVANCED_CONSTRAINT,
|
|
|
|
DNET_PROVENANCE_ACTIONS, DNET_PROVENANCE_ACTIONS),
|
|
|
|
TAGGING_TRUST));
|
2022-09-23 16:02:19 +02:00
|
|
|
|
2021-08-11 12:13:22 +02:00
|
|
|
}
|
|
|
|
});
|
2020-04-30 11:05:17 +02:00
|
|
|
|
|
|
|
communities
|
|
|
|
.removeAll(
|
2021-08-11 12:13:22 +02:00
|
|
|
result.getContext().stream().map(Context::getId).collect(Collectors.toSet()));
|
2020-04-30 11:05:17 +02:00
|
|
|
|
|
|
|
if (communities.isEmpty())
|
|
|
|
return result;
|
|
|
|
|
|
|
|
List<Context> toaddcontext = communities
|
|
|
|
.stream()
|
|
|
|
.map(
|
|
|
|
c -> {
|
|
|
|
Context context = new Context();
|
|
|
|
context.setId(c);
|
|
|
|
List<DataInfo> dataInfoList = new ArrayList<>();
|
|
|
|
if (subjects.contains(c))
|
|
|
|
dataInfoList
|
2022-09-27 14:55:10 +02:00
|
|
|
.add(
|
|
|
|
OafMapperUtils
|
|
|
|
.dataInfo(
|
|
|
|
false, BULKTAG_DATA_INFO_TYPE, true, false,
|
|
|
|
OafMapperUtils
|
|
|
|
.qualifier(
|
|
|
|
CLASS_ID_SUBJECT, CLASS_NAME_BULKTAG_SUBJECT, DNET_PROVENANCE_ACTIONS,
|
|
|
|
DNET_PROVENANCE_ACTIONS),
|
|
|
|
TAGGING_TRUST));
|
2020-04-30 11:05:17 +02:00
|
|
|
if (datasources.contains(c))
|
|
|
|
dataInfoList
|
2022-09-27 14:55:10 +02:00
|
|
|
.add(
|
|
|
|
OafMapperUtils
|
|
|
|
.dataInfo(
|
|
|
|
false, BULKTAG_DATA_INFO_TYPE, true, false,
|
|
|
|
OafMapperUtils
|
|
|
|
.qualifier(
|
|
|
|
CLASS_ID_DATASOURCE, CLASS_NAME_BULKTAG_DATASOURCE,
|
|
|
|
DNET_PROVENANCE_ACTIONS, DNET_PROVENANCE_ACTIONS),
|
|
|
|
TAGGING_TRUST));
|
2020-04-30 11:05:17 +02:00
|
|
|
if (czenodo.contains(c))
|
|
|
|
dataInfoList
|
2022-09-27 14:55:10 +02:00
|
|
|
.add(
|
|
|
|
OafMapperUtils
|
|
|
|
.dataInfo(
|
|
|
|
false, BULKTAG_DATA_INFO_TYPE, true, false,
|
|
|
|
OafMapperUtils
|
|
|
|
.qualifier(
|
|
|
|
CLASS_ID_CZENODO, CLASS_NAME_BULKTAG_ZENODO, DNET_PROVENANCE_ACTIONS,
|
|
|
|
DNET_PROVENANCE_ACTIONS),
|
|
|
|
TAGGING_TRUST));
|
2022-09-23 16:02:19 +02:00
|
|
|
if (aconstraints.contains(c))
|
|
|
|
dataInfoList
|
2022-09-27 14:55:10 +02:00
|
|
|
.add(
|
|
|
|
OafMapperUtils
|
|
|
|
.dataInfo(
|
|
|
|
false, BULKTAG_DATA_INFO_TYPE, true, false,
|
|
|
|
OafMapperUtils
|
|
|
|
.qualifier(
|
|
|
|
CLASS_ID_ADVANCED_CONSTRAINT, CLASS_NAME_BULKTAG_ADVANCED_CONSTRAINT,
|
|
|
|
DNET_PROVENANCE_ACTIONS, DNET_PROVENANCE_ACTIONS),
|
|
|
|
TAGGING_TRUST));
|
2022-09-23 16:02:19 +02:00
|
|
|
|
2020-04-30 11:05:17 +02:00
|
|
|
context.setDataInfo(dataInfoList);
|
|
|
|
return context;
|
|
|
|
})
|
|
|
|
.collect(Collectors.toList());
|
|
|
|
|
|
|
|
result.getContext().addAll(toaddcontext);
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
2020-04-21 16:03:51 +02:00
|
|
|
}
|