forked from D-Net/dnet-hadoop
Merge pull request 'BulkTagging extension' (#250) from horizontalConstraints into beta
Reviewed-on: D-Net/dnet-hadoop#250
This commit is contained in:
commit
b4b6a4457c
|
@ -5,6 +5,8 @@ import java.io.Serializable;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
|
import org.apache.avro.generic.GenericData;
|
||||||
|
|
||||||
import com.google.gson.Gson;
|
import com.google.gson.Gson;
|
||||||
|
|
||||||
/** Created by miriam on 01/08/2018. */
|
/** Created by miriam on 01/08/2018. */
|
||||||
|
@ -14,6 +16,7 @@ public class Community implements Serializable {
|
||||||
private List<String> subjects = new ArrayList<>();
|
private List<String> subjects = new ArrayList<>();
|
||||||
private List<Provider> providers = new ArrayList<>();
|
private List<Provider> providers = new ArrayList<>();
|
||||||
private List<ZenodoCommunity> zenodoCommunities = new ArrayList<>();
|
private List<ZenodoCommunity> zenodoCommunities = new ArrayList<>();
|
||||||
|
private SelectionConstraints constraints = new SelectionConstraints();
|
||||||
|
|
||||||
public String toJson() {
|
public String toJson() {
|
||||||
final Gson g = new Gson();
|
final Gson g = new Gson();
|
||||||
|
@ -57,4 +60,12 @@ public class Community implements Serializable {
|
||||||
public void setZenodoCommunities(List<ZenodoCommunity> zenodoCommunities) {
|
public void setZenodoCommunities(List<ZenodoCommunity> zenodoCommunities) {
|
||||||
this.zenodoCommunities = zenodoCommunities;
|
this.zenodoCommunities = zenodoCommunities;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Returns the selection constraints currently stored on this community (the field reference is returned as-is). */
public SelectionConstraints getConstraints() {
|
||||||
|
return constraints;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Replaces the selection constraints stored on this community; the argument is assigned without any validation. */
public void setConstraints(SelectionConstraints constraints) {
|
||||||
|
this.constraints = constraints;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -24,6 +24,8 @@ public class CommunityConfiguration implements Serializable {
|
||||||
private Map<String, List<Pair<String, SelectionConstraints>>> datasourceMap = new HashMap<>();
|
private Map<String, List<Pair<String, SelectionConstraints>>> datasourceMap = new HashMap<>();
|
||||||
// map zenodocommunityid -> communityid
|
// map zenodocommunityid -> communityid
|
||||||
private Map<String, List<Pair<String, SelectionConstraints>>> zenodocommunityMap = new HashMap<>();
|
private Map<String, List<Pair<String, SelectionConstraints>>> zenodocommunityMap = new HashMap<>();
|
||||||
|
// map communityid -> selectionconstraints
|
||||||
|
private Map<String, SelectionConstraints> selectionConstraintsMap = new HashMap<>();
|
||||||
|
|
||||||
public Map<String, List<Pair<String, SelectionConstraints>>> getSubjectMap() {
|
public Map<String, List<Pair<String, SelectionConstraints>>> getSubjectMap() {
|
||||||
return subjectMap;
|
return subjectMap;
|
||||||
|
@ -51,6 +53,14 @@ public class CommunityConfiguration implements Serializable {
|
||||||
this.zenodocommunityMap = zenodocommunityMap;
|
this.zenodocommunityMap = zenodocommunityMap;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Returns the map from community id to its selection constraints (the internal map itself, not a copy). */
public Map<String, SelectionConstraints> getSelectionConstraintsMap() {
|
||||||
|
return selectionConstraintsMap;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Replaces the map from community id to its selection constraints; the argument is stored without copying. */
public void setSelectionConstraintsMap(Map<String, SelectionConstraints> selectionConstraintsMap) {
|
||||||
|
this.selectionConstraintsMap = selectionConstraintsMap;
|
||||||
|
}
|
||||||
|
|
||||||
CommunityConfiguration(final Map<String, Community> communities) {
|
CommunityConfiguration(final Map<String, Community> communities) {
|
||||||
this.communities = communities;
|
this.communities = communities;
|
||||||
init();
|
init();
|
||||||
|
@ -67,6 +77,9 @@ public class CommunityConfiguration implements Serializable {
|
||||||
if (zenodocommunityMap == null) {
|
if (zenodocommunityMap == null) {
|
||||||
zenodocommunityMap = Maps.newHashMap();
|
zenodocommunityMap = Maps.newHashMap();
|
||||||
}
|
}
|
||||||
|
if (selectionConstraintsMap == null) {
|
||||||
|
selectionConstraintsMap = Maps.newHashMap();
|
||||||
|
}
|
||||||
|
|
||||||
for (Community c : getCommunities().values()) {
|
for (Community c : getCommunities().values()) {
|
||||||
// get subjects
|
// get subjects
|
||||||
|
@ -87,6 +100,7 @@ public class CommunityConfiguration implements Serializable {
|
||||||
new Pair<>(id, zc.getSelCriteria()),
|
new Pair<>(id, zc.getSelCriteria()),
|
||||||
zenodocommunityMap);
|
zenodocommunityMap);
|
||||||
}
|
}
|
||||||
|
selectionConstraintsMap.put(id, c.getConstraints());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -85,9 +85,22 @@ public class CommunityConfigurationFactory {
|
||||||
c.setSubjects(parseSubjects(node));
|
c.setSubjects(parseSubjects(node));
|
||||||
c.setProviders(parseDatasources(node));
|
c.setProviders(parseDatasources(node));
|
||||||
c.setZenodoCommunities(parseZenodoCommunities(node));
|
c.setZenodoCommunities(parseZenodoCommunities(node));
|
||||||
|
c.setConstraints(parseConstrains(node));
|
||||||
return c;
|
return c;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static SelectionConstraints parseConstrains(Node node) {
|
||||||
|
Node aconstraints = node.selectSingleNode("./advancedConstraints");
|
||||||
|
if (aconstraints == null) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
SelectionConstraints selectionConstraints = new Gson()
|
||||||
|
.fromJson(aconstraints.getText(), SelectionConstraints.class);
|
||||||
|
|
||||||
|
selectionConstraints.setSelection(resolver);
|
||||||
|
return selectionConstraints;
|
||||||
|
}
|
||||||
|
|
||||||
private static List<String> parseSubjects(final Node node) {
|
private static List<String> parseSubjects(final Node node) {
|
||||||
|
|
||||||
final List<String> subjects = Lists.newArrayList();
|
final List<String> subjects = Lists.newArrayList();
|
||||||
|
|
|
@ -11,6 +11,7 @@ public class Constraint implements Serializable {
|
||||||
private String verb;
|
private String verb;
|
||||||
private String field;
|
private String field;
|
||||||
private String value;
|
private String value;
|
||||||
|
// private String element;
|
||||||
private Selection selection;
|
private Selection selection;
|
||||||
|
|
||||||
public String getVerb() {
|
public String getVerb() {
|
||||||
|
@ -50,4 +51,12 @@ public class Constraint implements Serializable {
|
||||||
public boolean verifyCriteria(String metadata) {
|
public boolean verifyCriteria(String metadata) {
|
||||||
return selection.apply(metadata);
|
return selection.apply(metadata);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// public String getElement() {
|
||||||
|
// return element;
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// public void setElement(String element) {
|
||||||
|
// this.element = element;
|
||||||
|
// }
|
||||||
}
|
}
|
||||||
|
|
|
@ -18,6 +18,8 @@ public class QueryInformationSystem {
|
||||||
+ " let $datasources := $x//CONFIGURATION/context/category[./@id=concat($x//CONFIGURATION/context/@id,'::contentproviders')]/concept "
|
+ " let $datasources := $x//CONFIGURATION/context/category[./@id=concat($x//CONFIGURATION/context/@id,'::contentproviders')]/concept "
|
||||||
+ " let $organizations := $x//CONFIGURATION/context/category[./@id=concat($x//CONFIGURATION/context/@id,'::resultorganizations')]/concept "
|
+ " let $organizations := $x//CONFIGURATION/context/category[./@id=concat($x//CONFIGURATION/context/@id,'::resultorganizations')]/concept "
|
||||||
+ " let $communities := $x//CONFIGURATION/context/category[./@id=concat($x//CONFIGURATION/context/@id,'::zenodocommunities')]/concept "
|
+ " let $communities := $x//CONFIGURATION/context/category[./@id=concat($x//CONFIGURATION/context/@id,'::zenodocommunities')]/concept "
|
||||||
|
+ " let $fos := $x//CONFIGURATION/context/param[./@name='fos']/text() "
|
||||||
|
+ " let $sdg := $x//CONFIGURATION/context/param[./@name='sdg']/text() "
|
||||||
+
|
+
|
||||||
"let $zenodo := $x//param[./@name='zenodoCommunity']/text() "
|
"let $zenodo := $x//param[./@name='zenodoCommunity']/text() "
|
||||||
+ " where $x//CONFIGURATION/context[./@type='community' or ./@type='ri'] and $x//context/param[./@name = 'status']/text() != 'hidden' "
|
+ " where $x//CONFIGURATION/context[./@type='community' or ./@type='ri'] and $x//context/param[./@name = 'status']/text() != 'hidden' "
|
||||||
|
@ -28,6 +30,12 @@ public class QueryInformationSystem {
|
||||||
+ " {for $y in tokenize($subj,',') "
|
+ " {for $y in tokenize($subj,',') "
|
||||||
+ " return "
|
+ " return "
|
||||||
+ " <subject>{$y}</subject>} "
|
+ " <subject>{$y}</subject>} "
|
||||||
|
+ " {for $y in tokenize($fos,',') "
|
||||||
|
+ " return "
|
||||||
|
+ " <subject>{$y}</subject>} "
|
||||||
|
+ " {for $y in tokenize($sdg,',') "
|
||||||
|
+ " return "
|
||||||
|
+ " <subject>{$y}</subject>} "
|
||||||
+ " </subjects> "
|
+ " </subjects> "
|
||||||
+ " <datasources> "
|
+ " <datasources> "
|
||||||
+ " {for $d in $datasources "
|
+ " {for $d in $datasources "
|
||||||
|
@ -61,6 +69,9 @@ public class QueryInformationSystem {
|
||||||
+ " </selcriteria> "
|
+ " </selcriteria> "
|
||||||
+ " </zenodocommunity>} "
|
+ " </zenodocommunity>} "
|
||||||
+ " </zenodocommunities> "
|
+ " </zenodocommunities> "
|
||||||
|
+ "<advancedConstraint>"
|
||||||
|
+ "{$x//CONFIGURATION/context/param[./@name='advancedConstraint']/text()} "
|
||||||
|
+ "</advancedConstraint>"
|
||||||
+ " </community>";
|
+ " </community>";
|
||||||
|
|
||||||
public static CommunityConfiguration getCommunityConfiguration(final String isLookupUrl)
|
public static CommunityConfiguration getCommunityConfiguration(final String isLookupUrl)
|
||||||
|
|
|
@ -15,7 +15,10 @@ import com.google.gson.Gson;
|
||||||
import com.jayway.jsonpath.DocumentContext;
|
import com.jayway.jsonpath.DocumentContext;
|
||||||
import com.jayway.jsonpath.JsonPath;
|
import com.jayway.jsonpath.JsonPath;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||||
|
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
||||||
import eu.dnetlib.dhp.schema.oaf.*;
|
import eu.dnetlib.dhp.schema.oaf.*;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
|
||||||
|
|
||||||
/** Created by miriam on 02/08/2018. */
|
/** Created by miriam on 02/08/2018. */
|
||||||
public class ResultTagger implements Serializable {
|
public class ResultTagger implements Serializable {
|
||||||
|
@ -95,13 +98,6 @@ public class ResultTagger implements Serializable {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// result
|
|
||||||
// .getInstance()
|
|
||||||
// .stream()
|
|
||||||
// .map(i -> new Pair<>(i.getCollectedfrom().getKey(), i.getHostedby().getKey()))
|
|
||||||
// .flatMap(p -> Stream.of(p.getFst(), p.getSnd()))
|
|
||||||
// .map(s -> StringUtils.substringAfter(s, "|"))
|
|
||||||
// .collect(Collectors.toCollection(HashSet::new))
|
|
||||||
tmp
|
tmp
|
||||||
.forEach(
|
.forEach(
|
||||||
dsId -> datasources
|
dsId -> datasources
|
||||||
|
@ -135,6 +131,25 @@ public class ResultTagger implements Serializable {
|
||||||
|
|
||||||
communities.addAll(czenodo);
|
communities.addAll(czenodo);
|
||||||
|
|
||||||
|
/* Tagging for Advanced Constraints */
|
||||||
|
final Set<String> aconstraints = new HashSet<>();
|
||||||
|
|
||||||
|
conf
|
||||||
|
.getSelectionConstraintsMap()
|
||||||
|
.keySet()
|
||||||
|
.forEach(communityId -> {
|
||||||
|
if (conf.getSelectionConstraintsMap().get(communityId) != null &&
|
||||||
|
conf
|
||||||
|
.getSelectionConstraintsMap()
|
||||||
|
.get(communityId)
|
||||||
|
.getCriteria()
|
||||||
|
.stream()
|
||||||
|
.anyMatch(crit -> crit.verifyCriteria(param)))
|
||||||
|
aconstraints.add(communityId);
|
||||||
|
});
|
||||||
|
|
||||||
|
communities.addAll(aconstraints);
|
||||||
|
|
||||||
clearContext(result);
|
clearContext(result);
|
||||||
|
|
||||||
/* Verify if there is something to bulktag */
|
/* Verify if there is something to bulktag */
|
||||||
|
@ -152,30 +167,51 @@ public class ResultTagger implements Serializable {
|
||||||
dataInfoList = new ArrayList<>();
|
dataInfoList = new ArrayList<>();
|
||||||
c.setDataInfo(dataInfoList);
|
c.setDataInfo(dataInfoList);
|
||||||
}
|
}
|
||||||
if (subjects.contains(c.getId()))
|
if (subjects.contains(c))
|
||||||
dataInfoList
|
dataInfoList
|
||||||
.add(
|
.add(
|
||||||
getDataInfo(
|
OafMapperUtils
|
||||||
BULKTAG_DATA_INFO_TYPE,
|
.dataInfo(
|
||||||
CLASS_ID_SUBJECT,
|
false, BULKTAG_DATA_INFO_TYPE, true, false,
|
||||||
CLASS_NAME_BULKTAG_SUBJECT,
|
OafMapperUtils
|
||||||
TAGGING_TRUST));
|
.qualifier(
|
||||||
if (datasources.contains(c.getId()))
|
CLASS_ID_SUBJECT, CLASS_NAME_BULKTAG_SUBJECT, DNET_PROVENANCE_ACTIONS,
|
||||||
|
DNET_PROVENANCE_ACTIONS),
|
||||||
|
TAGGING_TRUST));
|
||||||
|
if (datasources.contains(c))
|
||||||
dataInfoList
|
dataInfoList
|
||||||
.add(
|
.add(
|
||||||
getDataInfo(
|
OafMapperUtils
|
||||||
BULKTAG_DATA_INFO_TYPE,
|
.dataInfo(
|
||||||
CLASS_ID_DATASOURCE,
|
false, BULKTAG_DATA_INFO_TYPE, true, false,
|
||||||
CLASS_NAME_BULKTAG_DATASOURCE,
|
OafMapperUtils
|
||||||
TAGGING_TRUST));
|
.qualifier(
|
||||||
if (czenodo.contains(c.getId()))
|
CLASS_ID_DATASOURCE, CLASS_NAME_BULKTAG_DATASOURCE, DNET_PROVENANCE_ACTIONS,
|
||||||
|
DNET_PROVENANCE_ACTIONS),
|
||||||
|
TAGGING_TRUST));
|
||||||
|
if (czenodo.contains(c))
|
||||||
dataInfoList
|
dataInfoList
|
||||||
.add(
|
.add(
|
||||||
getDataInfo(
|
OafMapperUtils
|
||||||
BULKTAG_DATA_INFO_TYPE,
|
.dataInfo(
|
||||||
CLASS_ID_CZENODO,
|
false, BULKTAG_DATA_INFO_TYPE, true, false,
|
||||||
CLASS_NAME_BULKTAG_ZENODO,
|
OafMapperUtils
|
||||||
TAGGING_TRUST));
|
.qualifier(
|
||||||
|
CLASS_ID_CZENODO, CLASS_NAME_BULKTAG_ZENODO, DNET_PROVENANCE_ACTIONS,
|
||||||
|
DNET_PROVENANCE_ACTIONS),
|
||||||
|
TAGGING_TRUST));
|
||||||
|
if (aconstraints.contains(c))
|
||||||
|
dataInfoList
|
||||||
|
.add(
|
||||||
|
OafMapperUtils
|
||||||
|
.dataInfo(
|
||||||
|
false, BULKTAG_DATA_INFO_TYPE, true, false,
|
||||||
|
OafMapperUtils
|
||||||
|
.qualifier(
|
||||||
|
CLASS_ID_ADVANCED_CONSTRAINT, CLASS_NAME_BULKTAG_ADVANCED_CONSTRAINT,
|
||||||
|
DNET_PROVENANCE_ACTIONS, DNET_PROVENANCE_ACTIONS),
|
||||||
|
TAGGING_TRUST));
|
||||||
|
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
@ -196,27 +232,48 @@ public class ResultTagger implements Serializable {
|
||||||
if (subjects.contains(c))
|
if (subjects.contains(c))
|
||||||
dataInfoList
|
dataInfoList
|
||||||
.add(
|
.add(
|
||||||
getDataInfo(
|
OafMapperUtils
|
||||||
BULKTAG_DATA_INFO_TYPE,
|
.dataInfo(
|
||||||
CLASS_ID_SUBJECT,
|
false, BULKTAG_DATA_INFO_TYPE, true, false,
|
||||||
CLASS_NAME_BULKTAG_SUBJECT,
|
OafMapperUtils
|
||||||
TAGGING_TRUST));
|
.qualifier(
|
||||||
|
CLASS_ID_SUBJECT, CLASS_NAME_BULKTAG_SUBJECT, DNET_PROVENANCE_ACTIONS,
|
||||||
|
DNET_PROVENANCE_ACTIONS),
|
||||||
|
TAGGING_TRUST));
|
||||||
if (datasources.contains(c))
|
if (datasources.contains(c))
|
||||||
dataInfoList
|
dataInfoList
|
||||||
.add(
|
.add(
|
||||||
getDataInfo(
|
OafMapperUtils
|
||||||
BULKTAG_DATA_INFO_TYPE,
|
.dataInfo(
|
||||||
CLASS_ID_DATASOURCE,
|
false, BULKTAG_DATA_INFO_TYPE, true, false,
|
||||||
CLASS_NAME_BULKTAG_DATASOURCE,
|
OafMapperUtils
|
||||||
TAGGING_TRUST));
|
.qualifier(
|
||||||
|
CLASS_ID_DATASOURCE, CLASS_NAME_BULKTAG_DATASOURCE,
|
||||||
|
DNET_PROVENANCE_ACTIONS, DNET_PROVENANCE_ACTIONS),
|
||||||
|
TAGGING_TRUST));
|
||||||
if (czenodo.contains(c))
|
if (czenodo.contains(c))
|
||||||
dataInfoList
|
dataInfoList
|
||||||
.add(
|
.add(
|
||||||
getDataInfo(
|
OafMapperUtils
|
||||||
BULKTAG_DATA_INFO_TYPE,
|
.dataInfo(
|
||||||
CLASS_ID_CZENODO,
|
false, BULKTAG_DATA_INFO_TYPE, true, false,
|
||||||
CLASS_NAME_BULKTAG_ZENODO,
|
OafMapperUtils
|
||||||
TAGGING_TRUST));
|
.qualifier(
|
||||||
|
CLASS_ID_CZENODO, CLASS_NAME_BULKTAG_ZENODO, DNET_PROVENANCE_ACTIONS,
|
||||||
|
DNET_PROVENANCE_ACTIONS),
|
||||||
|
TAGGING_TRUST));
|
||||||
|
if (aconstraints.contains(c))
|
||||||
|
dataInfoList
|
||||||
|
.add(
|
||||||
|
OafMapperUtils
|
||||||
|
.dataInfo(
|
||||||
|
false, BULKTAG_DATA_INFO_TYPE, true, false,
|
||||||
|
OafMapperUtils
|
||||||
|
.qualifier(
|
||||||
|
CLASS_ID_ADVANCED_CONSTRAINT, CLASS_NAME_BULKTAG_ADVANCED_CONSTRAINT,
|
||||||
|
DNET_PROVENANCE_ACTIONS, DNET_PROVENANCE_ACTIONS),
|
||||||
|
TAGGING_TRUST));
|
||||||
|
|
||||||
context.setDataInfo(dataInfoList);
|
context.setDataInfo(dataInfoList);
|
||||||
return context;
|
return context;
|
||||||
})
|
})
|
||||||
|
@ -226,22 +283,4 @@ public class ResultTagger implements Serializable {
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static DataInfo getDataInfo(
|
|
||||||
String inference_provenance, String inference_class_id, String inference_class_name, String trust) {
|
|
||||||
DataInfo di = new DataInfo();
|
|
||||||
di.setInferred(true);
|
|
||||||
di.setInferenceprovenance(inference_provenance);
|
|
||||||
di.setProvenanceaction(getQualifier(inference_class_id, inference_class_name));
|
|
||||||
di.setTrust(trust);
|
|
||||||
return di;
|
|
||||||
}
|
|
||||||
|
|
||||||
public static Qualifier getQualifier(String inference_class_id, String inference_class_name) {
|
|
||||||
Qualifier pa = new Qualifier();
|
|
||||||
pa.setClassid(inference_class_id);
|
|
||||||
pa.setClassname(inference_class_name);
|
|
||||||
pa.setSchemeid(DNET_PROVENANCE_ACTIONS);
|
|
||||||
pa.setSchemename(DNET_PROVENANCE_ACTIONS);
|
|
||||||
return pa;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -11,12 +11,14 @@ public class TaggingConstants {
|
||||||
public static final String CLASS_ID_SUBJECT = "community:subject";
|
public static final String CLASS_ID_SUBJECT = "community:subject";
|
||||||
public static final String CLASS_ID_DATASOURCE = "community:datasource";
|
public static final String CLASS_ID_DATASOURCE = "community:datasource";
|
||||||
public static final String CLASS_ID_CZENODO = "community:zenodocommunity";
|
public static final String CLASS_ID_CZENODO = "community:zenodocommunity";
|
||||||
|
public static final String CLASS_ID_ADVANCED_CONSTRAINT = "community:advconstraint";
|
||||||
|
|
||||||
public static final String ZENODO_COMMUNITY_INDICATOR = "zenodo.org/communities/";
|
public static final String ZENODO_COMMUNITY_INDICATOR = "zenodo.org/communities/";
|
||||||
|
|
||||||
public static final String CLASS_NAME_BULKTAG_SUBJECT = "Bulktagging for Community - Subject";
|
public static final String CLASS_NAME_BULKTAG_SUBJECT = "Bulktagging for Community - Subject";
|
||||||
public static final String CLASS_NAME_BULKTAG_DATASOURCE = "Bulktagging for Community - Datasource";
|
public static final String CLASS_NAME_BULKTAG_DATASOURCE = "Bulktagging for Community - Datasource";
|
||||||
public static final String CLASS_NAME_BULKTAG_ZENODO = "Bulktagging for Community - Zenodo";
|
public static final String CLASS_NAME_BULKTAG_ZENODO = "Bulktagging for Community - Zenodo";
|
||||||
|
public static final String CLASS_NAME_BULKTAG_ADVANCED_CONSTRAINT = "Bulktagging for Community - Advanced Constraints";
|
||||||
|
|
||||||
public static final String TAGGING_TRUST = "0.8";
|
public static final String TAGGING_TRUST = "0.8";
|
||||||
}
|
}
|
||||||
|
|
|
@ -121,8 +121,7 @@ public class SparkEoscBulkTag implements Serializable {
|
||||||
.getInstance()
|
.getInstance()
|
||||||
.stream()
|
.stream()
|
||||||
.anyMatch(
|
.anyMatch(
|
||||||
i -> (hostedByList.contains(i.getHostedby().getKey())) ||
|
i -> (hostedByList.contains(i.getHostedby().getKey())))
|
||||||
(value.getEoscifguidelines() != null && value.getEoscifguidelines().size() > 0))
|
|
||||||
&&
|
&&
|
||||||
!value.getContext().stream().anyMatch(c -> c.getId().equals("eosc"))) {
|
!value.getContext().stream().anyMatch(c -> c.getId().equals("eosc"))) {
|
||||||
Context context = new Context();
|
Context context = new Context();
|
||||||
|
|
|
@ -6,6 +6,10 @@ import static eu.dnetlib.dhp.bulktag.community.TaggingConstants.ZENODO_COMMUNITY
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.nio.file.Files;
|
import java.nio.file.Files;
|
||||||
import java.nio.file.Path;
|
import java.nio.file.Path;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
import org.apache.commons.io.FileUtils;
|
import org.apache.commons.io.FileUtils;
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
|
@ -23,11 +27,12 @@ import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
import com.google.gson.Gson;
|
||||||
|
import com.jayway.jsonpath.DocumentContext;
|
||||||
|
import com.jayway.jsonpath.JsonPath;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.Dataset;
|
import eu.dnetlib.dhp.bulktag.community.ProtoMap;
|
||||||
import eu.dnetlib.dhp.schema.oaf.OtherResearchProduct;
|
import eu.dnetlib.dhp.schema.oaf.*;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Publication;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.Software;
|
|
||||||
|
|
||||||
public class BulkTagJobTest {
|
public class BulkTagJobTest {
|
||||||
|
|
||||||
|
@ -39,7 +44,8 @@ public class BulkTagJobTest {
|
||||||
+ " \"title\" : \"$['title'][*]['value']\","
|
+ " \"title\" : \"$['title'][*]['value']\","
|
||||||
+ " \"orcid\" : \"$['author'][*]['pid'][*][?(@['key']=='ORCID')]['value']\","
|
+ " \"orcid\" : \"$['author'][*]['pid'][*][?(@['key']=='ORCID')]['value']\","
|
||||||
+ " \"contributor\" : \"$['contributor'][*]['value']\","
|
+ " \"contributor\" : \"$['contributor'][*]['value']\","
|
||||||
+ " \"description\" : \"$['description'][*]['value']\"}";
|
+ " \"description\" : \"$['description'][*]['value']\", "
|
||||||
|
+ " \"subject\" :\"$['subject'][*]['value']\" }";
|
||||||
|
|
||||||
private static SparkSession spark;
|
private static SparkSession spark;
|
||||||
|
|
||||||
|
@ -763,10 +769,28 @@ public class BulkTagJobTest {
|
||||||
org.apache.spark.sql.Dataset<Row> idExplodeCommunity = spark.sql(query);
|
org.apache.spark.sql.Dataset<Row> idExplodeCommunity = spark.sql(query);
|
||||||
|
|
||||||
idExplodeCommunity.show(false);
|
idExplodeCommunity.show(false);
|
||||||
Assertions.assertEquals(3, idExplodeCommunity.count());
|
Assertions.assertEquals(4, idExplodeCommunity.count());
|
||||||
|
|
||||||
Assertions
|
Assertions
|
||||||
.assertEquals(
|
.assertEquals(
|
||||||
3, idExplodeCommunity.filter("provenance = 'community:datasource'").count());
|
3, idExplodeCommunity.filter("provenance = 'community:datasource'").count());
|
||||||
|
Assertions
|
||||||
|
.assertEquals(
|
||||||
|
1, idExplodeCommunity.filter("provenance = 'community:advconstraint'").count());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// @Test
|
||||||
|
// void test1(){
|
||||||
|
// ProtoMap params = new Gson().fromJson(pathMap, ProtoMap.class);
|
||||||
|
// HashMap<String, String> param = new HashMap<>();
|
||||||
|
// for (String key : params.keySet()) {
|
||||||
|
// try {
|
||||||
|
// param.put(key, jsonContext.read(params.get(key)));
|
||||||
|
// } catch (com.jayway.jsonpath.PathNotFoundException e) {
|
||||||
|
// param.put(key, new ArrayList<>());
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
// return param;
|
||||||
|
// }
|
||||||
|
// }
|
||||||
}
|
}
|
||||||
|
|
|
@ -1193,6 +1193,9 @@
|
||||||
<organizations/>
|
<organizations/>
|
||||||
</community>
|
</community>
|
||||||
<community id="science-innovation-policy">
|
<community id="science-innovation-policy">
|
||||||
|
<advancedConstraints>{"criteria":[{"constraint":[{"verb":"equals_ignorecase","field":"subject","value":"ciencias de la comunicación"},
|
||||||
|
{"verb":"equals","field":"subject","value":"Miriam"}]},
|
||||||
|
{"constraint":[{"verb":"equals","field":"subject","value":"miriam"}]}]}</advancedConstraints>
|
||||||
<subjects>
|
<subjects>
|
||||||
<subject>Sustainability-oriented science policy</subject>
|
<subject>Sustainability-oriented science policy</subject>
|
||||||
<subject> STI policies</subject>
|
<subject> STI policies</subject>
|
||||||
|
@ -1316,7 +1319,7 @@
|
||||||
<openaireId>opendoar____::358aee4cc897452c00244351e4d91f69</openaireId>
|
<openaireId>opendoar____::358aee4cc897452c00244351e4d91f69</openaireId>
|
||||||
<selcriteria>{"criteria":[{"constraint":[{"verb":"contains_ignorecase","field":"title","value":"COVID-19"}]},
|
<selcriteria>{"criteria":[{"constraint":[{"verb":"contains_ignorecase","field":"title","value":"COVID-19"}]},
|
||||||
{"constraint":[{"verb":"contains_ignorecase","field":"title","value":"SARS-CoV-2"}]},
|
{"constraint":[{"verb":"contains_ignorecase","field":"title","value":"SARS-CoV-2"}]},
|
||||||
{"constraint":[{"verb":"contains_ignorecase","field":"title","value":"2019-nCoV"}]}]}
|
{"constraint":[{"verb":"contains_ignorecase","field":"title","value":"2019-nCoV"}]}]}
|
||||||
</selcriteria>
|
</selcriteria>
|
||||||
</datasource>
|
</datasource>
|
||||||
<datasource>
|
<datasource>
|
||||||
|
|
File diff suppressed because one or more lines are too long
Binary file not shown.
|
@ -10,7 +10,6 @@ import java.util.stream.Collectors;
|
||||||
|
|
||||||
import javax.swing.text.html.Option;
|
import javax.swing.text.html.Option;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.utils.PidType;
|
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
import org.apache.spark.SparkConf;
|
import org.apache.spark.SparkConf;
|
||||||
import org.apache.spark.api.java.function.FilterFunction;
|
import org.apache.spark.api.java.function.FilterFunction;
|
||||||
|
@ -33,6 +32,7 @@ import eu.dnetlib.dhp.oa.graph.clean.CleanContextSparkJob;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Country;
|
import eu.dnetlib.dhp.schema.oaf.Country;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||||
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.utils.PidType;
|
||||||
|
|
||||||
public class CleanCountrySparkJob implements Serializable {
|
public class CleanCountrySparkJob implements Serializable {
|
||||||
private static final Logger log = LoggerFactory.getLogger(CleanCountrySparkJob.class);
|
private static final Logger log = LoggerFactory.getLogger(CleanCountrySparkJob.class);
|
||||||
|
@ -113,7 +113,10 @@ public class CleanCountrySparkJob implements Serializable {
|
||||||
if (r
|
if (r
|
||||||
.getPid()
|
.getPid()
|
||||||
.stream()
|
.stream()
|
||||||
.anyMatch(p -> p.getQualifier().getClassid()
|
.anyMatch(
|
||||||
|
p -> p
|
||||||
|
.getQualifier()
|
||||||
|
.getClassid()
|
||||||
.equals(PidType.doi) && pidInParam(p.getValue(), verifyParam))) {
|
.equals(PidType.doi) && pidInParam(p.getValue(), verifyParam))) {
|
||||||
r
|
r
|
||||||
.setCountry(
|
.setCountry(
|
||||||
|
|
|
@ -422,7 +422,8 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
|
||||||
|
|
||||||
final Relation r2 = OafMapperUtils
|
final Relation r2 = OafMapperUtils
|
||||||
.getRelation(
|
.getRelation(
|
||||||
orgId, dsId, DATASOURCE_ORGANIZATION, PROVISION, PROVIDES, collectedFrom, info, lastUpdateTimestamp);
|
orgId, dsId, DATASOURCE_ORGANIZATION, PROVISION, PROVIDES, collectedFrom, info,
|
||||||
|
lastUpdateTimestamp);
|
||||||
|
|
||||||
return Arrays.asList(r1, r2);
|
return Arrays.asList(r1, r2);
|
||||||
} catch (final Exception e) {
|
} catch (final Exception e) {
|
||||||
|
|
Loading…
Reference in New Issue