[GraphAnnotation] Extension of bulktagging to include the easiest graph annotation patterns. Fixed issue and added test

This commit is contained in:
Miriam Baglioni 2024-08-06 14:12:51 +02:00
parent 944e780172
commit ded0c25b44
11 changed files with 27 additions and 17 deletions

View File

@ -123,8 +123,8 @@ public class SparkBulkTagJob {
TaggingConstants.CLASS_NAME_BULKTAG_ORGANIZATION); TaggingConstants.CLASS_NAME_BULKTAG_ORGANIZATION);
execEntityTag( execEntityTag(
spark, inputPath + "project", outputPath + "project", spark, inputPath + "project", outputPath + "project",
Utils.getCommunityProjects(baseURL), Project.class, TaggingConstants.CLASS_ID_PROJECT, Utils.getCommunityProjects(baseURL), Project.class, TaggingConstants.CLASS_ID_PROJECT,
TaggingConstants.CLASS_NAME_BULKTAG_PROJECT); TaggingConstants.CLASS_NAME_BULKTAG_PROJECT);
execDatasourceTag(spark, inputPath, outputPath, Utils.getDatasourceCommunities(baseURL)); execDatasourceTag(spark, inputPath, outputPath, Utils.getDatasourceCommunities(baseURL));
}); });
} }
@ -290,6 +290,7 @@ public class SparkBulkTagJob {
.parallelStream() .parallelStream()
.filter(ModelSupport::isResult) .filter(ModelSupport::isResult)
.forEach(e -> { .forEach(e -> {
removeOutputDir(spark, outputPath + e.name()); removeOutputDir(spark, outputPath + e.name());
ResultTagger resultTagger = new ResultTagger(); ResultTagger resultTagger = new ResultTagger();
Class<R> resultClazz = ModelSupport.entityTypes.get(e); Class<R> resultClazz = ModelSupport.entityTypes.get(e);

View File

@ -10,6 +10,7 @@ import java.lang.reflect.Method;
import java.util.*; import java.util.*;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import com.jayway.jsonpath.Criteria;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
@ -19,13 +20,11 @@ import com.jayway.jsonpath.DocumentContext;
import com.jayway.jsonpath.JsonPath; import com.jayway.jsonpath.JsonPath;
import com.jayway.jsonpath.PathNotFoundException; import com.jayway.jsonpath.PathNotFoundException;
import eu.dnetlib.dhp.bulktag.Tagging;
import eu.dnetlib.dhp.bulktag.actions.MapModel; import eu.dnetlib.dhp.bulktag.actions.MapModel;
import eu.dnetlib.dhp.bulktag.actions.Parameters; import eu.dnetlib.dhp.bulktag.actions.Parameters;
import eu.dnetlib.dhp.bulktag.eosc.EoscIFTag; import eu.dnetlib.dhp.bulktag.eosc.EoscIFTag;
import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.dhp.schema.oaf.*;
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
import scala.Tuple2;
/** Created by miriam on 02/08/2018. */ /** Created by miriam on 02/08/2018. */
public class ResultTagger implements Serializable { public class ResultTagger implements Serializable {
@ -123,9 +122,11 @@ public class ResultTagger implements Serializable {
//adding code for tagging of results searching supplementaryMaterial //adding code for tagging of results searching supplementaryMaterial
final Set<String> tags = new HashSet<>(); final Set<String> tags = new HashSet<>();
taggingConstraints.getTags().forEach(t -> { taggingConstraints.getTags().forEach(t -> {
if (t.getCriteria().stream().anyMatch(crit -> crit.verifyCriteria(param))) if (t.getCriteria().stream().anyMatch(crit -> crit.verifyCriteria(param)))
tags.add(t.getTagId()); tags.add(t.getId());
}); });
// communities contains all the communities to be not added to the context // communities contains all the communities to be not added to the context
@ -262,7 +263,7 @@ public class ResultTagger implements Serializable {
tags.forEach(t -> { tags.forEach(t -> {
Context con = new Context(); Context con = new Context();
con.setId(t); con.setId(t);
List<DataInfo> dataInfoList = Arrays con.setDataInfo(Arrays
.asList( .asList(
OafMapperUtils OafMapperUtils
.dataInfo( .dataInfo(
@ -271,7 +272,7 @@ public class ResultTagger implements Serializable {
.qualifier( .qualifier(
CLASS_ID_ANNOTATION, CLASS_NAME_ANNOTATION, DNET_PROVENANCE_ACTIONS, CLASS_ID_ANNOTATION, CLASS_NAME_ANNOTATION, DNET_PROVENANCE_ACTIONS,
DNET_PROVENANCE_ACTIONS), DNET_PROVENANCE_ACTIONS),
TAGGING_TRUST)); TAGGING_TRUST)));
result.getContext().add(con); result.getContext().add(con);
}); });

View File

@ -1,14 +1,16 @@
package eu.dnetlib.dhp.bulktag.community; package eu.dnetlib.dhp.bulktag.community;
public class TaggingConstraint extends SelectionConstraints { import java.io.Serializable;
private String tagId;
public String getTagId() { public class TaggingConstraint extends SelectionConstraints implements Serializable {
return tagId; private String id;
public String getId() {
return id;
} }
public void setTagId(String tagId) { public void setId(String id) {
this.tagId = tagId; this.id = id;
} }
} }

View File

@ -1,9 +1,10 @@
package eu.dnetlib.dhp.bulktag.community; package eu.dnetlib.dhp.bulktag.community;
import java.io.Serializable;
import java.util.List; import java.util.List;
public class TaggingConstraints { public class TaggingConstraints implements Serializable {
private List<TaggingConstraint> tags; private List<TaggingConstraint> tags;
public List<TaggingConstraint> getTags() { public List<TaggingConstraint> getTags() {

View File

@ -1,4 +1,4 @@
sourcePath=/tmp/miriam/12_graph_copy sourcePath=/tmp/miriam/12_graph_copy
pathMap=/data/bulktagging/pathMap pathMap=/data/bulktagging/pathMap
baseURL=https://services.openaire.eu/openaire/community/ baseURL=https://services.openaire.eu/openaire/community/
taggingCriteria={"tags":[{"id":"SM","criteria":[{"constraint":[{"verb":"starts_with_caseinsensitive","field":"title","value":"supplementary material for"},{"verb":"starts_with_caseinsensitive","field":"title","value":"supplementary document for"},{"verb":"starts_with_caseinsensitive","field":"title","value":"figure"},{"verb":"starts_with_caseinsensitive","field":"title","value":"supplementary figure"},{"verb":"starts_with_caseinsensitive","field":"title","value":"supplemental figure"},{"verb":"starts_with_caseinsensitive","field":"title","value":"supplementary table"},{"verb":"starts_with_caseinsensitive","field":"title","value":"table for"}]}]}]} taggingCriteria={"tags":[{"id":"SM","criteria":[{"constraint":[{"verb":"starts_with_caseinsensitive","field":"title","value":"supplementary material for"}]},{"constraint":[{"verb":"starts_with_caseinsensitive","field":"title","value":"supplementary document for"}]},{"constraint":[{"verb":"starts_with_caseinsensitive","field":"title","value":"figure"}]},{"constraint":[{"verb":"starts_with_caseinsensitive","field":"title","value":"supplementary figure"}]},{"constraint":[{"verb":"starts_with_caseinsensitive","field":"title","value":"supplemental figure"}]},{"constraint":[{"verb":"starts_with_caseinsensitive","field":"title","value":"supplementary table"}]},{"constraint":[{"verb":"starts_with_caseinsensitive","field":"title","value":"table for"}]}]}]}

View File

@ -33,6 +33,7 @@ import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.gson.Gson; import com.google.gson.Gson;
import eu.dnetlib.dhp.bulktag.community.ProtoMap; import eu.dnetlib.dhp.bulktag.community.ProtoMap;
import eu.dnetlib.dhp.bulktag.community.TaggingConstraints;
import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.dhp.schema.oaf.*;
public class BulkTagJobTest { public class BulkTagJobTest {
@ -68,7 +69,7 @@ public class BulkTagJobTest {
private static String taggingConf = ""; private static String taggingConf = "";
private static String taggingCriteria = "{\"criteria\":[{\"constraint\":[{\"verb\":\"starts_with_caseinsensitive\",\"field\":\"title\",\"value\":\"supplementary material\"}]}]}"; private static String taggingCriteria = "{\"tags\":[{\"id\":\"SM\",\"criteria\":[{\"constraint\":[{\"verb\":\"starts_with_caseinsensitive\",\"field\":\"title\",\"value\":\"supplementary material for\"}]},{\"constraint\":[{\"verb\":\"starts_with_caseinsensitive\",\"field\":\"title\",\"value\":\"supplementary document for\"}]},{\"constraint\":[{\"verb\":\"starts_with_caseinsensitive\",\"field\":\"title\",\"value\":\"figure\"}]},{\"constraint\":[{\"verb\":\"starts_with_caseinsensitive\",\"field\":\"title\",\"value\":\"supplementary figure\"}]},{\"constraint\":[{\"verb\":\"starts_with_caseinsensitive\",\"field\":\"title\",\"value\":\"supplemental figure\"}]},{\"constraint\":[{\"verb\":\"starts_with_caseinsensitive\",\"field\":\"title\",\"value\":\"supplementary table\"}]},{\"constraint\":[{\"verb\":\"starts_with_caseinsensitive\",\"field\":\"title\",\"value\":\"table for\"}]}]}]}";
static { static {
try { try {
@ -2011,6 +2012,10 @@ public class BulkTagJobTest {
"-nameNode", "local" "-nameNode", "local"
}); });
System.out.println("prrr");
} }
} }