[GraphAnnotation] Extension of bulktagging to include the easiest graph annotation patterns. Fixed issue and added test

This commit is contained in:
Miriam Baglioni 2024-08-06 14:12:51 +02:00
parent 944e780172
commit ded0c25b44
11 changed files with 27 additions and 17 deletions

View File

@ -290,6 +290,7 @@ public class SparkBulkTagJob {
.parallelStream()
.filter(ModelSupport::isResult)
.forEach(e -> {
removeOutputDir(spark, outputPath + e.name());
ResultTagger resultTagger = new ResultTagger();
Class<R> resultClazz = ModelSupport.entityTypes.get(e);

View File

@ -10,6 +10,7 @@ import java.lang.reflect.Method;
import java.util.*;
import java.util.stream.Collectors;
import com.jayway.jsonpath.Criteria;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -19,13 +20,11 @@ import com.jayway.jsonpath.DocumentContext;
import com.jayway.jsonpath.JsonPath;
import com.jayway.jsonpath.PathNotFoundException;
import eu.dnetlib.dhp.bulktag.Tagging;
import eu.dnetlib.dhp.bulktag.actions.MapModel;
import eu.dnetlib.dhp.bulktag.actions.Parameters;
import eu.dnetlib.dhp.bulktag.eosc.EoscIFTag;
import eu.dnetlib.dhp.schema.oaf.*;
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
import scala.Tuple2;
/** Created by miriam on 02/08/2018. */
public class ResultTagger implements Serializable {
@ -123,9 +122,11 @@ public class ResultTagger implements Serializable {
//adding code for tagging of results searching supplementaryMaterial
final Set<String> tags = new HashSet<>();
taggingConstraints.getTags().forEach(t -> {
if (t.getCriteria().stream().anyMatch(crit -> crit.verifyCriteria(param)))
tags.add(t.getTagId());
tags.add(t.getId());
});
// communities contains all the communities to be not added to the context
@ -262,7 +263,7 @@ public class ResultTagger implements Serializable {
tags.forEach(t -> {
Context con = new Context();
con.setId(t);
List<DataInfo> dataInfoList = Arrays
con.setDataInfo(Arrays
.asList(
OafMapperUtils
.dataInfo(
@ -271,7 +272,7 @@ public class ResultTagger implements Serializable {
.qualifier(
CLASS_ID_ANNOTATION, CLASS_NAME_ANNOTATION, DNET_PROVENANCE_ACTIONS,
DNET_PROVENANCE_ACTIONS),
TAGGING_TRUST));
TAGGING_TRUST)));
result.getContext().add(con);
});

View File

@ -1,14 +1,16 @@
package eu.dnetlib.dhp.bulktag.community;
public class TaggingConstraint extends SelectionConstraints {
private String tagId;
import java.io.Serializable;
public String getTagId() {
return tagId;
public class TaggingConstraint extends SelectionConstraints implements Serializable {
private String id;
public String getId() {
return id;
}
public void setTagId(String tagId) {
this.tagId = tagId;
public void setId(String id) {
this.id = id;
}
}

View File

@ -1,9 +1,10 @@
package eu.dnetlib.dhp.bulktag.community;
import java.io.Serializable;
import java.util.List;
public class TaggingConstraints {
public class TaggingConstraints implements Serializable {
private List<TaggingConstraint> tags;
public List<TaggingConstraint> getTags() {

View File

@ -1,4 +1,4 @@
sourcePath=/tmp/miriam/12_graph_copy
pathMap=/data/bulktagging/pathMap
baseURL=https://services.openaire.eu/openaire/community/
taggingCriteria={"tags":[{"id":"SM","criteria":[{"constraint":[{"verb":"starts_with_caseinsensitive","field":"title","value":"supplementary material for"},{"verb":"starts_with_caseinsensitive","field":"title","value":"supplementary document for"},{"verb":"starts_with_caseinsensitive","field":"title","value":"figure"},{"verb":"starts_with_caseinsensitive","field":"title","value":"supplementary figure"},{"verb":"starts_with_caseinsensitive","field":"title","value":"supplemental figure"},{"verb":"starts_with_caseinsensitive","field":"title","value":"supplementary table"},{"verb":"starts_with_caseinsensitive","field":"title","value":"table for"}]}]}]}
taggingCriteria={"tags":[{"id":"SM","criteria":[{"constraint":[{"verb":"starts_with_caseinsensitive","field":"title","value":"supplementary material for"}]},{"constraint":[{"verb":"starts_with_caseinsensitive","field":"title","value":"supplementary document for"}]},{"constraint":[{"verb":"starts_with_caseinsensitive","field":"title","value":"figure"}]},{"constraint":[{"verb":"starts_with_caseinsensitive","field":"title","value":"supplementary figure"}]},{"constraint":[{"verb":"starts_with_caseinsensitive","field":"title","value":"supplemental figure"}]},{"constraint":[{"verb":"starts_with_caseinsensitive","field":"title","value":"supplementary table"}]},{"constraint":[{"verb":"starts_with_caseinsensitive","field":"title","value":"table for"}]}]}]}

View File

@ -33,6 +33,7 @@ import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.gson.Gson;
import eu.dnetlib.dhp.bulktag.community.ProtoMap;
import eu.dnetlib.dhp.bulktag.community.TaggingConstraints;
import eu.dnetlib.dhp.schema.oaf.*;
public class BulkTagJobTest {
@ -68,7 +69,7 @@ public class BulkTagJobTest {
private static String taggingConf = "";
private static String taggingCriteria = "{\"criteria\":[{\"constraint\":[{\"verb\":\"starts_with_caseinsensitive\",\"field\":\"title\",\"value\":\"supplementary material\"}]}]}";
private static String taggingCriteria = "{\"tags\":[{\"id\":\"SM\",\"criteria\":[{\"constraint\":[{\"verb\":\"starts_with_caseinsensitive\",\"field\":\"title\",\"value\":\"supplementary material for\"}]},{\"constraint\":[{\"verb\":\"starts_with_caseinsensitive\",\"field\":\"title\",\"value\":\"supplementary document for\"}]},{\"constraint\":[{\"verb\":\"starts_with_caseinsensitive\",\"field\":\"title\",\"value\":\"figure\"}]},{\"constraint\":[{\"verb\":\"starts_with_caseinsensitive\",\"field\":\"title\",\"value\":\"supplementary figure\"}]},{\"constraint\":[{\"verb\":\"starts_with_caseinsensitive\",\"field\":\"title\",\"value\":\"supplemental figure\"}]},{\"constraint\":[{\"verb\":\"starts_with_caseinsensitive\",\"field\":\"title\",\"value\":\"supplementary table\"}]},{\"constraint\":[{\"verb\":\"starts_with_caseinsensitive\",\"field\":\"title\",\"value\":\"table for\"}]}]}]}";
static {
try {
@ -2011,6 +2012,10 @@ public class BulkTagJobTest {
"-nameNode", "local"
});
}
System.out.println("prrr");
}
}