[GraphAnnotation] Extension of bulktagging to include the easiest graph annotation patterns. Fixed issue and added test
This commit is contained in:
parent
944e780172
commit
ded0c25b44
|
@ -290,6 +290,7 @@ public class SparkBulkTagJob {
|
|||
.parallelStream()
|
||||
.filter(ModelSupport::isResult)
|
||||
.forEach(e -> {
|
||||
|
||||
removeOutputDir(spark, outputPath + e.name());
|
||||
ResultTagger resultTagger = new ResultTagger();
|
||||
Class<R> resultClazz = ModelSupport.entityTypes.get(e);
|
||||
|
|
|
@ -10,6 +10,7 @@ import java.lang.reflect.Method;
|
|||
import java.util.*;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import com.jayway.jsonpath.Criteria;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
@ -19,13 +20,11 @@ import com.jayway.jsonpath.DocumentContext;
|
|||
import com.jayway.jsonpath.JsonPath;
|
||||
import com.jayway.jsonpath.PathNotFoundException;
|
||||
|
||||
import eu.dnetlib.dhp.bulktag.Tagging;
|
||||
import eu.dnetlib.dhp.bulktag.actions.MapModel;
|
||||
import eu.dnetlib.dhp.bulktag.actions.Parameters;
|
||||
import eu.dnetlib.dhp.bulktag.eosc.EoscIFTag;
|
||||
import eu.dnetlib.dhp.schema.oaf.*;
|
||||
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
|
||||
import scala.Tuple2;
|
||||
|
||||
/** Created by miriam on 02/08/2018. */
|
||||
public class ResultTagger implements Serializable {
|
||||
|
@ -123,9 +122,11 @@ public class ResultTagger implements Serializable {
|
|||
|
||||
//adding code for tagging of results searching supplementaryMaterial
|
||||
final Set<String> tags = new HashSet<>();
|
||||
|
||||
|
||||
taggingConstraints.getTags().forEach(t -> {
|
||||
if (t.getCriteria().stream().anyMatch(crit -> crit.verifyCriteria(param)))
|
||||
tags.add(t.getTagId());
|
||||
tags.add(t.getId());
|
||||
});
|
||||
|
||||
// communities contains all the communities to be not added to the context
|
||||
|
@ -262,7 +263,7 @@ public class ResultTagger implements Serializable {
|
|||
tags.forEach(t -> {
|
||||
Context con = new Context();
|
||||
con.setId(t);
|
||||
List<DataInfo> dataInfoList = Arrays
|
||||
con.setDataInfo(Arrays
|
||||
.asList(
|
||||
OafMapperUtils
|
||||
.dataInfo(
|
||||
|
@ -271,7 +272,7 @@ public class ResultTagger implements Serializable {
|
|||
.qualifier(
|
||||
CLASS_ID_ANNOTATION, CLASS_NAME_ANNOTATION, DNET_PROVENANCE_ACTIONS,
|
||||
DNET_PROVENANCE_ACTIONS),
|
||||
TAGGING_TRUST));
|
||||
TAGGING_TRUST)));
|
||||
result.getContext().add(con);
|
||||
});
|
||||
|
||||
|
|
|
@ -1,14 +1,16 @@
|
|||
|
||||
package eu.dnetlib.dhp.bulktag.community;
|
||||
|
||||
public class TaggingConstraint extends SelectionConstraints {
|
||||
private String tagId;
|
||||
import java.io.Serializable;
|
||||
|
||||
public String getTagId() {
|
||||
return tagId;
|
||||
public class TaggingConstraint extends SelectionConstraints implements Serializable {
|
||||
private String id;
|
||||
|
||||
public String getId() {
|
||||
return id;
|
||||
}
|
||||
|
||||
public void setTagId(String tagId) {
|
||||
this.tagId = tagId;
|
||||
public void setId(String id) {
|
||||
this.id = id;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,9 +1,10 @@
|
|||
|
||||
package eu.dnetlib.dhp.bulktag.community;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.List;
|
||||
|
||||
public class TaggingConstraints {
|
||||
public class TaggingConstraints implements Serializable {
|
||||
private List<TaggingConstraint> tags;
|
||||
|
||||
public List<TaggingConstraint> getTags() {
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
sourcePath=/tmp/miriam/12_graph_copy
|
||||
pathMap=/data/bulktagging/pathMap
|
||||
baseURL=https://services.openaire.eu/openaire/community/
|
||||
taggingCriteria={"tags":[{"id":"SM","criteria":[{"constraint":[{"verb":"starts_with_caseinsensitive","field":"title","value":"supplementary material for"},{"verb":"starts_with_caseinsensitive","field":"title","value":"supplementary document for"},{"verb":"starts_with_caseinsensitive","field":"title","value":"figure"},{"verb":"starts_with_caseinsensitive","field":"title","value":"supplementary figure"},{"verb":"starts_with_caseinsensitive","field":"title","value":"supplemental figure"},{"verb":"starts_with_caseinsensitive","field":"title","value":"supplementary table"},{"verb":"starts_with_caseinsensitive","field":"title","value":"table for"}]}]}]}
|
||||
taggingCriteria={"tags":[{"id":"SM","criteria":[{"constraint":[{"verb":"starts_with_caseinsensitive","field":"title","value":"supplementary material for"}]},{"constraint":[{"verb":"starts_with_caseinsensitive","field":"title","value":"supplementary document for"}]},{"constraint":[{"verb":"starts_with_caseinsensitive","field":"title","value":"figure"}]},{"constraint":[{"verb":"starts_with_caseinsensitive","field":"title","value":"supplementary figure"}]},{"constraint":[{"verb":"starts_with_caseinsensitive","field":"title","value":"supplemental figure"}]},{"constraint":[{"verb":"starts_with_caseinsensitive","field":"title","value":"supplementary table"}]},{"constraint":[{"verb":"starts_with_caseinsensitive","field":"title","value":"table for"}]}]}]}
|
|
@ -33,6 +33,7 @@ import com.fasterxml.jackson.databind.ObjectMapper;
|
|||
import com.google.gson.Gson;
|
||||
|
||||
import eu.dnetlib.dhp.bulktag.community.ProtoMap;
|
||||
import eu.dnetlib.dhp.bulktag.community.TaggingConstraints;
|
||||
import eu.dnetlib.dhp.schema.oaf.*;
|
||||
|
||||
public class BulkTagJobTest {
|
||||
|
@ -68,7 +69,7 @@ public class BulkTagJobTest {
|
|||
|
||||
private static String taggingConf = "";
|
||||
|
||||
private static String taggingCriteria = "{\"criteria\":[{\"constraint\":[{\"verb\":\"starts_with_caseinsensitive\",\"field\":\"title\",\"value\":\"supplementary material\"}]}]}";
|
||||
private static String taggingCriteria = "{\"tags\":[{\"id\":\"SM\",\"criteria\":[{\"constraint\":[{\"verb\":\"starts_with_caseinsensitive\",\"field\":\"title\",\"value\":\"supplementary material for\"}]},{\"constraint\":[{\"verb\":\"starts_with_caseinsensitive\",\"field\":\"title\",\"value\":\"supplementary document for\"}]},{\"constraint\":[{\"verb\":\"starts_with_caseinsensitive\",\"field\":\"title\",\"value\":\"figure\"}]},{\"constraint\":[{\"verb\":\"starts_with_caseinsensitive\",\"field\":\"title\",\"value\":\"supplementary figure\"}]},{\"constraint\":[{\"verb\":\"starts_with_caseinsensitive\",\"field\":\"title\",\"value\":\"supplemental figure\"}]},{\"constraint\":[{\"verb\":\"starts_with_caseinsensitive\",\"field\":\"title\",\"value\":\"supplementary table\"}]},{\"constraint\":[{\"verb\":\"starts_with_caseinsensitive\",\"field\":\"title\",\"value\":\"table for\"}]}]}]}";
|
||||
|
||||
static {
|
||||
try {
|
||||
|
@ -2011,6 +2012,10 @@ public class BulkTagJobTest {
|
|||
"-nameNode", "local"
|
||||
});
|
||||
|
||||
}
|
||||
System.out.println("prrr");
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
|
File diff suppressed because one or more lines are too long
Loading…
Reference in New Issue