[GraphAnnotation] Extension of bulktagging to include the easiest graph annotation patterns. Fixed issue and added test
This commit is contained in:
parent
944e780172
commit
ded0c25b44
|
@ -123,8 +123,8 @@ public class SparkBulkTagJob {
|
||||||
TaggingConstants.CLASS_NAME_BULKTAG_ORGANIZATION);
|
TaggingConstants.CLASS_NAME_BULKTAG_ORGANIZATION);
|
||||||
execEntityTag(
|
execEntityTag(
|
||||||
spark, inputPath + "project", outputPath + "project",
|
spark, inputPath + "project", outputPath + "project",
|
||||||
Utils.getCommunityProjects(baseURL), Project.class, TaggingConstants.CLASS_ID_PROJECT,
|
Utils.getCommunityProjects(baseURL), Project.class, TaggingConstants.CLASS_ID_PROJECT,
|
||||||
TaggingConstants.CLASS_NAME_BULKTAG_PROJECT);
|
TaggingConstants.CLASS_NAME_BULKTAG_PROJECT);
|
||||||
execDatasourceTag(spark, inputPath, outputPath, Utils.getDatasourceCommunities(baseURL));
|
execDatasourceTag(spark, inputPath, outputPath, Utils.getDatasourceCommunities(baseURL));
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
@ -290,6 +290,7 @@ public class SparkBulkTagJob {
|
||||||
.parallelStream()
|
.parallelStream()
|
||||||
.filter(ModelSupport::isResult)
|
.filter(ModelSupport::isResult)
|
||||||
.forEach(e -> {
|
.forEach(e -> {
|
||||||
|
|
||||||
removeOutputDir(spark, outputPath + e.name());
|
removeOutputDir(spark, outputPath + e.name());
|
||||||
ResultTagger resultTagger = new ResultTagger();
|
ResultTagger resultTagger = new ResultTagger();
|
||||||
Class<R> resultClazz = ModelSupport.entityTypes.get(e);
|
Class<R> resultClazz = ModelSupport.entityTypes.get(e);
|
||||||
|
|
|
@ -10,6 +10,7 @@ import java.lang.reflect.Method;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
import com.jayway.jsonpath.Criteria;
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
@ -19,13 +20,11 @@ import com.jayway.jsonpath.DocumentContext;
|
||||||
import com.jayway.jsonpath.JsonPath;
|
import com.jayway.jsonpath.JsonPath;
|
||||||
import com.jayway.jsonpath.PathNotFoundException;
|
import com.jayway.jsonpath.PathNotFoundException;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.bulktag.Tagging;
|
|
||||||
import eu.dnetlib.dhp.bulktag.actions.MapModel;
|
import eu.dnetlib.dhp.bulktag.actions.MapModel;
|
||||||
import eu.dnetlib.dhp.bulktag.actions.Parameters;
|
import eu.dnetlib.dhp.bulktag.actions.Parameters;
|
||||||
import eu.dnetlib.dhp.bulktag.eosc.EoscIFTag;
|
import eu.dnetlib.dhp.bulktag.eosc.EoscIFTag;
|
||||||
import eu.dnetlib.dhp.schema.oaf.*;
|
import eu.dnetlib.dhp.schema.oaf.*;
|
||||||
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
|
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
|
||||||
import scala.Tuple2;
|
|
||||||
|
|
||||||
/** Created by miriam on 02/08/2018. */
|
/** Created by miriam on 02/08/2018. */
|
||||||
public class ResultTagger implements Serializable {
|
public class ResultTagger implements Serializable {
|
||||||
|
@ -123,9 +122,11 @@ public class ResultTagger implements Serializable {
|
||||||
|
|
||||||
//adding code for tagging of results searching supplementaryMaterial
|
//adding code for tagging of results searching supplementaryMaterial
|
||||||
final Set<String> tags = new HashSet<>();
|
final Set<String> tags = new HashSet<>();
|
||||||
|
|
||||||
|
|
||||||
taggingConstraints.getTags().forEach(t -> {
|
taggingConstraints.getTags().forEach(t -> {
|
||||||
if (t.getCriteria().stream().anyMatch(crit -> crit.verifyCriteria(param)))
|
if (t.getCriteria().stream().anyMatch(crit -> crit.verifyCriteria(param)))
|
||||||
tags.add(t.getTagId());
|
tags.add(t.getId());
|
||||||
});
|
});
|
||||||
|
|
||||||
// communities contains all the communities to be not added to the context
|
// communities contains all the communities to be not added to the context
|
||||||
|
@ -262,7 +263,7 @@ public class ResultTagger implements Serializable {
|
||||||
tags.forEach(t -> {
|
tags.forEach(t -> {
|
||||||
Context con = new Context();
|
Context con = new Context();
|
||||||
con.setId(t);
|
con.setId(t);
|
||||||
List<DataInfo> dataInfoList = Arrays
|
con.setDataInfo(Arrays
|
||||||
.asList(
|
.asList(
|
||||||
OafMapperUtils
|
OafMapperUtils
|
||||||
.dataInfo(
|
.dataInfo(
|
||||||
|
@ -271,7 +272,7 @@ public class ResultTagger implements Serializable {
|
||||||
.qualifier(
|
.qualifier(
|
||||||
CLASS_ID_ANNOTATION, CLASS_NAME_ANNOTATION, DNET_PROVENANCE_ACTIONS,
|
CLASS_ID_ANNOTATION, CLASS_NAME_ANNOTATION, DNET_PROVENANCE_ACTIONS,
|
||||||
DNET_PROVENANCE_ACTIONS),
|
DNET_PROVENANCE_ACTIONS),
|
||||||
TAGGING_TRUST));
|
TAGGING_TRUST)));
|
||||||
result.getContext().add(con);
|
result.getContext().add(con);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|
|
@ -1,14 +1,16 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.bulktag.community;
|
package eu.dnetlib.dhp.bulktag.community;
|
||||||
|
|
||||||
public class TaggingConstraint extends SelectionConstraints {
|
import java.io.Serializable;
|
||||||
private String tagId;
|
|
||||||
|
|
||||||
public String getTagId() {
|
public class TaggingConstraint extends SelectionConstraints implements Serializable {
|
||||||
return tagId;
|
private String id;
|
||||||
|
|
||||||
|
public String getId() {
|
||||||
|
return id;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void setTagId(String tagId) {
|
public void setId(String id) {
|
||||||
this.tagId = tagId;
|
this.id = id;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,9 +1,10 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.bulktag.community;
|
package eu.dnetlib.dhp.bulktag.community;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
public class TaggingConstraints {
|
public class TaggingConstraints implements Serializable {
|
||||||
private List<TaggingConstraint> tags;
|
private List<TaggingConstraint> tags;
|
||||||
|
|
||||||
public List<TaggingConstraint> getTags() {
|
public List<TaggingConstraint> getTags() {
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
sourcePath=/tmp/miriam/12_graph_copy
|
sourcePath=/tmp/miriam/12_graph_copy
|
||||||
pathMap=/data/bulktagging/pathMap
|
pathMap=/data/bulktagging/pathMap
|
||||||
baseURL=https://services.openaire.eu/openaire/community/
|
baseURL=https://services.openaire.eu/openaire/community/
|
||||||
taggingCriteria={"tags":[{"id":"SM","criteria":[{"constraint":[{"verb":"starts_with_caseinsensitive","field":"title","value":"supplementary material for"},{"verb":"starts_with_caseinsensitive","field":"title","value":"supplementary document for"},{"verb":"starts_with_caseinsensitive","field":"title","value":"figure"},{"verb":"starts_with_caseinsensitive","field":"title","value":"supplementary figure"},{"verb":"starts_with_caseinsensitive","field":"title","value":"supplemental figure"},{"verb":"starts_with_caseinsensitive","field":"title","value":"supplementary table"},{"verb":"starts_with_caseinsensitive","field":"title","value":"table for"}]}]}]}
|
taggingCriteria={"tags":[{"id":"SM","criteria":[{"constraint":[{"verb":"starts_with_caseinsensitive","field":"title","value":"supplementary material for"}]},{"constraint":[{"verb":"starts_with_caseinsensitive","field":"title","value":"supplementary document for"}]},{"constraint":[{"verb":"starts_with_caseinsensitive","field":"title","value":"figure"}]},{"constraint":[{"verb":"starts_with_caseinsensitive","field":"title","value":"supplementary figure"}]},{"constraint":[{"verb":"starts_with_caseinsensitive","field":"title","value":"supplemental figure"}]},{"constraint":[{"verb":"starts_with_caseinsensitive","field":"title","value":"supplementary table"}]},{"constraint":[{"verb":"starts_with_caseinsensitive","field":"title","value":"table for"}]}]}]}
|
|
@ -33,6 +33,7 @@ import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
import com.google.gson.Gson;
|
import com.google.gson.Gson;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.bulktag.community.ProtoMap;
|
import eu.dnetlib.dhp.bulktag.community.ProtoMap;
|
||||||
|
import eu.dnetlib.dhp.bulktag.community.TaggingConstraints;
|
||||||
import eu.dnetlib.dhp.schema.oaf.*;
|
import eu.dnetlib.dhp.schema.oaf.*;
|
||||||
|
|
||||||
public class BulkTagJobTest {
|
public class BulkTagJobTest {
|
||||||
|
@ -68,7 +69,7 @@ public class BulkTagJobTest {
|
||||||
|
|
||||||
private static String taggingConf = "";
|
private static String taggingConf = "";
|
||||||
|
|
||||||
private static String taggingCriteria = "{\"criteria\":[{\"constraint\":[{\"verb\":\"starts_with_caseinsensitive\",\"field\":\"title\",\"value\":\"supplementary material\"}]}]}";
|
private static String taggingCriteria = "{\"tags\":[{\"id\":\"SM\",\"criteria\":[{\"constraint\":[{\"verb\":\"starts_with_caseinsensitive\",\"field\":\"title\",\"value\":\"supplementary material for\"}]},{\"constraint\":[{\"verb\":\"starts_with_caseinsensitive\",\"field\":\"title\",\"value\":\"supplementary document for\"}]},{\"constraint\":[{\"verb\":\"starts_with_caseinsensitive\",\"field\":\"title\",\"value\":\"figure\"}]},{\"constraint\":[{\"verb\":\"starts_with_caseinsensitive\",\"field\":\"title\",\"value\":\"supplementary figure\"}]},{\"constraint\":[{\"verb\":\"starts_with_caseinsensitive\",\"field\":\"title\",\"value\":\"supplemental figure\"}]},{\"constraint\":[{\"verb\":\"starts_with_caseinsensitive\",\"field\":\"title\",\"value\":\"supplementary table\"}]},{\"constraint\":[{\"verb\":\"starts_with_caseinsensitive\",\"field\":\"title\",\"value\":\"table for\"}]}]}]}";
|
||||||
|
|
||||||
static {
|
static {
|
||||||
try {
|
try {
|
||||||
|
@ -2011,6 +2012,10 @@ public class BulkTagJobTest {
|
||||||
"-nameNode", "local"
|
"-nameNode", "local"
|
||||||
});
|
});
|
||||||
|
|
||||||
|
System.out.println("prrr");
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
File diff suppressed because one or more lines are too long
Loading…
Reference in New Issue