From f7d06dc661f370ac4275188537958cb9e6a0a7c9 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 23 Jan 2024 11:43:08 +0100 Subject: [PATCH] compilation after merging --- .../bipfinder/SparkAtomicActionScoreJob.java | 2 +- .../main/java/eu/dnetlib/dhp/MoveResult.java | 2 +- .../dnetlib/dhp/bulktag/actions/Action.java | 43 +++--- .../bulktag/actions/ExecSubstringAction.java | 65 ++++----- .../dnetlib/dhp/bulktag/actions/MapModel.java | 30 ++-- .../dhp/bulktag/actions/Parameters.java | 29 ++-- .../dhp/bulktag/community/ProtoMap.java | 4 +- .../dhp/bulktag/community/ResultTagger.java | 36 ++--- .../dnetlib/dhp/bulktag/BulkTagJobTest.java | 136 +++++++++++------- 9 files changed, 194 insertions(+), 153 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/SparkAtomicActionScoreJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/SparkAtomicActionScoreJob.java index 76c8ec7fa..040c89782 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/SparkAtomicActionScoreJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/SparkAtomicActionScoreJob.java @@ -95,7 +95,7 @@ public class SparkAtomicActionScoreJob implements Serializable { return projectScores.map((MapFunction) bipProjectScores -> { Project project = new Project(); - //project.setId(bipProjectScores.getProjectId()); + project.setId(bipProjectScores.getProjectId()); project.setMeasures(bipProjectScores.toMeasures()); return project; }, Encoders.bean(Project.class)) diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/MoveResult.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/MoveResult.java index c71ccb439..6731f2332 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/MoveResult.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/MoveResult.java @@ -30,7 +30,7 @@ public class MoveResult implements Serializable { public static void main(String[] args) throws Exception { String jsonConfiguration = IOUtils .toString( - MoveResult.class + MoveResult.class .getResourceAsStream( "/eu/dnetlib/dhp/wf/subworkflows/input_moveresult_parameters.json")); diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/actions/Action.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/actions/Action.java index 6c9e7694f..987e7afef 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/actions/Action.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/actions/Action.java @@ -1,3 +1,4 @@ + package eu.dnetlib.dhp.bulktag.actions; import java.io.Serializable; @@ -8,31 +9,31 @@ import java.util.List; * @Date 22/01/24 */ public class Action implements Serializable { - private String clazz; - private String method; - private List params; + private String clazz; + private String method; + private List params; - public String getClazz() { - return clazz; - } + public String getClazz() { + return clazz; + } - public void setClazz(String clazz) { - this.clazz = clazz; - } + public void setClazz(String clazz) { + this.clazz = clazz; + } - public String getMethod() { - return method; - } + public String getMethod() { + return method; + } - public void setMethod(String method) { - this.method = method; - } + public void setMethod(String method) { + this.method = method; + } - public List getParams() { - return params; - } + public List getParams() { + return params; + } - public void setParams(List params) { - this.params = params; - } + public void setParams(List params) { + this.params = params; + } } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/actions/ExecSubstringAction.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/actions/ExecSubstringAction.java index fd3091fd0..0ada4ebfb 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/actions/ExecSubstringAction.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/actions/ExecSubstringAction.java @@ -1,3 +1,4 @@ + package eu.dnetlib.dhp.bulktag.actions; import java.io.Serializable; @@ -8,47 +9,47 @@ import java.io.Serializable; */ public class ExecSubstringAction implements Serializable { - private String value; - private String from; - private String to; + private String value; + private String from; + private String to; - public String getValue() { - return value; - } + public String getValue() { + return value; + } - public void setValue(String value) { - this.value = value; - } + public void setValue(String value) { + this.value = value; + } - public String getFrom() { - return from; - } + public String getFrom() { + return from; + } - public void setFrom(String from) { - this.from = from; - } + public void setFrom(String from) { + this.from = from; + } - public String getTo() { - return to; - } + public String getTo() { + return to; + } - public void setTo(String to) { - this.to = to; - } + public void setTo(String to) { + this.to = to; + } - public String execSubstring(){ - int to = Integer.valueOf(this.to); - int from = Integer.valueOf(this.from); + public String execSubstring() { + int to = Integer.valueOf(this.to); + int from = Integer.valueOf(this.from); - if(to < from || from > this.value.length()) - return ""; + if (to < from || from > this.value.length()) + return ""; - if(from < 0) - from = 0; - if (to > this.value.length()) - to = this.value.length(); + if (from < 0) + from = 0; + if (to > this.value.length()) + to = this.value.length(); - return this.value.substring(from, to); + return this.value.substring(from, to); - } + } } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/actions/MapModel.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/actions/MapModel.java index ef3eb43cc..6a0d20b57 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/actions/MapModel.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/actions/MapModel.java @@ -1,3 +1,4 @@ + package eu.dnetlib.dhp.bulktag.actions; import java.io.Serializable; @@ -8,23 +9,22 @@ import java.io.Serializable; */ public class MapModel implements Serializable { - private String path; - private Action action; + private String path; + private Action action; + public String getPath() { + return path; + } - public String getPath() { - return path; - } + public void setPath(String path) { + this.path = path; + } - public void setPath(String path) { - this.path = path; - } + public Action getAction() { + return action; + } - public Action getAction() { - return action; - } - - public void setAction(Action action) { - this.action = action; - } + public void setAction(Action action) { + this.action = action; + } } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/actions/Parameters.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/actions/Parameters.java index d23634605..973b00b77 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/actions/Parameters.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/actions/Parameters.java @@ -1,3 +1,4 @@ + package eu.dnetlib.dhp.bulktag.actions; import java.io.Serializable; @@ -7,22 +8,22 @@ import java.io.Serializable; * @Date 22/01/24 */ public class Parameters implements Serializable { - private String paramName; - private String paramValue; + private String paramName; + private String paramValue; - public String getParamName() { - return paramName; - } + public String getParamName() { + return paramName; + } - public void setParamName(String paramName) { - this.paramName = paramName; - } + public void setParamName(String paramName) { + this.paramName = paramName; + } - public String getParamValue() { - return paramValue; - } + public String getParamValue() { + return paramValue; + } - public void setParamValue(String paramValue) { - this.paramValue = paramValue; - } + public void setParamValue(String paramValue) { + this.paramValue = paramValue; + } } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ProtoMap.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ProtoMap.java index 15b4d10b7..dc75aec37 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ProtoMap.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ProtoMap.java @@ -1,11 +1,11 @@ package eu.dnetlib.dhp.bulktag.community; -import eu.dnetlib.dhp.bulktag.actions.MapModel; - import java.io.Serializable; import java.util.HashMap; +import eu.dnetlib.dhp.bulktag.actions.MapModel; + public class ProtoMap extends HashMap implements Serializable { public ProtoMap() { diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ResultTagger.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ResultTagger.java index 67422c2ad..2ea229e3e 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ResultTagger.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ResultTagger.java @@ -10,9 +10,6 @@ import java.lang.reflect.Method; import java.util.*; import java.util.stream.Collectors; -import com.jayway.jsonpath.PathNotFoundException; -import eu.dnetlib.dhp.bulktag.actions.MapModel; -import eu.dnetlib.dhp.bulktag.actions.Parameters; import org.apache.commons.lang3.StringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -20,7 +17,10 @@ import org.slf4j.LoggerFactory; import com.google.gson.Gson; import com.jayway.jsonpath.DocumentContext; import com.jayway.jsonpath.JsonPath; +import com.jayway.jsonpath.PathNotFoundException; +import eu.dnetlib.dhp.bulktag.actions.MapModel; +import eu.dnetlib.dhp.bulktag.actions.Parameters; import eu.dnetlib.dhp.bulktag.eosc.EoscIFTag; import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; @@ -40,7 +40,8 @@ public class ResultTagger implements Serializable { return (tmp != clist.size()); } - private Map> getParamMap(final Result result, Map params) throws NoSuchMethodException, InvocationTargetException { + private Map> getParamMap(final Result result, Map params) + throws NoSuchMethodException, InvocationTargetException { Map> param = new HashMap<>(); String json = new Gson().toJson(result, Result.class); DocumentContext jsonContext = JsonPath.parse(json); @@ -54,30 +55,34 @@ public class ResultTagger implements Serializable { try { String path = mapModel.getPath(); Object obj = jsonContext.read(path); - List pathValue ; - if(obj instanceof java.lang.String) - pathValue = Arrays.asList((String)obj); + List pathValue; + if (obj instanceof java.lang.String) + pathValue = Arrays.asList((String) obj); else - pathValue = (List)obj; - if(Optional.ofNullable(mapModel.getAction()).isPresent()){ + pathValue = (List) obj; + if (Optional.ofNullable(mapModel.getAction()).isPresent()) { Class c = Class.forName(mapModel.getAction().getClazz()); Object class_instance = c.newInstance(); Method setField = c.getMethod("setValue", String.class); setField.invoke(class_instance, pathValue.get(0)); - for(Parameters p : mapModel.getAction().getParams()){ + for (Parameters p : mapModel.getAction().getParams()) { setField = c.getMethod("set" + p.getParamName(), String.class); setField.invoke(class_instance, p.getParamValue()); } - param.put(key,Arrays.asList((String)c.getMethod(mapModel.getAction().getMethod()).invoke(class_instance))); + param + .put( + key, Arrays + .asList((String) c.getMethod(mapModel.getAction().getMethod()).invoke(class_instance))); } - else{ + else { param.put(key, pathValue); } - } catch (PathNotFoundException | ClassNotFoundException | InstantiationException | IllegalAccessException e) { + } catch (PathNotFoundException | ClassNotFoundException | InstantiationException + | IllegalAccessException e) { param.put(key, new ArrayList<>()); } } @@ -86,9 +91,8 @@ public class ResultTagger implements Serializable { } public R enrichContextCriteria( - final R result, final CommunityConfiguration conf, final Map criteria) throws InvocationTargetException, NoSuchMethodException { - - + final R result, final CommunityConfiguration conf, final Map criteria) + throws InvocationTargetException, NoSuchMethodException { // Verify if the entity is deletedbyinference. In case verify if to clean the context list // from all the zenodo communities diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/BulkTagJobTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/BulkTagJobTest.java index 01be4d0a3..433e046cc 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/BulkTagJobTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/BulkTagJobTest.java @@ -33,25 +33,25 @@ public class BulkTagJobTest { private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); public static final String pathMap = "{\"author\":{\"path\":\"$['author'][*]['fullname']\"}," + - " \"title\":{\"path\":\"$['title'][*]['value']\"}, "+ - " \"orcid\":{\"path\":\"$['author'][*]['pid'][*][?(@['qualifier']['classid']=='orcid')]['value']\"} , " + - " \"orcid_pending\":{\"path\":\"$['author'][*]['pid'][*][?(@['qualifier']['classid']=='orcid_pending')]['value']\"} ,"+ - "\"contributor\" : {\"path\":\"$['contributor'][*]['value']\"},"+ - " \"description\" : {\"path\":\"$['description'][*]['value']\"},"+ - " \"subject\" :{\"path\":\"$['subject'][*]['value']\"}, " + - " \"fos\" : {\"path\":\"$['subject'][?(@['qualifier']['classid']=='FOS')].value\"} , "+ - "\"sdg\" : {\"path\":\"$['subject'][?(@['qualifier']['classid']=='SDG')].value\"}," + - "\"journal\":{\"path\":\"$['journal'].name\"}," + - "\"hostedby\":{\"path\":\"$['instance'][*]['hostedby']['key']\"}," + - "\"collectedfrom\":{\"path\":\"$['instance'][*]['collectedfrom']['key']\"}," + - "\"publisher\":{\"path\":\"$['publisher'].value\"}," + - "\"publicationyear\":{\"path\":\"$['dateofacceptance'].value\", " + - " \"action\":{\"clazz\":\"eu.dnetlib.dhp.bulktag.actions.ExecSubstringAction\"," + - "\"method\":\"execSubstring\","+ - "\"params\":[" + - "{\"paramName\":\"From\", \"paramValue\":0}, " + - "{\"paramName\":\"To\",\"paramValue\":4}]}}}"; - + " \"title\":{\"path\":\"$['title'][*]['value']\"}, " + + " \"orcid\":{\"path\":\"$['author'][*]['pid'][*][?(@['qualifier']['classid']=='orcid')]['value']\"} , " + + " \"orcid_pending\":{\"path\":\"$['author'][*]['pid'][*][?(@['qualifier']['classid']=='orcid_pending')]['value']\"} ," + + + "\"contributor\" : {\"path\":\"$['contributor'][*]['value']\"}," + + " \"description\" : {\"path\":\"$['description'][*]['value']\"}," + + " \"subject\" :{\"path\":\"$['subject'][*]['value']\"}, " + + " \"fos\" : {\"path\":\"$['subject'][?(@['qualifier']['classid']=='FOS')].value\"} , " + + "\"sdg\" : {\"path\":\"$['subject'][?(@['qualifier']['classid']=='SDG')].value\"}," + + "\"journal\":{\"path\":\"$['journal'].name\"}," + + "\"hostedby\":{\"path\":\"$['instance'][*]['hostedby']['key']\"}," + + "\"collectedfrom\":{\"path\":\"$['instance'][*]['collectedfrom']['key']\"}," + + "\"publisher\":{\"path\":\"$['publisher'].value\"}," + + "\"publicationyear\":{\"path\":\"$['dateofacceptance'].value\", " + + " \"action\":{\"clazz\":\"eu.dnetlib.dhp.bulktag.actions.ExecSubstringAction\"," + + "\"method\":\"execSubstring\"," + + "\"params\":[" + + "{\"paramName\":\"From\", \"paramValue\":0}, " + + "{\"paramName\":\"To\",\"paramValue\":4}]}}}"; private static SparkSession spark; @@ -1609,60 +1609,94 @@ public class BulkTagJobTest { Assertions.assertEquals(0, spark.sql(query).count()); } - @Test void pubdateTest() throws Exception { - final String pathMap = BulkTagJobTest.pathMap; SparkBulkTagJob - .main( - new String[] { - "-isSparkSessionManaged", Boolean.FALSE.toString(), - "-sourcePath", - getClass().getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/publicationyear/").getPath(), - "-taggingConf", - IOUtils - .toString( - BulkTagJobTest.class - .getResourceAsStream( - "/eu/dnetlib/dhp/bulktag/communityconfiguration/tagging_conf_publicationdate.xml")), - "-outputPath", workingDir.toString() + "/", - "-pathMap", pathMap - }); + .main( + new String[] { + "-isSparkSessionManaged", Boolean.FALSE.toString(), + "-sourcePath", + getClass().getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/publicationyear/").getPath(), + "-taggingConf", + IOUtils + .toString( + BulkTagJobTest.class + .getResourceAsStream( + "/eu/dnetlib/dhp/bulktag/communityconfiguration/tagging_conf_publicationdate.xml")), + "-outputPath", workingDir.toString() + "/", + "-pathMap", pathMap + }); final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); JavaRDD tmp = sc - .textFile(workingDir.toString() + "/dataset") - .map(item -> OBJECT_MAPPER.readValue(item, Dataset.class)); + .textFile(workingDir.toString() + "/dataset") + .map(item -> OBJECT_MAPPER.readValue(item, Dataset.class)); Assertions.assertEquals(10, tmp.count()); org.apache.spark.sql.Dataset verificationDataset = spark - .createDataset(tmp.rdd(), Encoders.bean(Dataset.class)); + .createDataset(tmp.rdd(), Encoders.bean(Dataset.class)); verificationDataset.createOrReplaceTempView("dataset"); - String query = "select id, MyT.id community, MyD.provenanceaction.classid " - + "from dataset " - + "lateral view explode(context) c as MyT " - + "lateral view explode(MyT.datainfo) d as MyD " - + "where MyD.inferenceprovenance = 'bulktagging'"; + + "from dataset " + + "lateral view explode(context) c as MyT " + + "lateral view explode(MyT.datainfo) d as MyD " + + "where MyD.inferenceprovenance = 'bulktagging'"; org.apache.spark.sql.Dataset queryResult = spark.sql(query); queryResult.show(false); Assertions.assertEquals(5, queryResult.count()); - Assertions.assertEquals(1, queryResult.filter((FilterFunction) r -> r.getAs("id").equals("50|od______3989::02dd5d2c222191b0b9bd4f33c8e96529")).count()); - Assertions.assertEquals(1, queryResult.filter((FilterFunction) r -> r.getAs("id").equals("50|od______3989::2f4f3c820c450bd08dac08d07cc82dcf")).count()); - Assertions.assertEquals(1, queryResult.filter((FilterFunction) r -> r.getAs("id").equals("50|od______3989::7fcbe3a03280663cddebfd3cb9203177")).count()); - Assertions.assertEquals(1, queryResult.filter((FilterFunction) r -> r.getAs("id").equals("50|od______3989::d791339867bec6d3eb2104deeb4e4961")).count()); - Assertions.assertEquals(1, queryResult.filter((FilterFunction) r -> r.getAs("id").equals("50|od______3989::d90d3a1f64ad264b5ebed8a35b280343")).count()); - + Assertions + .assertEquals( + 1, + queryResult + .filter( + (FilterFunction) r -> r + .getAs("id") + .equals("50|od______3989::02dd5d2c222191b0b9bd4f33c8e96529")) + .count()); + Assertions + .assertEquals( + 1, + queryResult + .filter( + (FilterFunction) r -> r + .getAs("id") + .equals("50|od______3989::2f4f3c820c450bd08dac08d07cc82dcf")) + .count()); + Assertions + .assertEquals( + 1, + queryResult + .filter( + (FilterFunction) r -> r + .getAs("id") + .equals("50|od______3989::7fcbe3a03280663cddebfd3cb9203177")) + .count()); + Assertions + .assertEquals( + 1, + queryResult + .filter( + (FilterFunction) r -> r + .getAs("id") + .equals("50|od______3989::d791339867bec6d3eb2104deeb4e4961")) + .count()); + Assertions + .assertEquals( + 1, + queryResult + .filter( + (FilterFunction) r -> r + .getAs("id") + .equals("50|od______3989::d90d3a1f64ad264b5ebed8a35b280343")) + .count()); } - - }