From 9567c13bc37d8c32c0b8b5763206b502c12ea02c Mon Sep 17 00:00:00 2001
From: "miriam.baglioni"
Date: Wed, 24 May 2023 16:20:05 +0200
Subject: [PATCH] refactoring

---
 .../community/CommunityConfiguration.java     |  2 +-
 .../CommunityConfigurationFactory.java        |  2 +-
 .../dhp/bulktag/community/ResultTagger.java   | 26 +++++-----
 .../dnetlib/dhp/bulktag/BulkTagJobTest.java   | 47 ++++++++++---------
 4 files changed, 39 insertions(+), 38 deletions(-)

diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfiguration.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfiguration.java
index 689628869..e061ccd5e 100644
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfiguration.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfiguration.java
@@ -28,7 +28,7 @@ public class CommunityConfiguration implements Serializable {
 	private Map<String, SelectionConstraints> selectionConstraintsMap = new HashMap<>();
 	// map eosc datasource -> communityid
 	private Map<String, List<Pair<String, SelectionConstraints>>> eoscDatasourceMap = new HashMap<>();
-	//map communityid -> remove constraints
+	// map communityid -> remove constraints
 	private Map<String, SelectionConstraints> removeConstraintsMap = new HashMap<>();
 
 	public Map<String, List<Pair<String, SelectionConstraints>>> getEoscDatasourceMap() {
diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfigurationFactory.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfigurationFactory.java
index 695bbc363..7b9e03ef6 100644
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfigurationFactory.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfigurationFactory.java
@@ -109,7 +109,7 @@ public class CommunityConfigurationFactory {
 			return new SelectionConstraints();
 		}
 		SelectionConstraints selectionConstraints = new Gson()
-            .fromJson(constsNode.getText(), SelectionConstraints.class);
+			.fromJson(constsNode.getText(), SelectionConstraints.class);
 		selectionConstraints.setSelection(resolver);
 
 		log.info("number of selection constraints set " + selectionConstraints.getCriteria().size());
diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ResultTagger.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ResultTagger.java
index 0be2f59a9..5f62c10f4 100644
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ResultTagger.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ResultTagger.java
@@ -83,18 +83,18 @@ public class ResultTagger implements Serializable {
 		final Set<String> removeCommunities = new HashSet<>();
 		conf
-            .getRemoveConstraintsMap()
-            .keySet()
-            .forEach(communityId -> {
-                if (conf.getRemoveConstraintsMap().get(communityId).getCriteria() != null &&
-                    conf
-                        .getRemoveConstraintsMap()
-                        .get(communityId)
-                        .getCriteria()
-                        .stream()
-                        .anyMatch(crit -> crit.verifyCriteria(param)))
-                    removeCommunities.add(communityId);
-            });
+			.getRemoveConstraintsMap()
+			.keySet()
+			.forEach(communityId -> {
+				if (conf.getRemoveConstraintsMap().get(communityId).getCriteria() != null &&
+					conf
+						.getRemoveConstraintsMap()
+						.get(communityId)
+						.getCriteria()
+						.stream()
+						.anyMatch(crit -> crit.verifyCriteria(param)))
+					removeCommunities.add(communityId);
+			});
 
 		// communities contains all the communities to be added as context for the result
 		final Set<String> communities = new HashSet<>();
 
@@ -182,7 +182,7 @@
 			.keySet()
 			.forEach(communityId -> {
 				if (!removeCommunities.contains(communityId) &&
-                    conf.getSelectionConstraintsMap().get(communityId).getCriteria() != null &&
+					conf.getSelectionConstraintsMap().get(communityId).getCriteria() != null &&
 					conf
 						.getSelectionConstraintsMap()
 						.get(communityId)
diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/BulkTagJobTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/BulkTagJobTest.java
index aff5ee5fb..a63e62471 100644
--- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/BulkTagJobTest.java
+++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/BulkTagJobTest.java
@@ -40,9 +40,9 @@ public class BulkTagJobTest {
 		+ " \"description\" : \"$['description'][*]['value']\", "
 		+ " \"subject\" :\"$['subject'][*]['value']\" , "
 		+ "\"fos\" : \"$['subject'][?(@['qualifier']['classid']=='FOS')].value\"," +
-        "\"sdg\" : \"$['subject'][?(@['qualifier']['classid']=='SDG')].value\"," +
+		"\"sdg\" : \"$['subject'][?(@['qualifier']['classid']=='SDG')].value\"," +
 		"\"hostedby\" : \"$['instance'][*]['hostedby']['key']\" , " +
-        "\"collectedfrom\" : \"$['instance'][*]['collectedfrom']['key']\"} ";
+		"\"collectedfrom\" : \"$['instance'][*]['collectedfrom']['key']\"} ";
 
 	private static SparkSession spark;
 
@@ -1527,43 +1527,44 @@
 				.count());
 	}
 
-	@Test
 	void removeTest() throws Exception {
 		final String pathMap = BulkTagJobTest.pathMap;
 
 		SparkBulkTagJob
-            .main(
-                new String[]{
-                    "-isTest", Boolean.TRUE.toString(),
-                    "-isSparkSessionManaged", Boolean.FALSE.toString(),
-                    "-sourcePath",
-                    getClass().getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/update_datasourcewithconstraints").getPath(),
-                    "-taggingConf", taggingConf,
-                    "-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset",
-                    "-outputPath", workingDir.toString() + "/dataset",
-                    "-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
-                    "-pathMap", pathMap
-                });
+			.main(
+				new String[] {
+					"-isTest", Boolean.TRUE.toString(),
+					"-isSparkSessionManaged", Boolean.FALSE.toString(),
+					"-sourcePath",
+					getClass()
+						.getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/update_datasourcewithconstraints")
+						.getPath(),
+					"-taggingConf", taggingConf,
+					"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset",
+					"-outputPath", workingDir.toString() + "/dataset",
+					"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
+					"-pathMap", pathMap
+				});
 
 		final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
 		JavaRDD<Dataset> tmp = sc
-            .textFile(workingDir.toString() + "/dataset")
-            .map(item -> OBJECT_MAPPER.readValue(item, Dataset.class));
+			.textFile(workingDir.toString() + "/dataset")
+			.map(item -> OBJECT_MAPPER.readValue(item, Dataset.class));
 
 		Assertions.assertEquals(12, tmp.count());
 
 		org.apache.spark.sql.Dataset<Dataset> verificationDataset = spark
-            .createDataset(tmp.rdd(), Encoders.bean(Dataset.class));
+			.createDataset(tmp.rdd(), Encoders.bean(Dataset.class));
 		verificationDataset.createOrReplaceTempView("dataset");
 
 		String query = "select id, MyT.id community, MyD.provenanceaction.classid provenance, MyD.provenanceaction.classname name "
-            + "from dataset "
-            + "lateral view explode(context) c as MyT "
-            + "lateral view explode(MyT.datainfo) d as MyD "
-            + "where MyD.inferenceprovenance = 'bulktagging'";
+			+ "from dataset "
+			+ "lateral view explode(context) c as MyT "
+			+ "lateral view explode(MyT.datainfo) d as MyD "
+			+ "where MyD.inferenceprovenance = 'bulktagging'";
 
 		org.apache.spark.sql.Dataset<Row> idExplodeCommunity = spark.sql(query);
 
 		idExplodeCommunity.show(false);
 	}
- }
+}