From 10579c0dd0c037476ae938b2bdec4984fc23617c Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 22 Dec 2021 23:10:16 +0100 Subject: [PATCH] [FOS]fixed doi value in test --- .../PrepareFOSSparkJob.java | 40 +-- .../createunresolvedentities/ProduceTest.java | 260 +++++++++--------- 2 files changed, 146 insertions(+), 154 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareFOSSparkJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareFOSSparkJob.java index d4a02c2ff..c8f472db7 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareFOSSparkJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareFOSSparkJob.java @@ -65,35 +65,35 @@ public class PrepareFOSSparkJob implements Serializable { private static void distributeFOSdois(SparkSession spark, String sourcePath, String outputPath) { Dataset fosDataset = readPath(spark, sourcePath, FOSDataModel.class); - fosDataset.groupByKey((MapFunction)v->v.getDoi(), Encoders.STRING()) - .mapGroups((MapGroupsFunction)(k,it)->{ - Result r = new Result(); - FOSDataModel first = it.next(); - r.setId(DHPUtils.generateUnresolvedIdentifier(first.getDoi(), DOI)); - HashSet level1 = new HashSet<>(); - HashSet level2 = new HashSet<>(); - HashSet level3 = new HashSet<>(); - addLevels(level1, level2, level3, first); - it.forEachRemaining(v -> addLevels(level1, level2, level3, v)); - Listsbjs = new ArrayList<>(); - level1.forEach(l -> sbjs.add(getSubject(l, FOS_CLASS_ID, FOS_CLASS_NAME))); - level2.forEach(l -> sbjs.add(getSubject(l, FOS_CLASS_ID, FOS_CLASS_NAME))); - level3.forEach(l -> sbjs.add(getSubject(l, FOS_CLASS_ID, FOS_CLASS_NAME))); - r.setSubject(sbjs); - return r; - }, Encoders.bean(Result.class)) + fosDataset + .groupByKey((MapFunction) v -> v.getDoi(), Encoders.STRING()) + .mapGroups((MapGroupsFunction) (k, it) -> { + Result r = new Result(); + FOSDataModel first = it.next(); + r.setId(DHPUtils.generateUnresolvedIdentifier(first.getDoi(), DOI)); + HashSet level1 = new HashSet<>(); + HashSet level2 = new HashSet<>(); + HashSet level3 = new HashSet<>(); + addLevels(level1, level2, level3, first); + it.forEachRemaining(v -> addLevels(level1, level2, level3, v)); + List sbjs = new ArrayList<>(); + level1.forEach(l -> sbjs.add(getSubject(l, FOS_CLASS_ID, FOS_CLASS_NAME))); + level2.forEach(l -> sbjs.add(getSubject(l, FOS_CLASS_ID, FOS_CLASS_NAME))); + level3.forEach(l -> sbjs.add(getSubject(l, FOS_CLASS_ID, FOS_CLASS_NAME))); + r.setSubject(sbjs); + return r; + }, Encoders.bean(Result.class)) .write() .mode(SaveMode.Overwrite) .option("compression", "gzip") .json(outputPath + "/fos"); } - private static void addLevels(HashSet level1, HashSet level2, HashSet level3, FOSDataModel first) { + private static void addLevels(HashSet level1, HashSet level2, HashSet level3, + FOSDataModel first) { level1.add(first.getLevel1()); level2.add(first.getLevel2()); level3.add(first.getLevel3()); } - - } diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/ProduceTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/ProduceTest.java index b1ffeee17..32fb25640 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/ProduceTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/ProduceTest.java @@ -67,29 +67,29 @@ public class ProduceTest { } @Test - void produceTestSubjects()throws Exception{ + void produceTestSubjects() throws Exception { JavaRDD tmp = getResultJavaRDD(); List sbjs = tmp - .filter(row -> row.getSubject()!= null && row.getSubject().size()>0) - .flatMap(row -> row.getSubject().iterator()) - .collect(); + .filter(row -> row.getSubject() != null && row.getSubject().size() > 0) + .flatMap(row -> row.getSubject().iterator()) + .collect(); sbjs.forEach(sbj -> Assertions.assertEquals("FOS", sbj.getQualifier().getClassid())); sbjs - .forEach( - sbj -> Assertions - .assertEquals( - "Fields of Science and Technology classification", sbj.getQualifier().getClassname())); + .forEach( + sbj -> Assertions + .assertEquals( + "Fields of Science and Technology classification", sbj.getQualifier().getClassname())); sbjs - .forEach( - sbj -> Assertions - .assertEquals(ModelConstants.DNET_SUBJECT_TYPOLOGIES, sbj.getQualifier().getSchemeid())); + .forEach( + sbj -> Assertions + .assertEquals(ModelConstants.DNET_SUBJECT_TYPOLOGIES, sbj.getQualifier().getSchemeid())); sbjs - .forEach( - sbj -> Assertions - .assertEquals(ModelConstants.DNET_SUBJECT_TYPOLOGIES, sbj.getQualifier().getSchemename())); + .forEach( + sbj -> Assertions + .assertEquals(ModelConstants.DNET_SUBJECT_TYPOLOGIES, sbj.getQualifier().getSchemename())); sbjs.forEach(sbj -> Assertions.assertEquals(false, sbj.getDataInfo().getDeletedbyinference())); sbjs.forEach(sbj -> Assertions.assertEquals(true, sbj.getDataInfo().getInferred())); @@ -97,38 +97,36 @@ public class ProduceTest { sbjs.forEach(sbj -> Assertions.assertEquals("", sbj.getDataInfo().getTrust())); sbjs.forEach(sbj -> Assertions.assertEquals("update", sbj.getDataInfo().getInferenceprovenance())); sbjs - .forEach( - sbj -> Assertions.assertEquals("subject:fos", sbj.getDataInfo().getProvenanceaction().getClassid())); + .forEach( + sbj -> Assertions.assertEquals("subject:fos", sbj.getDataInfo().getProvenanceaction().getClassid())); sbjs - .forEach( - sbj -> Assertions - .assertEquals("Inferred by OpenAIRE", sbj.getDataInfo().getProvenanceaction().getClassname())); + .forEach( + sbj -> Assertions + .assertEquals("Inferred by OpenAIRE", sbj.getDataInfo().getProvenanceaction().getClassname())); sbjs - .forEach( - sbj -> Assertions - .assertEquals( - ModelConstants.DNET_PROVENANCE_ACTIONS, sbj.getDataInfo().getProvenanceaction().getSchemeid())); + .forEach( + sbj -> Assertions + .assertEquals( + ModelConstants.DNET_PROVENANCE_ACTIONS, sbj.getDataInfo().getProvenanceaction().getSchemeid())); sbjs - .forEach( - sbj -> Assertions - .assertEquals( - ModelConstants.DNET_PROVENANCE_ACTIONS, - sbj.getDataInfo().getProvenanceaction().getSchemename())); + .forEach( + sbj -> Assertions + .assertEquals( + ModelConstants.DNET_PROVENANCE_ACTIONS, + sbj.getDataInfo().getProvenanceaction().getSchemename())); } @Test - void produceTestMeasuress()throws Exception{ + void produceTestMeasuress() throws Exception { JavaRDD tmp = getResultJavaRDD(); List mes = tmp - .filter(row -> row.getInstance()!= null && row.getInstance().size()>0) - .flatMap(row -> row.getInstance().iterator()) - .flatMap(i->i.getMeasures().iterator()) - .flatMap(m ->m.getUnit().iterator()) - .collect(); - - + .filter(row -> row.getInstance() != null && row.getInstance().size() > 0) + .flatMap(row -> row.getInstance().iterator()) + .flatMap(i -> i.getMeasures().iterator()) + .flatMap(m -> m.getUnit().iterator()) + .collect(); mes.forEach(sbj -> Assertions.assertEquals(false, sbj.getDataInfo().getDeletedbyinference())); mes.forEach(sbj -> Assertions.assertEquals(true, sbj.getDataInfo().getInferred())); @@ -136,119 +134,118 @@ public class ProduceTest { mes.forEach(sbj -> Assertions.assertEquals("", sbj.getDataInfo().getTrust())); mes.forEach(sbj -> Assertions.assertEquals("update", sbj.getDataInfo().getInferenceprovenance())); mes - .forEach( - sbj -> Assertions.assertEquals("measure:bip", sbj.getDataInfo().getProvenanceaction().getClassid())); + .forEach( + sbj -> Assertions.assertEquals("measure:bip", sbj.getDataInfo().getProvenanceaction().getClassid())); mes - .forEach( - sbj -> Assertions - .assertEquals("Inferred by OpenAIRE", sbj.getDataInfo().getProvenanceaction().getClassname())); + .forEach( + sbj -> Assertions + .assertEquals("Inferred by OpenAIRE", sbj.getDataInfo().getProvenanceaction().getClassname())); mes - .forEach( - sbj -> Assertions - .assertEquals( - ModelConstants.DNET_PROVENANCE_ACTIONS, sbj.getDataInfo().getProvenanceaction().getSchemeid())); + .forEach( + sbj -> Assertions + .assertEquals( + ModelConstants.DNET_PROVENANCE_ACTIONS, sbj.getDataInfo().getProvenanceaction().getSchemeid())); mes - .forEach( - sbj -> Assertions - .assertEquals( - ModelConstants.DNET_PROVENANCE_ACTIONS, - sbj.getDataInfo().getProvenanceaction().getSchemename())); + .forEach( + sbj -> Assertions + .assertEquals( + ModelConstants.DNET_PROVENANCE_ACTIONS, + sbj.getDataInfo().getProvenanceaction().getSchemename())); } + @Test - void produceTest6Subjects() throws Exception{ + void produceTest6Subjects() throws Exception { final String doi = "unresolved::10.3390/s18072310::doi"; JavaRDD tmp = getResultJavaRDD(); Assertions - .assertEquals( - 6, tmp - .filter(row -> row.getId().equals(doi)) - .collect() - .get(0) - .getSubject() - .size()); + .assertEquals( + 6, tmp + .filter(row -> row.getId().equals(doi)) + .collect() + .get(0) + .getSubject() + .size()); List sbjs = tmp - .filter(row -> row.getId().equals(doi)) - .flatMap(row -> row.getSubject().iterator()) - .collect(); - + .filter(row -> row.getId().equals(doi)) + .flatMap(row -> row.getSubject().iterator()) + .collect(); Assertions - .assertEquals( - true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("04 agricultural and veterinary sciences"))); + .assertEquals( + true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("04 agricultural and veterinary sciences"))); Assertions - .assertEquals( - true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("0404 agricultural biotechnology"))); + .assertEquals( + true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("0404 agricultural biotechnology"))); Assertions.assertEquals(true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("040502 food science"))); Assertions - .assertEquals(true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("03 medical and health sciences"))); + .assertEquals(true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("03 medical and health sciences"))); Assertions.assertEquals(true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("0303 health sciences"))); Assertions - .assertEquals(true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("030309 nutrition & dietetics"))); - + .assertEquals(true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("030309 nutrition & dietetics"))); } @Test - void produceTest3Measures()throws Exception{ + void produceTest3Measures() throws Exception { final String doi = "unresolved::10.3390/s18072310::doi"; JavaRDD tmp = getResultJavaRDD(); Assertions - .assertEquals( - 3, tmp - .filter(row -> row.getId().equals(doi)) - .collect() - .get(0) - .getInstance() - .get(0) - .getMeasures() - .size()); - + .assertEquals( + 3, tmp + .filter(row -> row.getId().equals(doi)) + .collect() + .get(0) + .getInstance() + .get(0) + .getMeasures() + .size()); List measures = tmp - .filter(row -> row.getId().equals(doi)) - .flatMap(row -> row.getInstance().iterator()) - .flatMap(inst -> inst.getMeasures().iterator()) - .collect(); + .filter(row -> row.getId().equals(doi)) + .flatMap(row -> row.getInstance().iterator()) + .flatMap(inst -> inst.getMeasures().iterator()) + .collect(); Assertions - .assertEquals( - "7.5597134689e-09", measures - .stream() - .filter(mes -> mes.getId().equals("influence")) - .collect(Collectors.toList()) - .get(0) - .getUnit() - .get(0) - .getValue()); + .assertEquals( + "7.5597134689e-09", measures + .stream() + .filter(mes -> mes.getId().equals("influence")) + .collect(Collectors.toList()) + .get(0) + .getUnit() + .get(0) + .getValue()); Assertions - .assertEquals( - "4.903880192", measures - .stream() - .filter(mes -> mes.getId().equals("popularity_alt")) - .collect(Collectors.toList()) - .get(0) - .getUnit() - .get(0) - .getValue()); + .assertEquals( + "4.903880192", measures + .stream() + .filter(mes -> mes.getId().equals("popularity_alt")) + .collect(Collectors.toList()) + .get(0) + .getUnit() + .get(0) + .getValue()); Assertions - .assertEquals( - "1.17977512835e-08", measures - .stream() - .filter(mes -> mes.getId().equals("popularity")) - .collect(Collectors.toList()) - .get(0) - .getUnit() - .get(0) - .getValue()); + .assertEquals( + "1.17977512835e-08", measures + .stream() + .filter(mes -> mes.getId().equals("popularity")) + .collect(Collectors.toList()) + .get(0) + .getUnit() + .get(0) + .getValue()); } + @Test void produceTestSomeNumbers() throws Exception { @@ -316,44 +313,39 @@ public class ProduceTest { .map(item -> OBJECT_MAPPER.readValue(item, Result.class)); } - @Test - void prepareTest5Subjects()throws Exception{ - final String doi = "unresolved::10.3390/s18072310::doi"; + void prepareTest5Subjects() throws Exception { + final String doi = "unresolved::10.1063/5.0032658::doi"; + JavaRDD tmp = getResultJavaRDD(); Assertions.assertEquals(1, tmp.filter(row -> row.getId().equals(doi)).count()); Assertions - .assertEquals( - 5, tmp - .filter(row -> row.getId().equals(doi)) - .collect() - .get(0) - .getSubject() - .size()); - - - + .assertEquals( + 5, tmp + .filter(row -> row.getId().equals(doi)) + .collect() + .get(0) + .getSubject() + .size()); List sbjs = tmp - .filter(row -> row.getId().equals(doi)) - .flatMap(row -> row.getSubject().iterator()) - .collect(); - - + .filter(row -> row.getId().equals(doi)) + .flatMap(row -> row.getSubject().iterator()) + .collect(); Assertions - .assertEquals( - true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("01 natural sciences"))); + .assertEquals( + true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("01 natural sciences"))); Assertions.assertEquals(true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("0103 physical sciences"))); Assertions - .assertEquals(true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("010304 chemical physics"))); + .assertEquals(true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("010304 chemical physics"))); Assertions.assertEquals(true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("0104 chemical sciences"))); Assertions - .assertEquals(true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("010402 general chemistry"))); + .assertEquals(true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("010402 general chemistry"))); } - + }