[EOSCTag] Fixed issue when the description is null. Modified test resources and classes

2022-05-04 10:06:38 +02:00 · 2022-05-04 10:06:38 +02:00 · 3aeedd931a
parent a21fe310e5
commit 3aeedd931a
6 changed files with 30 additions and 16 deletions
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkEoscTag.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkEoscTag.java
@@ -179,12 +179,11 @@ public class SparkEoscTag {
 		Set<String> words = getWordsSP(r.getTitle());
 		words.addAll(getWordsF(r.getDescription()));
 		if (words.contains("galaxy") &&
-			(words.contains("workflow") || words.contains("software")))
+			words.contains("workflow"))
 			return true;

 		if (r.getSubject().stream().anyMatch(sbj -> sbj.getValue().toLowerCase().contains("galaxy")) &&
-			(r.getSubject().stream().anyMatch(sbj -> sbj.getValue().toLowerCase().contains("workflow"))) ||
-			r.getSubject().stream().anyMatch(sbj -> sbj.getValue().toLowerCase().contains("software")))
+			r.getSubject().stream().anyMatch(sbj -> sbj.getValue().toLowerCase().contains("workflow")))
 			return true;
 		return false;
 	}
@@ -214,17 +213,30 @@ public class SparkEoscTag {

 	private static Set<String> getWordsSP(List<StructuredProperty> elem) {
 		Set<String> words = new HashSet<>();
-		elem
-			.forEach(
-				t -> words.addAll(Arrays.asList(t.getValue().toLowerCase().replaceAll("[^a-zA-Z ]", "").split(" "))));
+		Optional
+			.ofNullable(elem)
+			.ifPresent(
+				e -> e
+					.forEach(
+						t -> words
+							.addAll(
+								Arrays.asList(t.getValue().toLowerCase().replaceAll("[^a-zA-Z ]", "").split(" ")))));
 		return words;
 	}

 	private static Set<String> getWordsF(List<Field<String>> elem) {
 		Set<String> words = new HashSet<>();
-		elem
-			.forEach(
-				t -> words.addAll(Arrays.asList(t.getValue().toLowerCase().replaceAll("[^a-zA-Z ]", "").split(" "))));
+		Optional
+			.ofNullable(elem)
+			.ifPresent(
+				e -> e
+					.forEach(
+						t -> words
+							.addAll(
+								Arrays.asList(t.getValue().toLowerCase().replaceAll("[^a-zA-Z ]", "").split(" ")))));
+//		elem
+//			.forEach(
+//				t -> words.addAll(Arrays.asList(t.getValue().toLowerCase().replaceAll("[^a-zA-Z ]", "").split(" "))));
 		return words;

 	}
--- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/oozie_app/workflow.xml
@@ -29,7 +29,7 @@
        </configuration>
    </global>

-    <start to="reset_outputpath"/>
+    <start to="eosc_tag"/>

    <kill name="Kill">
        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
@@ -226,7 +226,7 @@
            <arg>--sourcePath</arg><arg>${outputPath}</arg>
            <arg>--workingPath</arg><arg>${workingDir}/eoscTag</arg>
        </spark>
-        <ok to="wait"/>
+        <ok to="End"/>
        <error to="Kill"/>
    </action>

--- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/EOSCTagJobTest.java
+++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/EOSCTagJobTest.java
@@ -322,7 +322,7 @@ public class EOSCTagJobTest {
 			.textFile(workingDir.toString() + "/input/software")
 			.map(item -> OBJECT_MAPPER.readValue(item, Software.class));

-		Assertions.assertEquals(10, tmp.count());
+		Assertions.assertEquals(11, tmp.count());

 		Assertions
 			.assertEquals(
@@ -534,11 +534,11 @@ public class EOSCTagJobTest {
 			.textFile(workingDir.toString() + "/input/dataset")
 			.map(item -> OBJECT_MAPPER.readValue(item, Dataset.class));

-		Assertions.assertEquals(10, dats.count());
+		Assertions.assertEquals(11, dats.count());

 		Assertions
 			.assertEquals(
-				2,
+				3,
 				dats
 					.filter(s -> s.getSubject().stream().anyMatch(sbj -> sbj.getValue().equals("EOSC::Twitter Data")))
 					.count());
--- a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/eosctag/galaxy/dataset/dataset_10.json
+++ b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/eosctag/galaxy/dataset/dataset_10.json
--- a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/eosctag/galaxy/software/software_10.json
+++ b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/eosctag/galaxy/software/software_10.json
--- a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/eosctag/twitter/dataset/dataset_10.json
+++ b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/eosctag/twitter/dataset/dataset_10.json