[EOSCTag] Fixed an issue that occurred when the description is null. Modified test resources and classes.

This commit is contained in:
Miriam Baglioni 2022-05-04 10:06:38 +02:00
parent a21fe310e5
commit 3aeedd931a
6 changed files with 30 additions and 16 deletions

View File

@ -179,12 +179,11 @@ public class SparkEoscTag {
Set<String> words = getWordsSP(r.getTitle()); Set<String> words = getWordsSP(r.getTitle());
words.addAll(getWordsF(r.getDescription())); words.addAll(getWordsF(r.getDescription()));
if (words.contains("galaxy") && if (words.contains("galaxy") &&
(words.contains("workflow") || words.contains("software"))) words.contains("workflow"))
return true; return true;
if (r.getSubject().stream().anyMatch(sbj -> sbj.getValue().toLowerCase().contains("galaxy")) && if (r.getSubject().stream().anyMatch(sbj -> sbj.getValue().toLowerCase().contains("galaxy")) &&
(r.getSubject().stream().anyMatch(sbj -> sbj.getValue().toLowerCase().contains("workflow"))) || r.getSubject().stream().anyMatch(sbj -> sbj.getValue().toLowerCase().contains("workflow")))
r.getSubject().stream().anyMatch(sbj -> sbj.getValue().toLowerCase().contains("software")))
return true; return true;
return false; return false;
} }
@ -214,17 +213,30 @@ public class SparkEoscTag {
private static Set<String> getWordsSP(List<StructuredProperty> elem) { private static Set<String> getWordsSP(List<StructuredProperty> elem) {
Set<String> words = new HashSet<>(); Set<String> words = new HashSet<>();
elem Optional
.ofNullable(elem)
.ifPresent(
e -> e
.forEach( .forEach(
t -> words.addAll(Arrays.asList(t.getValue().toLowerCase().replaceAll("[^a-zA-Z ]", "").split(" ")))); t -> words
.addAll(
Arrays.asList(t.getValue().toLowerCase().replaceAll("[^a-zA-Z ]", "").split(" ")))));
return words; return words;
} }
private static Set<String> getWordsF(List<Field<String>> elem) { private static Set<String> getWordsF(List<Field<String>> elem) {
Set<String> words = new HashSet<>(); Set<String> words = new HashSet<>();
elem Optional
.ofNullable(elem)
.ifPresent(
e -> e
.forEach( .forEach(
t -> words.addAll(Arrays.asList(t.getValue().toLowerCase().replaceAll("[^a-zA-Z ]", "").split(" ")))); t -> words
.addAll(
Arrays.asList(t.getValue().toLowerCase().replaceAll("[^a-zA-Z ]", "").split(" ")))));
// elem
// .forEach(
// t -> words.addAll(Arrays.asList(t.getValue().toLowerCase().replaceAll("[^a-zA-Z ]", "").split(" "))));
return words; return words;
} }

View File

@ -29,7 +29,7 @@
</configuration> </configuration>
</global> </global>
<start to="reset_outputpath"/> <start to="eosc_tag"/>
<kill name="Kill"> <kill name="Kill">
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message> <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
@ -226,7 +226,7 @@
<arg>--sourcePath</arg><arg>${outputPath}</arg> <arg>--sourcePath</arg><arg>${outputPath}</arg>
<arg>--workingPath</arg><arg>${workingDir}/eoscTag</arg> <arg>--workingPath</arg><arg>${workingDir}/eoscTag</arg>
</spark> </spark>
<ok to="wait"/> <ok to="End"/>
<error to="Kill"/> <error to="Kill"/>
</action> </action>

View File

@ -322,7 +322,7 @@ public class EOSCTagJobTest {
.textFile(workingDir.toString() + "/input/software") .textFile(workingDir.toString() + "/input/software")
.map(item -> OBJECT_MAPPER.readValue(item, Software.class)); .map(item -> OBJECT_MAPPER.readValue(item, Software.class));
Assertions.assertEquals(10, tmp.count()); Assertions.assertEquals(11, tmp.count());
Assertions Assertions
.assertEquals( .assertEquals(
@ -534,11 +534,11 @@ public class EOSCTagJobTest {
.textFile(workingDir.toString() + "/input/dataset") .textFile(workingDir.toString() + "/input/dataset")
.map(item -> OBJECT_MAPPER.readValue(item, Dataset.class)); .map(item -> OBJECT_MAPPER.readValue(item, Dataset.class));
Assertions.assertEquals(10, dats.count()); Assertions.assertEquals(11, dats.count());
Assertions Assertions
.assertEquals( .assertEquals(
2, 3,
dats dats
.filter(s -> s.getSubject().stream().anyMatch(sbj -> sbj.getValue().equals("EOSC::Twitter Data"))) .filter(s -> s.getSubject().stream().anyMatch(sbj -> sbj.getValue().equals("EOSC::Twitter Data")))
.count()); .count());