[EOSCTag] fixed issue in case description is null. Modified test resources and classes

This commit is contained in:
Miriam Baglioni 2022-05-04 10:06:38 +02:00
parent a21fe310e5
commit 3aeedd931a
6 changed files with 30 additions and 16 deletions

View File

@ -179,12 +179,11 @@ public class SparkEoscTag {
Set<String> words = getWordsSP(r.getTitle());
words.addAll(getWordsF(r.getDescription()));
if (words.contains("galaxy") &&
(words.contains("workflow") || words.contains("software")))
words.contains("workflow"))
return true;
if (r.getSubject().stream().anyMatch(sbj -> sbj.getValue().toLowerCase().contains("galaxy")) &&
(r.getSubject().stream().anyMatch(sbj -> sbj.getValue().toLowerCase().contains("workflow"))) ||
r.getSubject().stream().anyMatch(sbj -> sbj.getValue().toLowerCase().contains("software")))
r.getSubject().stream().anyMatch(sbj -> sbj.getValue().toLowerCase().contains("workflow")))
return true;
return false;
}
@ -214,17 +213,30 @@ public class SparkEoscTag {
private static Set<String> getWordsSP(List<StructuredProperty> elem) {
Set<String> words = new HashSet<>();
elem
.forEach(
t -> words.addAll(Arrays.asList(t.getValue().toLowerCase().replaceAll("[^a-zA-Z ]", "").split(" "))));
Optional
.ofNullable(elem)
.ifPresent(
e -> e
.forEach(
t -> words
.addAll(
Arrays.asList(t.getValue().toLowerCase().replaceAll("[^a-zA-Z ]", "").split(" ")))));
return words;
}
private static Set<String> getWordsF(List<Field<String>> elem) {
Set<String> words = new HashSet<>();
elem
.forEach(
t -> words.addAll(Arrays.asList(t.getValue().toLowerCase().replaceAll("[^a-zA-Z ]", "").split(" "))));
Optional
.ofNullable(elem)
.ifPresent(
e -> e
.forEach(
t -> words
.addAll(
Arrays.asList(t.getValue().toLowerCase().replaceAll("[^a-zA-Z ]", "").split(" ")))));
// elem
// .forEach(
// t -> words.addAll(Arrays.asList(t.getValue().toLowerCase().replaceAll("[^a-zA-Z ]", "").split(" "))));
return words;
}

View File

@ -29,7 +29,7 @@
</configuration>
</global>
<start to="reset_outputpath"/>
<start to="eosc_tag"/>
<kill name="Kill">
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
@ -226,7 +226,7 @@
<arg>--sourcePath</arg><arg>${outputPath}</arg>
<arg>--workingPath</arg><arg>${workingDir}/eoscTag</arg>
</spark>
<ok to="wait"/>
<ok to="End"/>
<error to="Kill"/>
</action>

View File

@ -322,7 +322,7 @@ public class EOSCTagJobTest {
.textFile(workingDir.toString() + "/input/software")
.map(item -> OBJECT_MAPPER.readValue(item, Software.class));
Assertions.assertEquals(10, tmp.count());
Assertions.assertEquals(11, tmp.count());
Assertions
.assertEquals(
@ -534,11 +534,11 @@ public class EOSCTagJobTest {
.textFile(workingDir.toString() + "/input/dataset")
.map(item -> OBJECT_MAPPER.readValue(item, Dataset.class));
Assertions.assertEquals(10, dats.count());
Assertions.assertEquals(11, dats.count());
Assertions
.assertEquals(
2,
3,
dats
.filter(s -> s.getSubject().stream().anyMatch(sbj -> sbj.getValue().equals("EOSC::Twitter Data")))
.count());