forked from D-Net/dnet-hadoop
[EOSCTag] fixed issue in case description is null. Modified test resources and classes
This commit is contained in:
parent
a21fe310e5
commit
3aeedd931a
|
@ -179,12 +179,11 @@ public class SparkEoscTag {
|
|||
Set<String> words = getWordsSP(r.getTitle());
|
||||
words.addAll(getWordsF(r.getDescription()));
|
||||
if (words.contains("galaxy") &&
|
||||
(words.contains("workflow") || words.contains("software")))
|
||||
words.contains("workflow"))
|
||||
return true;
|
||||
|
||||
if (r.getSubject().stream().anyMatch(sbj -> sbj.getValue().toLowerCase().contains("galaxy")) &&
|
||||
(r.getSubject().stream().anyMatch(sbj -> sbj.getValue().toLowerCase().contains("workflow"))) ||
|
||||
r.getSubject().stream().anyMatch(sbj -> sbj.getValue().toLowerCase().contains("software")))
|
||||
r.getSubject().stream().anyMatch(sbj -> sbj.getValue().toLowerCase().contains("workflow")))
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
@ -214,17 +213,30 @@ public class SparkEoscTag {
|
|||
|
||||
private static Set<String> getWordsSP(List<StructuredProperty> elem) {
|
||||
Set<String> words = new HashSet<>();
|
||||
elem
|
||||
Optional
|
||||
.ofNullable(elem)
|
||||
.ifPresent(
|
||||
e -> e
|
||||
.forEach(
|
||||
t -> words.addAll(Arrays.asList(t.getValue().toLowerCase().replaceAll("[^a-zA-Z ]", "").split(" "))));
|
||||
t -> words
|
||||
.addAll(
|
||||
Arrays.asList(t.getValue().toLowerCase().replaceAll("[^a-zA-Z ]", "").split(" ")))));
|
||||
return words;
|
||||
}
|
||||
|
||||
private static Set<String> getWordsF(List<Field<String>> elem) {
|
||||
Set<String> words = new HashSet<>();
|
||||
elem
|
||||
Optional
|
||||
.ofNullable(elem)
|
||||
.ifPresent(
|
||||
e -> e
|
||||
.forEach(
|
||||
t -> words.addAll(Arrays.asList(t.getValue().toLowerCase().replaceAll("[^a-zA-Z ]", "").split(" "))));
|
||||
t -> words
|
||||
.addAll(
|
||||
Arrays.asList(t.getValue().toLowerCase().replaceAll("[^a-zA-Z ]", "").split(" ")))));
|
||||
// elem
|
||||
// .forEach(
|
||||
// t -> words.addAll(Arrays.asList(t.getValue().toLowerCase().replaceAll("[^a-zA-Z ]", "").split(" "))));
|
||||
return words;
|
||||
|
||||
}
|
||||
|
|
|
@ -29,7 +29,7 @@
|
|||
</configuration>
|
||||
</global>
|
||||
|
||||
<start to="reset_outputpath"/>
|
||||
<start to="eosc_tag"/>
|
||||
|
||||
<kill name="Kill">
|
||||
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
|
||||
|
@ -226,7 +226,7 @@
|
|||
<arg>--sourcePath</arg><arg>${outputPath}</arg>
|
||||
<arg>--workingPath</arg><arg>${workingDir}/eoscTag</arg>
|
||||
</spark>
|
||||
<ok to="wait"/>
|
||||
<ok to="End"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
|
|
|
@ -322,7 +322,7 @@ public class EOSCTagJobTest {
|
|||
.textFile(workingDir.toString() + "/input/software")
|
||||
.map(item -> OBJECT_MAPPER.readValue(item, Software.class));
|
||||
|
||||
Assertions.assertEquals(10, tmp.count());
|
||||
Assertions.assertEquals(11, tmp.count());
|
||||
|
||||
Assertions
|
||||
.assertEquals(
|
||||
|
@ -534,11 +534,11 @@ public class EOSCTagJobTest {
|
|||
.textFile(workingDir.toString() + "/input/dataset")
|
||||
.map(item -> OBJECT_MAPPER.readValue(item, Dataset.class));
|
||||
|
||||
Assertions.assertEquals(10, dats.count());
|
||||
Assertions.assertEquals(11, dats.count());
|
||||
|
||||
Assertions
|
||||
.assertEquals(
|
||||
2,
|
||||
3,
|
||||
dats
|
||||
.filter(s -> s.getSubject().stream().anyMatch(sbj -> sbj.getValue().equals("EOSC::Twitter Data")))
|
||||
.count());
|
||||
|
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
Loading…
Reference in New Issue