forked from antonis.lempesis/dnet-hadoop
[EOSCTag] Fixed an issue that occurred when the description is null. Modified test resources and classes.
This commit is contained in:
parent
a21fe310e5
commit
3aeedd931a
|
@ -179,12 +179,11 @@ public class SparkEoscTag {
|
||||||
Set<String> words = getWordsSP(r.getTitle());
|
Set<String> words = getWordsSP(r.getTitle());
|
||||||
words.addAll(getWordsF(r.getDescription()));
|
words.addAll(getWordsF(r.getDescription()));
|
||||||
if (words.contains("galaxy") &&
|
if (words.contains("galaxy") &&
|
||||||
(words.contains("workflow") || words.contains("software")))
|
words.contains("workflow"))
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
if (r.getSubject().stream().anyMatch(sbj -> sbj.getValue().toLowerCase().contains("galaxy")) &&
|
if (r.getSubject().stream().anyMatch(sbj -> sbj.getValue().toLowerCase().contains("galaxy")) &&
|
||||||
(r.getSubject().stream().anyMatch(sbj -> sbj.getValue().toLowerCase().contains("workflow"))) ||
|
r.getSubject().stream().anyMatch(sbj -> sbj.getValue().toLowerCase().contains("workflow")))
|
||||||
r.getSubject().stream().anyMatch(sbj -> sbj.getValue().toLowerCase().contains("software")))
|
|
||||||
return true;
|
return true;
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
@ -214,17 +213,30 @@ public class SparkEoscTag {
|
||||||
|
|
||||||
private static Set<String> getWordsSP(List<StructuredProperty> elem) {
|
private static Set<String> getWordsSP(List<StructuredProperty> elem) {
|
||||||
Set<String> words = new HashSet<>();
|
Set<String> words = new HashSet<>();
|
||||||
elem
|
Optional
|
||||||
.forEach(
|
.ofNullable(elem)
|
||||||
t -> words.addAll(Arrays.asList(t.getValue().toLowerCase().replaceAll("[^a-zA-Z ]", "").split(" "))));
|
.ifPresent(
|
||||||
|
e -> e
|
||||||
|
.forEach(
|
||||||
|
t -> words
|
||||||
|
.addAll(
|
||||||
|
Arrays.asList(t.getValue().toLowerCase().replaceAll("[^a-zA-Z ]", "").split(" ")))));
|
||||||
return words;
|
return words;
|
||||||
}
|
}
|
||||||
|
|
||||||
private static Set<String> getWordsF(List<Field<String>> elem) {
|
private static Set<String> getWordsF(List<Field<String>> elem) {
|
||||||
Set<String> words = new HashSet<>();
|
Set<String> words = new HashSet<>();
|
||||||
elem
|
Optional
|
||||||
.forEach(
|
.ofNullable(elem)
|
||||||
t -> words.addAll(Arrays.asList(t.getValue().toLowerCase().replaceAll("[^a-zA-Z ]", "").split(" "))));
|
.ifPresent(
|
||||||
|
e -> e
|
||||||
|
.forEach(
|
||||||
|
t -> words
|
||||||
|
.addAll(
|
||||||
|
Arrays.asList(t.getValue().toLowerCase().replaceAll("[^a-zA-Z ]", "").split(" ")))));
|
||||||
|
// elem
|
||||||
|
// .forEach(
|
||||||
|
// t -> words.addAll(Arrays.asList(t.getValue().toLowerCase().replaceAll("[^a-zA-Z ]", "").split(" "))));
|
||||||
return words;
|
return words;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -29,7 +29,7 @@
|
||||||
</configuration>
|
</configuration>
|
||||||
</global>
|
</global>
|
||||||
|
|
||||||
<start to="reset_outputpath"/>
|
<start to="eosc_tag"/>
|
||||||
|
|
||||||
<kill name="Kill">
|
<kill name="Kill">
|
||||||
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
|
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
|
||||||
|
@ -226,7 +226,7 @@
|
||||||
<arg>--sourcePath</arg><arg>${outputPath}</arg>
|
<arg>--sourcePath</arg><arg>${outputPath}</arg>
|
||||||
<arg>--workingPath</arg><arg>${workingDir}/eoscTag</arg>
|
<arg>--workingPath</arg><arg>${workingDir}/eoscTag</arg>
|
||||||
</spark>
|
</spark>
|
||||||
<ok to="wait"/>
|
<ok to="End"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
</action>
|
</action>
|
||||||
|
|
||||||
|
|
|
@ -322,7 +322,7 @@ public class EOSCTagJobTest {
|
||||||
.textFile(workingDir.toString() + "/input/software")
|
.textFile(workingDir.toString() + "/input/software")
|
||||||
.map(item -> OBJECT_MAPPER.readValue(item, Software.class));
|
.map(item -> OBJECT_MAPPER.readValue(item, Software.class));
|
||||||
|
|
||||||
Assertions.assertEquals(10, tmp.count());
|
Assertions.assertEquals(11, tmp.count());
|
||||||
|
|
||||||
Assertions
|
Assertions
|
||||||
.assertEquals(
|
.assertEquals(
|
||||||
|
@ -534,11 +534,11 @@ public class EOSCTagJobTest {
|
||||||
.textFile(workingDir.toString() + "/input/dataset")
|
.textFile(workingDir.toString() + "/input/dataset")
|
||||||
.map(item -> OBJECT_MAPPER.readValue(item, Dataset.class));
|
.map(item -> OBJECT_MAPPER.readValue(item, Dataset.class));
|
||||||
|
|
||||||
Assertions.assertEquals(10, dats.count());
|
Assertions.assertEquals(11, dats.count());
|
||||||
|
|
||||||
Assertions
|
Assertions
|
||||||
.assertEquals(
|
.assertEquals(
|
||||||
2,
|
3,
|
||||||
dats
|
dats
|
||||||
.filter(s -> s.getSubject().stream().anyMatch(sbj -> sbj.getValue().equals("EOSC::Twitter Data")))
|
.filter(s -> s.getSubject().stream().anyMatch(sbj -> sbj.getValue().equals("EOSC::Twitter Data")))
|
||||||
.count());
|
.count());
|
||||||
|
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
Loading…
Reference in New Issue