forked from antonis.lempesis/dnet-hadoop
[EOSC tag] avoid NPEs
This commit is contained in:
parent
3329b6ce6b
commit
0727f0ef48
|
@ -23,6 +23,10 @@ import eu.dnetlib.dhp.schema.oaf.*;
|
|||
public class SparkEoscTag {
|
||||
private static final Logger log = LoggerFactory.getLogger(SparkEoscTag.class);
|
||||
public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
||||
public static final String EOSC_GALAXY_WORKFLOW = "EOSC::Galaxy Workflow";
|
||||
public static final String EOSC_TWITTER_DATA = "EOSC::Twitter Data";
|
||||
public static final String EOSC_JUPYTER_NOTEBOOK = "EOSC::Jupyter Notebook";
|
||||
public static final String COMPLIES_WITH = "compliesWith";
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
String jsonConfiguration = IOUtils
|
||||
|
@ -76,8 +80,8 @@ public class SparkEoscTag {
|
|||
if (!Optional.ofNullable(s.getEoscifguidelines()).isPresent())
|
||||
s.setEoscifguidelines(new ArrayList<>());
|
||||
addEIG(
|
||||
s.getEoscifguidelines(), "EOSC::Jupyter Notebook", "EOSC::Jupyter Notebook", "",
|
||||
"compliesWith");
|
||||
s.getEoscifguidelines(), EOSC_JUPYTER_NOTEBOOK, EOSC_JUPYTER_NOTEBOOK, "",
|
||||
COMPLIES_WITH);
|
||||
|
||||
}
|
||||
if (containsCriteriaGalaxy(s)) {
|
||||
|
@ -85,7 +89,7 @@ public class SparkEoscTag {
|
|||
s.setEoscifguidelines(new ArrayList<>());
|
||||
|
||||
addEIG(
|
||||
s.getEoscifguidelines(), "EOSC::Galaxy Workflow", "EOSC::Galaxy Workflow", "", "compliesWith");
|
||||
s.getEoscifguidelines(), EOSC_GALAXY_WORKFLOW, EOSC_GALAXY_WORKFLOW, "", COMPLIES_WITH);
|
||||
}
|
||||
return s;
|
||||
}, Encoders.bean(Software.class))
|
||||
|
@ -108,11 +112,11 @@ public class SparkEoscTag {
|
|||
|
||||
if (containsCriteriaGalaxy(orp)) {
|
||||
addEIG(
|
||||
orp.getEoscifguidelines(), "EOSC::Galaxy Workflow", "EOSC::Galaxy Workflow", "",
|
||||
"compliesWith");
|
||||
orp.getEoscifguidelines(), EOSC_GALAXY_WORKFLOW, EOSC_GALAXY_WORKFLOW, "",
|
||||
COMPLIES_WITH);
|
||||
}
|
||||
if (containscriteriaTwitter(orp)) {
|
||||
addEIG(orp.getEoscifguidelines(), "EOSC::Twitter Data", "EOSC::Twitter Data", "", "compliesWith");
|
||||
addEIG(orp.getEoscifguidelines(), EOSC_TWITTER_DATA, EOSC_TWITTER_DATA, "", COMPLIES_WITH);
|
||||
}
|
||||
return orp;
|
||||
}, Encoders.bean(OtherResearchProduct.class))
|
||||
|
@ -133,7 +137,7 @@ public class SparkEoscTag {
|
|||
if (!Optional.ofNullable(d.getEoscifguidelines()).isPresent())
|
||||
d.setEoscifguidelines(new ArrayList<>());
|
||||
if (containscriteriaTwitter(d)) {
|
||||
addEIG(d.getEoscifguidelines(), "EOSC::Twitter Data", "EOSC::Twitter Data", "", "compliesWith");
|
||||
addEIG(d.getEoscifguidelines(), EOSC_TWITTER_DATA, EOSC_TWITTER_DATA, "", COMPLIES_WITH);
|
||||
}
|
||||
return d;
|
||||
}, Encoders.bean(Dataset.class))
|
||||
|
@ -163,10 +167,12 @@ public class SparkEoscTag {
|
|||
(words.contains("data") || words.contains("dataset")))
|
||||
return true;
|
||||
|
||||
if (r.getSubject().stream().anyMatch(sbj -> sbj.getValue().toLowerCase().contains("twitter")) &&
|
||||
r.getSubject().stream().anyMatch(sbj -> sbj.getValue().toLowerCase().contains("data")))
|
||||
return true;
|
||||
return false;
|
||||
return Optional
|
||||
.ofNullable(r.getSubject())
|
||||
.map(
|
||||
s -> s.stream().anyMatch(sbj -> sbj.getValue().toLowerCase().contains("twitter")) &&
|
||||
s.stream().anyMatch(sbj -> sbj.getValue().toLowerCase().contains("data")))
|
||||
.orElse(false);
|
||||
}
|
||||
|
||||
private static boolean containsCriteriaGalaxy(Result r) {
|
||||
|
@ -176,14 +182,16 @@ public class SparkEoscTag {
|
|||
words.contains("workflow"))
|
||||
return true;
|
||||
|
||||
if (r.getSubject().stream().anyMatch(sbj -> sbj.getValue().toLowerCase().contains("galaxy")) &&
|
||||
r.getSubject().stream().anyMatch(sbj -> sbj.getValue().toLowerCase().contains("workflow")))
|
||||
return true;
|
||||
return false;
|
||||
return Optional
|
||||
.ofNullable(r.getSubject())
|
||||
.map(
|
||||
s -> s.stream().anyMatch(sbj -> sbj.getValue().toLowerCase().contains("galaxy")) &&
|
||||
s.stream().anyMatch(sbj -> sbj.getValue().toLowerCase().contains("workflow")))
|
||||
.orElse(false);
|
||||
}
|
||||
|
||||
private static boolean containsCriteriaNotebook(Software s) {
|
||||
if(!Optional.ofNullable(s.getSubject()).isPresent())
|
||||
if (!Optional.ofNullable(s.getSubject()).isPresent())
|
||||
return false;
|
||||
if (s.getSubject().stream().anyMatch(sbj -> sbj.getValue().toLowerCase().contains("jupyter")))
|
||||
return true;
|
||||
|
@ -225,6 +233,5 @@ public class SparkEoscTag {
|
|||
Arrays.asList(t.getValue().toLowerCase().replaceAll("[^a-zA-Z ]", "").split(" ")))));
|
||||
|
||||
return words;
|
||||
|
||||
}
|
||||
}
|
||||
|
|
|
@ -6,7 +6,6 @@ import java.nio.file.Files;
|
|||
import java.nio.file.Path;
|
||||
import java.util.List;
|
||||
|
||||
import eu.dnetlib.dhp.bulktag.eosc.SparkEoscTag;
|
||||
import org.apache.commons.io.FileUtils;
|
||||
import org.apache.spark.SparkConf;
|
||||
import org.apache.spark.api.java.JavaRDD;
|
||||
|
@ -24,6 +23,7 @@ import org.slf4j.LoggerFactory;
|
|||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
|
||||
import eu.dnetlib.dhp.bulktag.eosc.SparkEoscTag;
|
||||
import eu.dnetlib.dhp.schema.oaf.*;
|
||||
|
||||
public class EOSCTagJobTest {
|
||||
|
|
Loading…
Reference in New Issue