forked from antonis.lempesis/dnet-hadoop
[EOSC tag] avoid NPEs
This commit is contained in:
parent
3329b6ce6b
commit
0727f0ef48
|
@ -23,6 +23,10 @@ import eu.dnetlib.dhp.schema.oaf.*;
|
||||||
public class SparkEoscTag {
|
public class SparkEoscTag {
|
||||||
private static final Logger log = LoggerFactory.getLogger(SparkEoscTag.class);
|
private static final Logger log = LoggerFactory.getLogger(SparkEoscTag.class);
|
||||||
public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
||||||
|
public static final String EOSC_GALAXY_WORKFLOW = "EOSC::Galaxy Workflow";
|
||||||
|
public static final String EOSC_TWITTER_DATA = "EOSC::Twitter Data";
|
||||||
|
public static final String EOSC_JUPYTER_NOTEBOOK = "EOSC::Jupyter Notebook";
|
||||||
|
public static final String COMPLIES_WITH = "compliesWith";
|
||||||
|
|
||||||
public static void main(String[] args) throws Exception {
|
public static void main(String[] args) throws Exception {
|
||||||
String jsonConfiguration = IOUtils
|
String jsonConfiguration = IOUtils
|
||||||
|
@ -76,8 +80,8 @@ public class SparkEoscTag {
|
||||||
if (!Optional.ofNullable(s.getEoscifguidelines()).isPresent())
|
if (!Optional.ofNullable(s.getEoscifguidelines()).isPresent())
|
||||||
s.setEoscifguidelines(new ArrayList<>());
|
s.setEoscifguidelines(new ArrayList<>());
|
||||||
addEIG(
|
addEIG(
|
||||||
s.getEoscifguidelines(), "EOSC::Jupyter Notebook", "EOSC::Jupyter Notebook", "",
|
s.getEoscifguidelines(), EOSC_JUPYTER_NOTEBOOK, EOSC_JUPYTER_NOTEBOOK, "",
|
||||||
"compliesWith");
|
COMPLIES_WITH);
|
||||||
|
|
||||||
}
|
}
|
||||||
if (containsCriteriaGalaxy(s)) {
|
if (containsCriteriaGalaxy(s)) {
|
||||||
|
@ -85,7 +89,7 @@ public class SparkEoscTag {
|
||||||
s.setEoscifguidelines(new ArrayList<>());
|
s.setEoscifguidelines(new ArrayList<>());
|
||||||
|
|
||||||
addEIG(
|
addEIG(
|
||||||
s.getEoscifguidelines(), "EOSC::Galaxy Workflow", "EOSC::Galaxy Workflow", "", "compliesWith");
|
s.getEoscifguidelines(), EOSC_GALAXY_WORKFLOW, EOSC_GALAXY_WORKFLOW, "", COMPLIES_WITH);
|
||||||
}
|
}
|
||||||
return s;
|
return s;
|
||||||
}, Encoders.bean(Software.class))
|
}, Encoders.bean(Software.class))
|
||||||
|
@ -108,11 +112,11 @@ public class SparkEoscTag {
|
||||||
|
|
||||||
if (containsCriteriaGalaxy(orp)) {
|
if (containsCriteriaGalaxy(orp)) {
|
||||||
addEIG(
|
addEIG(
|
||||||
orp.getEoscifguidelines(), "EOSC::Galaxy Workflow", "EOSC::Galaxy Workflow", "",
|
orp.getEoscifguidelines(), EOSC_GALAXY_WORKFLOW, EOSC_GALAXY_WORKFLOW, "",
|
||||||
"compliesWith");
|
COMPLIES_WITH);
|
||||||
}
|
}
|
||||||
if (containscriteriaTwitter(orp)) {
|
if (containscriteriaTwitter(orp)) {
|
||||||
addEIG(orp.getEoscifguidelines(), "EOSC::Twitter Data", "EOSC::Twitter Data", "", "compliesWith");
|
addEIG(orp.getEoscifguidelines(), EOSC_TWITTER_DATA, EOSC_TWITTER_DATA, "", COMPLIES_WITH);
|
||||||
}
|
}
|
||||||
return orp;
|
return orp;
|
||||||
}, Encoders.bean(OtherResearchProduct.class))
|
}, Encoders.bean(OtherResearchProduct.class))
|
||||||
|
@ -133,7 +137,7 @@ public class SparkEoscTag {
|
||||||
if (!Optional.ofNullable(d.getEoscifguidelines()).isPresent())
|
if (!Optional.ofNullable(d.getEoscifguidelines()).isPresent())
|
||||||
d.setEoscifguidelines(new ArrayList<>());
|
d.setEoscifguidelines(new ArrayList<>());
|
||||||
if (containscriteriaTwitter(d)) {
|
if (containscriteriaTwitter(d)) {
|
||||||
addEIG(d.getEoscifguidelines(), "EOSC::Twitter Data", "EOSC::Twitter Data", "", "compliesWith");
|
addEIG(d.getEoscifguidelines(), EOSC_TWITTER_DATA, EOSC_TWITTER_DATA, "", COMPLIES_WITH);
|
||||||
}
|
}
|
||||||
return d;
|
return d;
|
||||||
}, Encoders.bean(Dataset.class))
|
}, Encoders.bean(Dataset.class))
|
||||||
|
@ -163,10 +167,12 @@ public class SparkEoscTag {
|
||||||
(words.contains("data") || words.contains("dataset")))
|
(words.contains("data") || words.contains("dataset")))
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
if (r.getSubject().stream().anyMatch(sbj -> sbj.getValue().toLowerCase().contains("twitter")) &&
|
return Optional
|
||||||
r.getSubject().stream().anyMatch(sbj -> sbj.getValue().toLowerCase().contains("data")))
|
.ofNullable(r.getSubject())
|
||||||
return true;
|
.map(
|
||||||
return false;
|
s -> s.stream().anyMatch(sbj -> sbj.getValue().toLowerCase().contains("twitter")) &&
|
||||||
|
s.stream().anyMatch(sbj -> sbj.getValue().toLowerCase().contains("data")))
|
||||||
|
.orElse(false);
|
||||||
}
|
}
|
||||||
|
|
||||||
private static boolean containsCriteriaGalaxy(Result r) {
|
private static boolean containsCriteriaGalaxy(Result r) {
|
||||||
|
@ -176,10 +182,12 @@ public class SparkEoscTag {
|
||||||
words.contains("workflow"))
|
words.contains("workflow"))
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
if (r.getSubject().stream().anyMatch(sbj -> sbj.getValue().toLowerCase().contains("galaxy")) &&
|
return Optional
|
||||||
r.getSubject().stream().anyMatch(sbj -> sbj.getValue().toLowerCase().contains("workflow")))
|
.ofNullable(r.getSubject())
|
||||||
return true;
|
.map(
|
||||||
return false;
|
s -> s.stream().anyMatch(sbj -> sbj.getValue().toLowerCase().contains("galaxy")) &&
|
||||||
|
s.stream().anyMatch(sbj -> sbj.getValue().toLowerCase().contains("workflow")))
|
||||||
|
.orElse(false);
|
||||||
}
|
}
|
||||||
|
|
||||||
private static boolean containsCriteriaNotebook(Software s) {
|
private static boolean containsCriteriaNotebook(Software s) {
|
||||||
|
@ -225,6 +233,5 @@ public class SparkEoscTag {
|
||||||
Arrays.asList(t.getValue().toLowerCase().replaceAll("[^a-zA-Z ]", "").split(" ")))));
|
Arrays.asList(t.getValue().toLowerCase().replaceAll("[^a-zA-Z ]", "").split(" ")))));
|
||||||
|
|
||||||
return words;
|
return words;
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -6,7 +6,6 @@ import java.nio.file.Files;
|
||||||
import java.nio.file.Path;
|
import java.nio.file.Path;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.bulktag.eosc.SparkEoscTag;
|
|
||||||
import org.apache.commons.io.FileUtils;
|
import org.apache.commons.io.FileUtils;
|
||||||
import org.apache.spark.SparkConf;
|
import org.apache.spark.SparkConf;
|
||||||
import org.apache.spark.api.java.JavaRDD;
|
import org.apache.spark.api.java.JavaRDD;
|
||||||
|
@ -24,6 +23,7 @@ import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.bulktag.eosc.SparkEoscTag;
|
||||||
import eu.dnetlib.dhp.schema.oaf.*;
|
import eu.dnetlib.dhp.schema.oaf.*;
|
||||||
|
|
||||||
public class EOSCTagJobTest {
|
public class EOSCTagJobTest {
|
||||||
|
|
Loading…
Reference in New Issue