forked from D-Net/dnet-hadoop
[FoS] added check for null on level1 subject
This commit is contained in:
parent
7473093c84
commit
7184cc0804
|
@ -61,7 +61,7 @@ public class Constants {
|
||||||
|
|
||||||
public static StructuredProperty getSubject(String sbj, String classid, String classname,
|
public static StructuredProperty getSubject(String sbj, String classid, String classname,
|
||||||
String diqualifierclassid) {
|
String diqualifierclassid) {
|
||||||
if (sbj.equals(NULL))
|
if (sbj == null || sbj.equals(NULL))
|
||||||
return null;
|
return null;
|
||||||
StructuredProperty sp = new StructuredProperty();
|
StructuredProperty sp = new StructuredProperty();
|
||||||
sp.setValue(sbj);
|
sp.setValue(sbj);
|
||||||
|
|
|
@ -0,0 +1,96 @@
|
||||||
|
package eu.dnetlib.dhp.actionmanager.createunresolvedentities;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
import eu.dnetlib.dhp.actionmanager.createunresolvedentities.model.FOSDataModel;
|
||||||
|
import org.apache.commons.io.FileUtils;
|
||||||
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
|
import org.apache.hadoop.fs.LocalFileSystem;
|
||||||
|
import org.apache.spark.SparkConf;
|
||||||
|
import org.apache.spark.api.java.JavaRDD;
|
||||||
|
import org.apache.spark.api.java.JavaSparkContext;
|
||||||
|
import org.apache.spark.sql.SparkSession;
|
||||||
|
import org.junit.jupiter.api.AfterAll;
|
||||||
|
import org.junit.jupiter.api.Assertions;
|
||||||
|
import org.junit.jupiter.api.BeforeAll;
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.nio.file.Files;
|
||||||
|
import java.nio.file.Path;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author miriam.baglioni
|
||||||
|
* @Date 13/02/23
|
||||||
|
*/
|
||||||
|
public class GetFosTest {
|
||||||
|
|
||||||
|
private static final Logger log = LoggerFactory.getLogger(ProduceTest.class);
|
||||||
|
|
||||||
|
private static Path workingDir;
|
||||||
|
private static SparkSession spark;
|
||||||
|
private static LocalFileSystem fs;
|
||||||
|
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
||||||
|
|
||||||
|
@BeforeAll
|
||||||
|
public static void beforeAll() throws IOException {
|
||||||
|
workingDir = Files.createTempDirectory(PrepareTest.class.getSimpleName());
|
||||||
|
|
||||||
|
fs = FileSystem.getLocal(new Configuration());
|
||||||
|
log.info("using work dir {}", workingDir);
|
||||||
|
|
||||||
|
SparkConf conf = new SparkConf();
|
||||||
|
conf.setAppName(ProduceTest.class.getSimpleName());
|
||||||
|
|
||||||
|
conf.setMaster("local[*]");
|
||||||
|
conf.set("spark.driver.host", "localhost");
|
||||||
|
conf.set("hive.metastore.local", "true");
|
||||||
|
conf.set("spark.ui.enabled", "false");
|
||||||
|
conf.set("spark.sql.warehouse.dir", workingDir.toString());
|
||||||
|
conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString());
|
||||||
|
|
||||||
|
spark = SparkSession
|
||||||
|
.builder()
|
||||||
|
.appName(PrepareTest.class.getSimpleName())
|
||||||
|
.config(conf)
|
||||||
|
.getOrCreate();
|
||||||
|
}
|
||||||
|
|
||||||
|
@AfterAll
|
||||||
|
public static void afterAll() throws IOException {
|
||||||
|
FileUtils.deleteDirectory(workingDir.toFile());
|
||||||
|
spark.stop();
|
||||||
|
}
|
||||||
|
@Test
|
||||||
|
void test3() throws Exception {
|
||||||
|
final String sourcePath = getClass()
|
||||||
|
.getResource("/eu/dnetlib/dhp/actionmanager/createunresolvedentities/fos/fos_sbs.tsv")
|
||||||
|
.getPath();
|
||||||
|
|
||||||
|
|
||||||
|
final String outputPath = workingDir.toString() + "/fos.json";
|
||||||
|
GetFOSSparkJob
|
||||||
|
.main(
|
||||||
|
new String[] {
|
||||||
|
"--isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||||
|
"--sourcePath", sourcePath,
|
||||||
|
|
||||||
|
"-outputPath", outputPath
|
||||||
|
|
||||||
|
});
|
||||||
|
|
||||||
|
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||||
|
|
||||||
|
JavaRDD<FOSDataModel> tmp = sc
|
||||||
|
.textFile(outputPath)
|
||||||
|
.map(item -> OBJECT_MAPPER.readValue(item, FOSDataModel.class));
|
||||||
|
|
||||||
|
tmp.foreach(t -> Assertions.assertTrue(t.getDoi() != null));
|
||||||
|
tmp.foreach(t -> Assertions.assertTrue(t.getLevel1() != null));
|
||||||
|
tmp.foreach(t -> Assertions.assertTrue(t.getLevel2() != null));
|
||||||
|
tmp.foreach(t -> Assertions.assertTrue(t.getLevel3() != null));
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,40 @@
|
||||||
|
doi level1 level2 level3
|
||||||
|
10.1080/09638237.2018.1466033 03 medical and health sciences 0302 clinical medicine 030212 general & internal medicine
|
||||||
|
10.1016/j.dsi.2015.10.003 03 medical and health sciences 0301 basic medicine 030105 genetics & heredity
|
||||||
|
10.1007/s10072-017-2914-9 03 medical and health sciences 0302 clinical medicine 030217 neurology & neurosurgery
|
||||||
|
10.1016/j.bspc.2021.102726 02 engineering and technology 0206 medical engineering 020601 biomedical engineering
|
||||||
|
10.1177/0306312706069439 06 humanities and the arts 0601 history and archaeology 060101 anthropology
|
||||||
|
10.1016/j.jacep.2016.05.010 03 medical and health sciences 0302 clinical medicine 030212 general & internal medicine
|
||||||
|
10.1111/anae.13418 03 medical and health sciences 0302 clinical medicine 030212 general & internal medicine
|
||||||
|
10.1142/s1793744210000168 01 natural sciences 0103 physical sciences 010306 general physics
|
||||||
|
10.1016/j.jadohealth.2019.04.029 03 medical and health sciences 0302 clinical medicine 030212 general & internal medicine
|
||||||
|
10.1109/icais50930.2021.9395847 02 engineering and technology 0202 electrical engineering, electronic engineering, information engineering 020201 artificial intelligence & image processing
|
||||||
|
10.1145/3154837 01 natural sciences 0101 mathematics 010102 general mathematics
|
||||||
|
10.1038/srep38130 03 medical and health sciences 0301 basic medicine 030106 microbiology
|
||||||
|
10.1007/s13369-017-2871-x 02 engineering and technology 0202 electrical engineering, electronic engineering, information engineering 020201 artificial intelligence & image processing
|
||||||
|
10.1063/1.4964718 03 medical and health sciences 0301 basic medicine 030104 developmental biology
|
||||||
|
10.1007/s12603-019-1276-9 03 medical and health sciences 0302 clinical medicine 030212 general & internal medicine
|
||||||
|
10.1002/cam4.1463 03 medical and health sciences 0301 basic medicine 030104 developmental biology
|
||||||
|
10.1164/rccm.201611-2290ed 03 medical and health sciences 0302 clinical medicine 030212 general & internal medicine
|
||||||
|
10.1088/1757-899x/225/1/012132 01 natural sciences 0105 earth and related environmental sciences 010504 meteorology & atmospheric sciences
|
||||||
|
10.1117/1.jmm.15.1.015501 02 engineering and technology 0210 nano-technology 021001 nanoscience & nanotechnology
|
||||||
|
10.1088/1361-6587/ab569d 01 natural sciences 0103 physical sciences 010303 astronomy & astrophysics
|
||||||
|
10.1016/j.rser.2015.11.092 02 engineering and technology 0202 electrical engineering, electronic engineering, information engineering 020209 energy
|
||||||
|
10.1016/j.jhydrol.2013.06.035 01 natural sciences 0105 earth and related environmental sciences 010504 meteorology & atmospheric sciences
|
||||||
|
10.1111/php.12892 03 medical and health sciences 0301 basic medicine 030104 developmental biology
|
||||||
|
10.1088/0264-9381/27/10/105001 01 natural sciences 0103 physical sciences 010308 nuclear & particles physics
|
||||||
|
10.1016/j.matchemphys.2018.02.039 02 engineering and technology 0210 nano-technology 021001 nanoscience & nanotechnology
|
||||||
|
10.1098/rsos.160993 03 medical and health sciences 0301 basic medicine 030104 developmental biology
|
||||||
|
10.1016/j.rinp.2017.07.054 02 engineering and technology 0209 industrial biotechnology 020901 industrial engineering & automation
|
||||||
|
10.1111/eip.12348 03 medical and health sciences 0302 clinical medicine 030227 psychiatry
|
||||||
|
10.20965/jrm.2016.p0371 02 engineering and technology 0201 civil engineering 020101 civil engineering
|
||||||
|
10.2337/dci19-0036 03 medical and health sciences 0302 clinical medicine 030212 general & internal medicine
|
||||||
|
10.1155/2018/7692913 01 natural sciences 0104 chemical sciences 010404 medicinal & biomolecular chemistry
|
||||||
|
10.1117/12.2262306 02 engineering and technology 0202 electrical engineering, electronic engineering, information engineering 020206 networking & telecommunications
|
||||||
|
10.1021/acs.jpcb.7b01885 01 natural sciences 0104 chemical sciences 010405 organic chemistry
|
||||||
|
10.1177/0033294117711131 05 social sciences 0502 economics and business 050203 business & management
|
||||||
|
10.1016/j.jrurstud.2017.08.019 05 social sciences 0502 economics and business 050203 business & management
|
||||||
|
10.1111/febs.15296 03 medical and health sciences 0301 basic medicine 030104 developmental biology
|
||||||
|
10.3923/jeasci.2017.6922.6927 05 social sciences 0505 law 050501 criminology
|
||||||
|
10.1007/s10854-017-6376-x 02 engineering and technology 0202 electrical engineering, electronic engineering, information engineering 020208 electrical & electronic engineering
|
||||||
|
10.3390/app10176095 02 engineering and technology 0202 electrical engineering, electronic engineering, information engineering 020209 energy
|
|
Loading…
Reference in New Issue