[FoS] added check for null on level1 subject

This commit is contained in:
Miriam Baglioni 2023-02-13 13:03:49 +01:00
parent 7473093c84
commit 7184cc0804
3 changed files with 137 additions and 1 deletions

View File

@ -61,7 +61,7 @@ public class Constants {
public static StructuredProperty getSubject(String sbj, String classid, String classname, public static StructuredProperty getSubject(String sbj, String classid, String classname,
String diqualifierclassid) { String diqualifierclassid) {
if (sbj.equals(NULL)) if (sbj == null || sbj.equals(NULL))
return null; return null;
StructuredProperty sp = new StructuredProperty(); StructuredProperty sp = new StructuredProperty();
sp.setValue(sbj); sp.setValue(sbj);

View File

@ -0,0 +1,96 @@
package eu.dnetlib.dhp.actionmanager.createunresolvedentities;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.actionmanager.createunresolvedentities.model.FOSDataModel;
import org.apache.commons.io.FileUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocalFileSystem;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.SparkSession;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
/**
 * Runs {@code GetFOSSparkJob} against the {@code fos_sbs.tsv} fixture on a local Spark session
 * and verifies that every record it writes carries a DOI and all three FoS levels.
 *
 * @author miriam.baglioni
 * @Date 13/02/23
 */
public class GetFosTest {

    private static final Logger log = LoggerFactory.getLogger(GetFosTest.class);

    /** Temporary directory holding the Spark warehouse and the job output; removed in {@link #afterAll()}. */
    private static Path workingDir;

    /** Local Spark session shared by the tests of this class. */
    private static SparkSession spark;

    private static LocalFileSystem fs;

    private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

    /**
     * Creates the temporary working directory and starts a local Spark session whose
     * warehouse directories point inside it.
     *
     * @throws IOException if the temporary directory or the local file system cannot be obtained
     */
    @BeforeAll
    public static void beforeAll() throws IOException {
        workingDir = Files.createTempDirectory(GetFosTest.class.getSimpleName());

        fs = FileSystem.getLocal(new Configuration());
        log.info("using work dir {}", workingDir);

        SparkConf conf = new SparkConf();
        conf.setAppName(GetFosTest.class.getSimpleName());

        conf.setMaster("local[*]");
        conf.set("spark.driver.host", "localhost");
        conf.set("hive.metastore.local", "true");
        conf.set("spark.ui.enabled", "false");
        conf.set("spark.sql.warehouse.dir", workingDir.toString());
        conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString());

        spark = SparkSession
            .builder()
            .appName(GetFosTest.class.getSimpleName())
            .config(conf)
            .getOrCreate();
    }

    /**
     * Stops the Spark session and then removes the working directory.
     *
     * <p>Spark is stopped first because its warehouse directory lives inside the working
     * directory; the directory is deleted even if stopping the session fails.
     *
     * @throws IOException if the working directory cannot be deleted
     */
    @AfterAll
    public static void afterAll() throws IOException {
        try {
            // spark stays null when beforeAll failed before the session was created
            if (spark != null) {
                spark.stop();
            }
        } finally {
            if (workingDir != null) {
                FileUtils.deleteDirectory(workingDir.toFile());
            }
        }
    }

    /**
     * Executes the job on the TSV fixture, reads the output back as one JSON document per line
     * and checks that at least one record was produced and that no record misses the DOI or
     * any of the three levels.
     */
    @Test
    void test3() throws Exception {
        final String sourcePath = getClass()
            .getResource("/eu/dnetlib/dhp/actionmanager/createunresolvedentities/fos/fos_sbs.tsv")
            .getPath();

        final String outputPath = workingDir.toString() + "/fos.json";

        // NOTE(review): "-outputPath" has a single leading dash, unlike the other options;
        // kept as in the original - confirm the argument parser accepts it.
        GetFOSSparkJob
            .main(
                new String[] {
                    "--isSparkSessionManaged", Boolean.FALSE.toString(),
                    "--sourcePath", sourcePath,
                    "-outputPath", outputPath
                });

        final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());

        JavaRDD<FOSDataModel> records = sc
            .textFile(outputPath)
            .map(item -> OBJECT_MAPPER.readValue(item, FOSDataModel.class));

        // Without this guard the per-record checks below would pass vacuously on empty output.
        Assertions.assertTrue(records.count() > 0, "the job produced no FoS records");

        // Single pass over the output instead of one Spark action per field.
        records.foreach(r -> {
            Assertions.assertNotNull(r.getDoi(), "record without doi");
            Assertions.assertNotNull(r.getLevel1(), "level1 missing for doi " + r.getDoi());
            Assertions.assertNotNull(r.getLevel2(), "level2 missing for doi " + r.getDoi());
            Assertions.assertNotNull(r.getLevel3(), "level3 missing for doi " + r.getDoi());
        });
    }
}

View File

@ -0,0 +1,40 @@
doi level1 level2 level3
10.1080/09638237.2018.1466033 03 medical and health sciences 0302 clinical medicine 030212 general & internal medicine
10.1016/j.dsi.2015.10.003 03 medical and health sciences 0301 basic medicine 030105 genetics & heredity
10.1007/s10072-017-2914-9 03 medical and health sciences 0302 clinical medicine 030217 neurology & neurosurgery
10.1016/j.bspc.2021.102726 02 engineering and technology 0206 medical engineering 020601 biomedical engineering
10.1177/0306312706069439 06 humanities and the arts 0601 history and archaeology 060101 anthropology
10.1016/j.jacep.2016.05.010 03 medical and health sciences 0302 clinical medicine 030212 general & internal medicine
10.1111/anae.13418 03 medical and health sciences 0302 clinical medicine 030212 general & internal medicine
10.1142/s1793744210000168 01 natural sciences 0103 physical sciences 010306 general physics
10.1016/j.jadohealth.2019.04.029 03 medical and health sciences 0302 clinical medicine 030212 general & internal medicine
10.1109/icais50930.2021.9395847 02 engineering and technology 0202 electrical engineering, electronic engineering, information engineering 020201 artificial intelligence & image processing
10.1145/3154837 01 natural sciences 0101 mathematics 010102 general mathematics
10.1038/srep38130 03 medical and health sciences 0301 basic medicine 030106 microbiology
10.1007/s13369-017-2871-x 02 engineering and technology 0202 electrical engineering, electronic engineering, information engineering 020201 artificial intelligence & image processing
10.1063/1.4964718 03 medical and health sciences 0301 basic medicine 030104 developmental biology
10.1007/s12603-019-1276-9 03 medical and health sciences 0302 clinical medicine 030212 general & internal medicine
10.1002/cam4.1463 03 medical and health sciences 0301 basic medicine 030104 developmental biology
10.1164/rccm.201611-2290ed 03 medical and health sciences 0302 clinical medicine 030212 general & internal medicine
10.1088/1757-899x/225/1/012132 01 natural sciences 0105 earth and related environmental sciences 010504 meteorology & atmospheric sciences
10.1117/1.jmm.15.1.015501 02 engineering and technology 0210 nano-technology 021001 nanoscience & nanotechnology
10.1088/1361-6587/ab569d 01 natural sciences 0103 physical sciences 010303 astronomy & astrophysics
10.1016/j.rser.2015.11.092 02 engineering and technology 0202 electrical engineering, electronic engineering, information engineering 020209 energy
10.1016/j.jhydrol.2013.06.035 01 natural sciences 0105 earth and related environmental sciences 010504 meteorology & atmospheric sciences
10.1111/php.12892 03 medical and health sciences 0301 basic medicine 030104 developmental biology
10.1088/0264-9381/27/10/105001 01 natural sciences 0103 physical sciences 010308 nuclear & particles physics
10.1016/j.matchemphys.2018.02.039 02 engineering and technology 0210 nano-technology 021001 nanoscience & nanotechnology
10.1098/rsos.160993 03 medical and health sciences 0301 basic medicine 030104 developmental biology
10.1016/j.rinp.2017.07.054 02 engineering and technology 0209 industrial biotechnology 020901 industrial engineering & automation
10.1111/eip.12348 03 medical and health sciences 0302 clinical medicine 030227 psychiatry
10.20965/jrm.2016.p0371 02 engineering and technology 0201 civil engineering 020101 civil engineering
10.2337/dci19-0036 03 medical and health sciences 0302 clinical medicine 030212 general & internal medicine
10.1155/2018/7692913 01 natural sciences 0104 chemical sciences 010404 medicinal & biomolecular chemistry
10.1117/12.2262306 02 engineering and technology 0202 electrical engineering, electronic engineering, information engineering 020206 networking & telecommunications
10.1021/acs.jpcb.7b01885 01 natural sciences 0104 chemical sciences 010405 organic chemistry
10.1177/0033294117711131 05 social sciences 0502 economics and business 050203 business & management
10.1016/j.jrurstud.2017.08.019 05 social sciences 0502 economics and business 050203 business & management
10.1111/febs.15296 03 medical and health sciences 0301 basic medicine 030104 developmental biology
10.3923/jeasci.2017.6922.6927 05 social sciences 0505 law 050501 criminology
10.1007/s10854-017-6376-x 02 engineering and technology 0202 electrical engineering, electronic engineering, information engineering 020208 electrical & electronic engineering
10.3390/app10176095 02 engineering and technology 0202 electrical engineering, electronic engineering, information engineering 020209 energy
1 doi level1 level2 level3
2 10.1080/09638237.2018.1466033 03 medical and health sciences 0302 clinical medicine 030212 general & internal medicine
3 10.1016/j.dsi.2015.10.003 03 medical and health sciences 0301 basic medicine 030105 genetics & heredity
4 10.1007/s10072-017-2914-9 03 medical and health sciences 0302 clinical medicine 030217 neurology & neurosurgery
5 10.1016/j.bspc.2021.102726 02 engineering and technology 0206 medical engineering 020601 biomedical engineering
6 10.1177/0306312706069439 06 humanities and the arts 0601 history and archaeology 060101 anthropology
7 10.1016/j.jacep.2016.05.010 03 medical and health sciences 0302 clinical medicine 030212 general & internal medicine
8 10.1111/anae.13418 03 medical and health sciences 0302 clinical medicine 030212 general & internal medicine
9 10.1142/s1793744210000168 01 natural sciences 0103 physical sciences 010306 general physics
10 10.1016/j.jadohealth.2019.04.029 03 medical and health sciences 0302 clinical medicine 030212 general & internal medicine
11 10.1109/icais50930.2021.9395847 02 engineering and technology 0202 electrical engineering, electronic engineering, information engineering 020201 artificial intelligence & image processing
12 10.1145/3154837 01 natural sciences 0101 mathematics 010102 general mathematics
13 10.1038/srep38130 03 medical and health sciences 0301 basic medicine 030106 microbiology
14 10.1007/s13369-017-2871-x 02 engineering and technology 0202 electrical engineering, electronic engineering, information engineering 020201 artificial intelligence & image processing
15 10.1063/1.4964718 03 medical and health sciences 0301 basic medicine 030104 developmental biology
16 10.1007/s12603-019-1276-9 03 medical and health sciences 0302 clinical medicine 030212 general & internal medicine
17 10.1002/cam4.1463 03 medical and health sciences 0301 basic medicine 030104 developmental biology
18 10.1164/rccm.201611-2290ed 03 medical and health sciences 0302 clinical medicine 030212 general & internal medicine
19 10.1088/1757-899x/225/1/012132 01 natural sciences 0105 earth and related environmental sciences 010504 meteorology & atmospheric sciences
20 10.1117/1.jmm.15.1.015501 02 engineering and technology 0210 nano-technology 021001 nanoscience & nanotechnology
21 10.1088/1361-6587/ab569d 01 natural sciences 0103 physical sciences 010303 astronomy & astrophysics
22 10.1016/j.rser.2015.11.092 02 engineering and technology 0202 electrical engineering, electronic engineering, information engineering 020209 energy
23 10.1016/j.jhydrol.2013.06.035 01 natural sciences 0105 earth and related environmental sciences 010504 meteorology & atmospheric sciences
24 10.1111/php.12892 03 medical and health sciences 0301 basic medicine 030104 developmental biology
25 10.1088/0264-9381/27/10/105001 01 natural sciences 0103 physical sciences 010308 nuclear & particles physics
26 10.1016/j.matchemphys.2018.02.039 02 engineering and technology 0210 nano-technology 021001 nanoscience & nanotechnology
27 10.1098/rsos.160993 03 medical and health sciences 0301 basic medicine 030104 developmental biology
28 10.1016/j.rinp.2017.07.054 02 engineering and technology 0209 industrial biotechnology 020901 industrial engineering & automation
29 10.1111/eip.12348 03 medical and health sciences 0302 clinical medicine 030227 psychiatry
30 10.20965/jrm.2016.p0371 02 engineering and technology 0201 civil engineering 020101 civil engineering
31 10.2337/dci19-0036 03 medical and health sciences 0302 clinical medicine 030212 general & internal medicine
32 10.1155/2018/7692913 01 natural sciences 0104 chemical sciences 010404 medicinal & biomolecular chemistry
33 10.1117/12.2262306 02 engineering and technology 0202 electrical engineering, electronic engineering, information engineering 020206 networking & telecommunications
34 10.1021/acs.jpcb.7b01885 01 natural sciences 0104 chemical sciences 010405 organic chemistry
35 10.1177/0033294117711131 05 social sciences 0502 economics and business 050203 business & management
36 10.1016/j.jrurstud.2017.08.019 05 social sciences 0502 economics and business 050203 business & management
37 10.1111/febs.15296 03 medical and health sciences 0301 basic medicine 030104 developmental biology
38 10.3923/jeasci.2017.6922.6927 05 social sciences 0505 law 050501 criminology
39 10.1007/s10854-017-6376-x 02 engineering and technology 0202 electrical engineering, electronic engineering, information engineering 020208 electrical & electronic engineering
40 10.3390/app10176095 02 engineering and technology 0202 electrical engineering, electronic engineering, information engineering 020209 energy