From b81efb6a9d0c8dac4b4cc551e7574878b1e377f8 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 22 Dec 2021 21:40:35 +0100 Subject: [PATCH] [FOS]changed the mapping between the csv and the model. Changed Test classes and resources --- .../dnetlib/dhp/actionmanager/Constants.java | 46 +++++------ .../createunresolvedentities/GetFOSData.java | 2 +- .../PrepareFOSSparkJob.java | 35 +++++---- .../model/FOSDataModel.java | 8 +- .../createunresolvedentities/PrepareTest.java | 6 +- .../createunresolvedentities/ProduceTest.java | 25 ++++-- .../createunresolvedentities/fos/fos.json | 77 ++++++++++--------- .../createunresolvedentities/fos/fos_sbs.csv | 39 ++++++++++ .../fos/h2020_fos_sbs.csv | 38 --------- 9 files changed, 148 insertions(+), 128 deletions(-) create mode 100644 dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/fos/fos_sbs.csv delete mode 100644 dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/fos/h2020_fos_sbs.csv diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/Constants.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/Constants.java index 7a3814b710..153a98d3ff 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/Constants.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/Constants.java @@ -3,9 +3,6 @@ package eu.dnetlib.dhp.actionmanager; import java.util.Optional; -import eu.dnetlib.dhp.schema.common.ModelConstants; -import eu.dnetlib.dhp.schema.oaf.StructuredProperty; -import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; @@ -14,6 +11,9 @@ import org.apache.spark.sql.SparkSession; import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.schema.common.ModelConstants; +import eu.dnetlib.dhp.schema.oaf.StructuredProperty; +import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; public class Constants { @@ -55,28 +55,28 @@ public class Constants { StructuredProperty sp = new StructuredProperty(); sp.setValue(sbj); sp - .setQualifier( - OafMapperUtils - .qualifier(classid - , - classname, - ModelConstants.DNET_SUBJECT_TYPOLOGIES, - ModelConstants.DNET_SUBJECT_TYPOLOGIES)); + .setQualifier( + OafMapperUtils + .qualifier( + classid, + classname, + ModelConstants.DNET_SUBJECT_TYPOLOGIES, + ModelConstants.DNET_SUBJECT_TYPOLOGIES)); sp - .setDataInfo( + .setDataInfo( + OafMapperUtils + .dataInfo( + false, + UPDATE_DATA_INFO_TYPE, + true, + false, OafMapperUtils - .dataInfo( - false, - UPDATE_DATA_INFO_TYPE, - true, - false, - OafMapperUtils - .qualifier( - UPDATE_SUBJECT_FOS_CLASS_ID, - UPDATE_CLASS_NAME, - ModelConstants.DNET_PROVENANCE_ACTIONS, - ModelConstants.DNET_PROVENANCE_ACTIONS), - "")); + .qualifier( + UPDATE_SUBJECT_FOS_CLASS_ID, + UPDATE_CLASS_NAME, + ModelConstants.DNET_PROVENANCE_ACTIONS, + ModelConstants.DNET_PROVENANCE_ACTIONS), + "")); return sp; diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/GetFOSData.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/GetFOSData.java index 9dec3e8626..bd430d940f 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/GetFOSData.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/GetFOSData.java @@ -21,7 +21,7 @@ public class GetFOSData implements Serializable { private static final Logger log = LoggerFactory.getLogger(GetFOSData.class); - public static final char DEFAULT_DELIMITER = '\t'; + public static final char DEFAULT_DELIMITER = ','; public static void main(final String[] args) throws Exception { final ArgumentApplicationParser parser = new ArgumentApplicationParser( diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareFOSSparkJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareFOSSparkJob.java index 1a6e9ddfcb..8aadaf98e0 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareFOSSparkJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareFOSSparkJob.java @@ -11,6 +11,7 @@ import java.util.stream.Collectors; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.MapFunction; +import org.apache.spark.api.java.function.MapGroupsFunction; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; import org.apache.spark.sql.SaveMode; @@ -64,26 +65,32 @@ public class PrepareFOSSparkJob implements Serializable { private static void distributeFOSdois(SparkSession spark, String sourcePath, String outputPath) { Dataset fosDataset = readPath(spark, sourcePath, FOSDataModel.class); - fosDataset.map((MapFunction) value -> { - Result r = new Result(); - r.setId(DHPUtils.generateUnresolvedIdentifier(value.getDoi(), DOI)); - r.setSubject(getSubjects(value)); - return r; - }, Encoders.bean(Result.class)) + fosDataset.groupByKey((MapFunction)v->v.getDoi(), Encoders.STRING()) + .mapGroups((MapGroupsFunction)(k,it)->{ + Result r = new Result(); + FOSDataModel first = it.next(); + r.setId(DHPUtils.generateUnresolvedIdentifier(first.getDoi(), DOI)); + HashSet level1 = new HashSet<>(); + HashSet level2 = new HashSet<>(); + HashSet level3 = new HashSet<>(); + addLevels(level1, level2, level3, first); + it.forEachRemaining(v -> addLevels(level1, level2, level3, v)); + Listsbjs = new ArrayList<>(); + level1.forEach(l -> sbjs.add(getSubject(l, FOS_CLASS_ID, FOS_CLASS_NAME))); + level2.forEach(l -> sbjs.add(getSubject(l, FOS_CLASS_ID, FOS_CLASS_NAME))); + level3.forEach(l -> sbjs.add(getSubject(l, FOS_CLASS_ID, FOS_CLASS_NAME))); + return r; + }, Encoders.bean(Result.class)) .write() .mode(SaveMode.Overwrite) .option("compression", "gzip") .json(outputPath + "/fos"); } - private static List getSubjects(FOSDataModel fos) { - return Arrays - .asList(getSubject(fos.getLevel1(), FOS_CLASS_ID, FOS_CLASS_NAME), - getSubject(fos.getLevel2(), FOS_CLASS_ID, FOS_CLASS_NAME), - getSubject(fos.getLevel3(), FOS_CLASS_ID, FOS_CLASS_NAME)) - .stream() - .filter(Objects::nonNull) - .collect(Collectors.toList()); + private static void addLevels(HashSet level1, HashSet level2, HashSet level3, FOSDataModel first) { + level1.add(first.getLevel1()); + level2.add(first.getLevel2()); + level3.add(first.getLevel3()); } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/model/FOSDataModel.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/model/FOSDataModel.java index befb230cb4..e98ba74a1a 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/model/FOSDataModel.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/model/FOSDataModel.java @@ -6,19 +6,19 @@ import java.io.Serializable; import com.opencsv.bean.CsvBindByPosition; public class FOSDataModel implements Serializable { - @CsvBindByPosition(position = 1) + @CsvBindByPosition(position = 0) // @CsvBindByName(column = "doi") private String doi; - @CsvBindByPosition(position = 2) + @CsvBindByPosition(position = 1) // @CsvBindByName(column = "level1") private String level1; - @CsvBindByPosition(position = 3) + @CsvBindByPosition(position = 2) // @CsvBindByName(column = "level2") private String level2; - @CsvBindByPosition(position = 4) + @CsvBindByPosition(position = 3) // @CsvBindByName(column = "level3") private String level3; diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareTest.java index 125aa60fe2..67a0f34654 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareTest.java @@ -155,14 +155,14 @@ public class PrepareTest { void getFOSFileTest() throws IOException, ClassNotFoundException { final String sourcePath = getClass() - .getResource("/eu/dnetlib/dhp/actionmanager/createunresolvedentities/fos/h2020_fos_sbs.csv") + .getResource("/eu/dnetlib/dhp/actionmanager/createunresolvedentities/fos/fos_sbs.csv") .getPath(); final String outputPath = workingDir.toString() + "/fos.json"; new GetFOSData() .doRewrite( sourcePath, outputPath, "eu.dnetlib.dhp.actionmanager.createunresolvedentities.model.FOSDataModel", - '\t', fs); + ',', fs); BufferedReader in = new BufferedReader( new InputStreamReader(fs.open(new org.apache.hadoop.fs.Path(outputPath)))); @@ -176,7 +176,7 @@ public class PrepareTest { count += 1; } - assertEquals(38, count); + assertEquals(39, count); } diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/ProduceTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/ProduceTest.java index 36417f6141..8635dcfb8f 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/ProduceTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/ProduceTest.java @@ -107,13 +107,13 @@ public class ProduceTest { .textFile(workingDir.toString() + "/unresolved") .map(item -> OBJECT_MAPPER.readValue(item, Result.class)); - Assertions.assertEquals(135, tmp.count()); + Assertions.assertEquals(105, tmp.count()); Assertions.assertEquals(1, tmp.filter(row -> row.getId().equals("unresolved::10.3390/s18072310::doi")).count()); Assertions .assertEquals( - 3, tmp + 6, tmp .filter(row -> row.getId().equals("unresolved::10.3390/s18072310::doi")) .collect() .get(0) @@ -175,9 +175,20 @@ public class ProduceTest { ModelConstants.DNET_PROVENANCE_ACTIONS, sbj.getDataInfo().getProvenanceaction().getSchemename())); - sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("engineering and technology")); - sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("nano-technology")); - sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("nanoscience & nanotechnology")); + Assertions + .assertEquals( + true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("04 agricultural and veterinary sciences"))); + Assertions.assertEquals(false, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("nano-technology"))); + Assertions + .assertEquals( + true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("0404 agricultural biotechnology"))); + Assertions.assertEquals(true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("040502 food science"))); + + Assertions + .assertEquals(true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("03 medical and health sciences"))); + Assertions.assertEquals(true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("0303 health sciences"))); + Assertions + .assertEquals(true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("030309 nutrition & dietetics"))); List measures = tmp .filter(row -> row.getId().equals("unresolved::10.3390/s18072310::doi")) @@ -219,7 +230,7 @@ public class ProduceTest { Assertions .assertEquals( - 49, tmp + 19, tmp .filter(row -> !row.getId().equals("unresolved::10.3390/s18072310::doi")) .filter(row -> row.getSubject() != null) .count()); @@ -229,7 +240,7 @@ public class ProduceTest { 85, tmp .filter(row -> !row.getId().equals("unresolved::10.3390/s18072310::doi")) - .filter(r -> r.getInstance() != null) + .filter(r -> r.getInstance() != null && r.getInstance().size() > 0) .count()); } diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/fos/fos.json b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/fos/fos.json index 1b46a3d259..a8221324fd 100644 --- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/fos/fos.json +++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/fos/fos.json @@ -1,38 +1,39 @@ -{"doi":"10.3390/s18072310","level1":"engineering and technology","level2":"nano-technology","level3":"nanoscience & nanotechnology"} -{"doi":"10.1111/1365-2656.12831\u000210.17863/cam.24369","level1":"social sciences","level2":"psychology and cognitive sciences","level3":"NULL"} -{"doi":"10.3929/ethz-b-000187584\u000210.1002/chem.201701644","level1":"natural sciences","level2":"NULL","level3":"NULL"} -{"doi":"10.1080/01913123.2017.1367361","level1":"medical and health sciences","level2":"clinical medicine","level3":"oncology & carcinogenesis"} -{"doi":"10.1051/e3sconf/20199207011","level1":"natural sciences","level2":"earth and related environmental sciences","level3":"environmental sciences"} -{"doi":"10.1038/onc.2015.333","level1":"medical and health sciences","level2":"clinical medicine","level3":"oncology & carcinogenesis"} -{"doi":"10.1093/mnras/staa256","level1":"natural sciences","level2":"physical sciences","level3":"NULL"} -{"doi":"10.1016/j.jclepro.2018.07.166","level1":"engineering and technology","level2":"other engineering and technologies","level3":"building & construction"} -{"doi":"10.1103/physrevlett.125.037403","level1":"natural sciences","level2":"physical sciences","level3":"nuclear & particles physics"} -{"doi":"10.1080/03602532.2017.1316285","level1":"natural sciences","level2":"NULL","level3":"NULL"} -{"doi":"10.1001/jamanetworkopen.2019.1868","level1":"medical and health sciences","level2":"other medical science","level3":"health policy & services"} -{"doi":"10.1128/mra.00874-18","level1":"natural sciences","level2":"biological sciences","level3":"plant biology & botany"} -{"doi":"10.1016/j.nancom.2018.03.001","level1":"engineering and technology","level2":"NULL","level3":"NULL"} -{"doi":"10.1112/topo.12174","level1":"natural sciences","level2":"NULL","level3":"NULL"} -{"doi":"10.12688/wellcomeopenres.15846.1","level1":"medical and health sciences","level2":"health sciences","level3":"NULL"} -{"doi":"10.21468/scipostphys.3.1.001","level1":"natural sciences","level2":"physical sciences","level3":"NULL"} -{"doi":"10.1088/1741-4326/ab6c77","level1":"natural sciences","level2":"physical sciences","level3":"nuclear & particles physics"} -{"doi":"10.1109/tpwrs.2019.2944747","level1":"engineering and technology","level2":"electrical engineering, electronic engineering, information engineering","level3":"electrical & electronic engineering"} -{"doi":"10.1016/j.expthermflusci.2019.109994\u000210.17863/cam.46212","level1":"engineering and technology","level2":"mechanical engineering","level3":"mechanical engineering & transports"} -{"doi":"10.1109/tc.2018.2860012","level1":"engineering and technology","level2":"electrical engineering, electronic engineering, information engineering","level3":"computer hardware & architecture"} -{"doi":"10.1002/mma.6622","level1":"natural sciences","level2":"mathematics","level3":"numerical & computational mathematics"} -{"doi":"10.1051/radiopro/2020020","level1":"natural sciences","level2":"chemical sciences","level3":"NULL"} -{"doi":"10.1007/s12268-019-1003-4","level1":"medical and health sciences","level2":"basic medicine","level3":"NULL"} -{"doi":"10.3390/cancers12010236","level1":"medical and health sciences","level2":"health sciences","level3":"biochemistry & molecular biology"} -{"doi":"10.6084/m9.figshare.9912614\u000210.6084/m9.figshare.9912614.v1\u000210.1080/00268976.2019.1665199","level1":"natural sciences","level2":"chemical sciences","level3":"physical chemistry"} -{"doi":"10.1175/jpo-d-17-0239.1","level1":"natural sciences","level2":"biological sciences","level3":"marine biology & hydrobiology"} -{"doi":"10.1007/s13218-020-00674-7","level1":"engineering and technology","level2":"industrial biotechnology","level3":"industrial engineering & automation"} -{"doi":"10.1016/j.psyneuen.2016.02.003\u000210.1016/j.psyneuen.2016.02.00310.7892/boris.78886\u000210.7892/boris.78886","level1":"medical and health sciences","level2":"basic medicine","level3":"NULL"} -{"doi":"10.1109/ted.2018.2813542","level1":"engineering and technology","level2":"electrical engineering, electronic engineering, information engineering","level3":"electrical & electronic engineering"} -{"doi":"10.3989/scimar.04739.25a","level1":"natural sciences","level2":"biological sciences","level3":"NULL"} -{"doi":"10.3390/su12187503","level1":"natural sciences","level2":"earth and related environmental sciences","level3":"NULL"} -{"doi":"10.1016/j.ccell.2018.08.017","level1":"medical and health sciences","level2":"basic medicine","level3":"biochemistry & molecular biology"} -{"doi":"10.1103/physrevresearch.2.023322","level1":"natural sciences","level2":"physical sciences","level3":"nuclear & particles physics"} -{"doi":"10.1039/c8cp03234c","level1":"natural sciences","level2":"NULL","level3":"NULL"} -{"doi":"10.5281/zenodo.3696557\u000210.5281/zenodo.3696556\u000210.1109/jsac.2016.2545384","level1":"engineering and technology","level2":"electrical engineering, electronic engineering, information engineering","level3":"networking & telecommunications"} -{"doi":"10.1038/ng.3667\u000210.1038/ng.3667.\u000210.17615/tct6-4m26\u000210.17863/cam.15649","level1":"medical and health sciences","level2":"health sciences","level3":"genetics & heredity"} -{"doi":"10.1016/j.jclepro.2019.119065","level1":"engineering and technology","level2":"other engineering and technologies","level3":"building & construction"} -{"doi":"10.1111/pce.13392","level1":"agricultural and veterinary sciences","level2":"agriculture, forestry, and fisheries","level3":"agronomy & agriculture"} \ No newline at end of file +{"doi":"10.1080/1536383x.2020.1868997","level1":"02 engineering and technology","level2":"0210 nano-technology","level3":"021001 nanoscience & nanotechnology"} +{"doi":"10.1080/1536383x.2020.1868997","level1":"01 natural sciences","level2":"0104 chemical sciences","level3":"010402 general chemistry"} +{"doi":"10.1186/s40425-019-0732-8","level1":"03 medical and health sciences","level2":"0302 clinical medicine","level3":"030220 oncology & carcinogenesis"} +{"doi":"10.1186/s40425-019-0732-8","level1":"03 medical and health sciences","level2":"0301 basic medicine","level3":"030304 developmental biology"} +{"doi":"10.1007/s10482-021-01529-3","level1":"03 medical and health sciences","level2":"0301 basic medicine","level3":"030304 developmental biology"} +{"doi":"10.1007/s10482-021-01529-3","level1":"03 medical and health sciences","level2":"0301 basic medicine","level3":"030306 microbiology"} +{"doi":"10.1155/2021/6643273","level1":"01 natural sciences","level2":"0103 physical sciences","level3":"010301 acoustics"} +{"doi":"10.1155/2021/6643273","level1":"02 engineering and technology","level2":"0209 industrial biotechnology","level3":"020901 industrial engineering & automation"} +{"doi":"10.12737/article_5d6613dbf2ad51.82646096","level1":"02 engineering and technology","level2":"0210 nano-technology","level3":"021001 nanoscience & nanotechnology"} +{"doi":"10.12737/article_5d6613dbf2ad51.82646096","level1":"01 natural sciences","level2":"0103 physical sciences","level3":"010302 applied physics"} +{"doi":"10.1216/jie.2020.32.457","level1":"01 natural sciences","level2":"0101 mathematics","level3":"010101 applied mathematics"} +{"doi":"10.1216/jie.2020.32.457","level1":"01 natural sciences","level2":"0101 mathematics","level3":"010102 general mathematics"} +{"doi":"10.3934/naco.2021021","level1":"02 engineering and technology","level2":"0211 other engineering and technologies","level3":"021103 operations research"} +{"doi":"10.3934/naco.2021021","level1":"02 engineering and technology","level2":"0209 industrial biotechnology","level3":"020901 industrial engineering & automation"} +{"doi":"10.1080/1034912x.2021.1910933","level1":"05 social sciences","level2":"050301 education","level3":"050301 education"} +{"doi":"10.1080/1034912x.2021.1910933","level1":"05 social sciences","level2":"0501 psychology and cognitive sciences","level3":"050104 developmental & child psychology"} +{"doi":"10.1016/j.rtbm.2020.100596","level1":"05 social sciences","level2":"0502 economics and business","level3":"050211 marketing"} +{"doi":"10.1016/j.rtbm.2020.100596","level1":"05 social sciences","level2":"0502 economics and business","level3":"050212 sport, leisure & tourism"} +{"doi":"10.14807/ijmp.v11i8.1220","level1":"05 social sciences","level2":"0502 economics and business","level3":"050211 marketing"} +{"doi":"10.14807/ijmp.v11i8.1220","level1":"05 social sciences","level2":"0502 economics and business","level3":"050203 business & management"} +{"doi":"10.1007/s13205-020-02415-x","level1":"03 medical and health sciences","level2":"0303 health sciences","level3":"030304 developmental biology"} +{"doi":"10.1007/s13205-020-02415-x","level1":"03 medical and health sciences","level2":"0303 health sciences","level3":"030302 biochemistry & molecular biology"} +{"doi":"10.3390/s18072310","level1":"04 agricultural and veterinary sciences","level2":"0404 agricultural biotechnology","level3":"040502 food science"} +{"doi":"10.3390/s18072310","level1":"03 medical and health sciences","level2":"0303 health sciences","level3":"030309 nutrition & dietetics"} +{"doi":"10.1063/5.0032658","level1":"01 natural sciences","level2":"0103 physical sciences","level3":"010304 chemical physics"} +{"doi":"10.1063/5.0032658","level1":"01 natural sciences","level2":"0104 chemical sciences","level3":"010402 general chemistry"} +{"doi":"10.1145/3411174.3411195","level1":"02 engineering and technology","level2":"0202 electrical engineering, electronic engineering, information engineering","level3":"020201 artificial intelligence & image processing"} +{"doi":"10.1145/3411174.3411195","level1":"02 engineering and technology","level2":"0202 electrical engineering, electronic engineering, information engineering","level3":"020206 networking & telecommunications"} +{"doi":"10.1021/acs.joc.0c02755","level1":"01 natural sciences","level2":"0104 chemical sciences","level3":"010405 organic chemistry"} +{"doi":"10.1021/acs.joc.0c02755","level1":"01 natural sciences","level2":"0104 chemical sciences","level3":"010402 general chemistry"} +{"doi":"10.1002/jcp.28608","level1":"03 medical and health sciences","level2":"0302 clinical medicine","level3":"030220 oncology & carcinogenesis"} +{"doi":"10.1002/jcp.28608","level1":"03 medical and health sciences","level2":"0301 basic medicine","level3":"030304 developmental biology"} +{"doi":"10.1097/cmr.0000000000000579","level1":"03 medical and health sciences","level2":"0302 clinical medicine","level3":"030220 oncology & carcinogenesis"} +{"doi":"10.1097/cmr.0000000000000579","level1":"03 medical and health sciences","level2":"0301 basic medicine","level3":"030304 developmental biology"} +{"doi":"10.1007/s11164-020-04383-6","level1":"01 natural sciences","level2":"0104 chemical sciences","level3":"010405 organic chemistry"} +{"doi":"10.1007/s11164-020-04383-6","level1":"01 natural sciences","level2":"0104 chemical sciences","level3":"010402 general chemistry"} +{"doi":"10.1016/j.actpsy.2020.103155","level1":"05 social sciences","level2":"0501 psychology and cognitive sciences","level3":"050105 experimental psychology"} +{"doi":"10.1016/j.actpsy.2020.103155","level1":"03 medical and health sciences","level2":"0302 clinical medicine","level3":"030217 neurology & neurosurgery"} +{"doi":"10.1109/memea49120.2020.9137187","level1":"02 engineering and technology","level2":"0202 electrical engineering, electronic engineering, information engineering","level3":"020208 electrical & electronic engineering"} \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/fos/fos_sbs.csv b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/fos/fos_sbs.csv new file mode 100644 index 0000000000..c5a2a821a8 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/fos/fos_sbs.csv @@ -0,0 +1,39 @@ +10.1080/1536383x.2020.1868997,02 engineering and technology,0210 nano-technology,021001 nanoscience & nanotechnology +10.1080/1536383x.2020.1868997,01 natural sciences,0104 chemical sciences,010402 general chemistry +10.1186/s40425-019-0732-8,03 medical and health sciences,0302 clinical medicine,030220 oncology & carcinogenesis +10.1186/s40425-019-0732-8,03 medical and health sciences,0301 basic medicine,030304 developmental biology +10.1007/s10482-021-01529-3,03 medical and health sciences,0301 basic medicine,030304 developmental biology +10.1007/s10482-021-01529-3,03 medical and health sciences,0301 basic medicine,030306 microbiology +10.1155/2021/6643273,01 natural sciences,0103 physical sciences,010301 acoustics +10.1155/2021/6643273,02 engineering and technology,0209 industrial biotechnology,020901 industrial engineering & automation +10.12737/article_5d6613dbf2ad51.82646096,02 engineering and technology,0210 nano-technology,021001 nanoscience & nanotechnology +10.12737/article_5d6613dbf2ad51.82646096,01 natural sciences,0103 physical sciences,010302 applied physics +10.1216/jie.2020.32.457,01 natural sciences,0101 mathematics,010101 applied mathematics +10.1216/jie.2020.32.457,01 natural sciences,0101 mathematics,010102 general mathematics +10.3934/naco.2021021,02 engineering and technology,0211 other engineering and technologies,021103 operations research +10.3934/naco.2021021,02 engineering and technology,0209 industrial biotechnology,020901 industrial engineering & automation +10.1080/1034912x.2021.1910933,05 social sciences,050301 education,050301 education +10.1080/1034912x.2021.1910933,05 social sciences,0501 psychology and cognitive sciences,050104 developmental & child psychology +10.1016/j.rtbm.2020.100596,05 social sciences,0502 economics and business,050211 marketing +10.1016/j.rtbm.2020.100596,05 social sciences,0502 economics and business,"050212 sport, leisure & tourism" +10.14807/ijmp.v11i8.1220,05 social sciences,0502 economics and business,050211 marketing +10.14807/ijmp.v11i8.1220,05 social sciences,0502 economics and business,050203 business & management +10.1007/s13205-020-02415-x,03 medical and health sciences,0303 health sciences,030304 developmental biology +10.1007/s13205-020-02415-x,03 medical and health sciences,0303 health sciences,030302 biochemistry & molecular biology +10.3390/foods10040865,04 agricultural and veterinary sciences,0404 agricultural biotechnology,040502 food science +10.3390/foods10040865,03 medical and health sciences,0303 health sciences,030309 nutrition & dietetics +10.1063/5.0032658,01 natural sciences,0103 physical sciences,010304 chemical physics +10.1063/5.0032658,01 natural sciences,0104 chemical sciences,010402 general chemistry +10.1145/3411174.3411195,02 engineering and technology,"0202 electrical engineering, electronic engineering, information engineering",020201 artificial intelligence & image processing +10.1145/3411174.3411195,02 engineering and technology,"0202 electrical engineering, electronic engineering, information engineering",020206 networking & telecommunications +10.1021/acs.joc.0c02755,01 natural sciences,0104 chemical sciences,010405 organic chemistry +10.1021/acs.joc.0c02755,01 natural sciences,0104 chemical sciences,010402 general chemistry +10.1002/jcp.28608,03 medical and health sciences,0302 clinical medicine,030220 oncology & carcinogenesis +10.1002/jcp.28608,03 medical and health sciences,0301 basic medicine,030304 developmental biology +10.1097/cmr.0000000000000579,03 medical and health sciences,0302 clinical medicine,030220 oncology & carcinogenesis +10.1097/cmr.0000000000000579,03 medical and health sciences,0301 basic medicine,030304 developmental biology +10.1007/s11164-020-04383-6,01 natural sciences,0104 chemical sciences,010405 organic chemistry +10.1007/s11164-020-04383-6,01 natural sciences,0104 chemical sciences,010402 general chemistry +10.1016/j.actpsy.2020.103155,05 social sciences,0501 psychology and cognitive sciences,050105 experimental psychology +10.1016/j.actpsy.2020.103155,03 medical and health sciences,0302 clinical medicine,030217 neurology & neurosurgery +10.1109/memea49120.2020.9137187,02 engineering and technology,"0202 electrical engineering, electronic engineering, information engineering",020208 electrical & electronic engineering \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/fos/h2020_fos_sbs.csv b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/fos/h2020_fos_sbs.csv deleted file mode 100644 index e874353e8f..0000000000 --- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/fos/h2020_fos_sbs.csv +++ /dev/null @@ -1,38 +0,0 @@ -dedup_wf_001::ddcc7a56fa13e49bcc59c6bdd19ad26c 10.3390/s18072310 engineering and technology nano-technology nanoscience & nanotechnology -dedup_wf_001::b76062d56e28224eac56111a4e1e5ecf 10.1111/1365-2656.1283110.17863/cam.24369 social sciences psychology and cognitive sciences NULL -dedup_wf_001::bb752acb8f403a25fa7851a302f7b7ac 10.3929/ethz-b-00018758410.1002/chem.201701644 natural sciences NULL NULL -dedup_wf_001::2f1435a9201ecf5cbbcb12c9b2d971cd 10.1080/01913123.2017.1367361 medical and health sciences clinical medicine oncology & carcinogenesis -dedup_wf_001::fc9e47ec16c67b101724320d4b030514 10.1051/e3sconf/20199207011 natural sciences earth and related environmental sciences environmental sciences -dedup_wf_001::caa1e5b4de387cb31751552f4f0f5d72 10.1038/onc.2015.333 medical and health sciences clinical medicine oncology & carcinogenesis -dedup_wf_001::c2a98df5637d69bf0524eaf40fe6bf11 10.1093/mnras/staa256 natural sciences physical sciences NULL -dedup_wf_001::c221262bdc77cbfd59859a402f0e3991 10.1016/j.jclepro.2018.07.166 engineering and technology other engineering and technologies building & construction -doiboost____::d56d9dc21f317b3e009d5b6c8ea87212 10.1103/physrevlett.125.037403 natural sciences physical sciences nuclear & particles physics -dedup_wf_001::8a7269c8ee6470b2fb4fd384bc389e08 10.1080/03602532.2017.1316285 natural sciences NULL NULL -dedup_wf_001::28342ebbc19833e4e1f4a2b23cf5ee20 10.1001/jamanetworkopen.2019.1868 medical and health sciences other medical science health policy & services -dedup_wf_001::c1e1daf2b55dd9ec8e1c7c7458bbc7bc 10.1128/mra.00874-18 natural sciences biological sciences plant biology & botany -dedup_wf_001::a2ef4a2720c71907180750e5871298ef 10.1016/j.nancom.2018.03.001 engineering and technology NULL NULL -dedup_wf_001::676f46a31519e83a89efcb1c626286fb 10.1112/topo.12174 natural sciences NULL NULL -dedup_wf_001::6f2761642f1e39313388e2c4060657dd 10.12688/wellcomeopenres.15846.1 medical and health sciences health sciences NULL -dedup_wf_001::e414c1dec599521a9635a60de0f6755b 10.21468/scipostphys.3.1.001 natural sciences physical sciences NULL -dedup_wf_001::f3395fe0f330164ea424dc61c86c9a3d 10.1088/1741-4326/ab6c77 natural sciences physical sciences nuclear & particles physics -dedup_wf_001::a4f32a97a783117012f1de11797e73f2 10.1109/tpwrs.2019.2944747 engineering and technology electrical engineering, electronic engineering, information engineering electrical & electronic engineering -dedup_wf_001::313ae1cd083ae1696d12dd1909f97df8 10.1016/j.expthermflusci.2019.10999410.17863/cam.46212 engineering and technology mechanical engineering mechanical engineering & transports -dedup_wf_001::2a300a7d3ca7347791ebcef986bc0682 10.1109/tc.2018.2860012 engineering and technology electrical engineering, electronic engineering, information engineering computer hardware & architecture -doiboost____::5b79bd7bd9f87361b4a4abc3cbb2df75 10.1002/mma.6622 natural sciences mathematics numerical & computational mathematics -dedup_wf_001::6a3f61f217a2519fbaddea1094e3bfc2 10.1051/radiopro/2020020 natural sciences chemical sciences NULL -dedup_wf_001::a3f0430309a639f4234a0e57b10f2dee 10.1007/s12268-019-1003-4 medical and health sciences basic medicine NULL -dedup_wf_001::b6b8a3a1cccbee459cf3343485efdb12 10.3390/cancers12010236 medical and health sciences health sciences biochemistry & molecular biology -dedup_wf_001::dd06ee7974730e7b09a4f03c83b3f9bd 10.6084/m9.figshare.991261410.6084/m9.figshare.9912614.v110.1080/00268976.2019.1665199 natural sciences chemical sciences physical chemistry -dedup_wf_001::027c78bef6f972b5e26dfea55d30fbe3 10.1175/jpo-d-17-0239.1 natural sciences biological sciences marine biology & hydrobiology -dedup_wf_001::43edc179aa9e1fbaf582c5203b18b519 10.1007/s13218-020-00674-7 engineering and technology industrial biotechnology industrial engineering & automation -dedup_wf_001::e7770e11cd6eb514bb52c07b5a8a80f0 10.1016/j.psyneuen.2016.02.00310.1016/j.psyneuen.2016.02.00310.7892/boris.7888610.7892/boris.78886 medical and health sciences basic medicine NULL -dedup_wf_001::80bc15d69bdc589149631f3439dde5aa 10.1109/ted.2018.2813542 engineering and technology electrical engineering, electronic engineering, information engineering electrical & electronic engineering -dedup_wf_001::42c1cfa33e7872944b920cff90f4d99e 10.3989/scimar.04739.25a natural sciences biological sciences NULL -dedup_wf_001::9bacdbbaa9da3658b7243d5de8e3ce14 10.3390/su12187503 natural sciences earth and related environmental sciences NULL -dedup_wf_001::59e43d3527dcfecb6097fbd5740c8950 10.1016/j.ccell.2018.08.017 medical and health sciences basic medicine biochemistry & molecular biology -doiboost____::e024d1b738df3b24bc58fa0228542571 10.1103/physrevresearch.2.023322 natural sciences physical sciences nuclear & particles physics -dedup_wf_001::66e9a3237fa8178886d26d3c2d5b9e66 10.1039/c8cp03234c natural sciences NULL NULL -dedup_wf_001::83737ab4205bae751571bb3b166efa18 10.5281/zenodo.369655710.5281/zenodo.369655610.1109/jsac.2016.2545384 engineering and technology electrical engineering, electronic engineering, information engineering networking & telecommunications -dedup_wf_001::e3f892db413a689e572dd256acad55fe 10.1038/ng.366710.1038/ng.3667.10.17615/tct6-4m2610.17863/cam.15649 medical and health sciences health sciences genetics & heredity -dedup_wf_001::14ba594e8fd081847bc3f50f56335003 10.1016/j.jclepro.2019.119065 engineering and technology other engineering and technologies building & construction -dedup_wf_001::08ac7b33a41bcea2d055ecd8585d632e 10.1111/pce.13392 agricultural and veterinary sciences agriculture, forestry, and fisheries agronomy & agriculture \ No newline at end of file