[SDG] Logic to create unresolved entities out of the SDG input. This also changes some classes related to FOS so that they reuse the same code. The code under createunresolvedentities creates results with the merged update of the inputs provided (bip at the level of the instance, fos and sdg for subjects)
parent
2a67ee13ec
commit
7a1b440413
@ -0,0 +1,86 @@
|
||||
|
||||
package eu.dnetlib.dhp.actionmanager.createunresolvedentities;
|
||||
|
||||
import eu.dnetlib.dhp.actionmanager.createunresolvedentities.model.SDGDataModel;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.schema.oaf.Result;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
import eu.dnetlib.dhp.utils.DHPUtils;
import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.FilterFunction;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.api.java.function.MapGroupsFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.io.InputStream;
import java.io.Serializable;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;

import static eu.dnetlib.dhp.actionmanager.Constants.*;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||
|
||||
public class PrepareSDGSparkJob implements Serializable {
|
||||
private static final Logger log = LoggerFactory.getLogger(PrepareSDGSparkJob.class);
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
|
||||
String jsonConfiguration = IOUtils
|
||||
.toString(
|
||||
PrepareSDGSparkJob.class
|
||||
.getResourceAsStream(
|
||||
"/eu/dnetlib/dhp/actionmanager/createunresolvedentities/prepare_parameters.json"));
|
||||
|
||||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
|
||||
|
||||
parser.parseArgument(args);
|
||||
|
||||
Boolean isSparkSessionManaged = isSparkSessionManaged(parser);
|
||||
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
|
||||
|
||||
String sourcePath = parser.get("sourcePath");
|
||||
log.info("sourcePath: {}", sourcePath);
|
||||
|
||||
final String outputPath = parser.get("outputPath");
|
||||
log.info("outputPath: {}", outputPath);
|
||||
|
||||
SparkConf conf = new SparkConf();
|
||||
runWithSparkSession(
|
||||
conf,
|
||||
isSparkSessionManaged,
|
||||
spark -> {
|
||||
doPrepare(
|
||||
spark,
|
||||
sourcePath,
|
||||
|
||||
outputPath);
|
||||
});
|
||||
}
|
||||
|
||||
private static void doPrepare(SparkSession spark, String sourcePath, String outputPath) {
|
||||
Dataset<SDGDataModel> sdgDataset = readPath(spark, sourcePath, SDGDataModel.class);
|
||||
|
||||
|
||||
sdgDataset.groupByKey((MapFunction<SDGDataModel,String>)r -> r.getDoi().toLowerCase(),Encoders.STRING())
|
||||
.mapGroups((MapGroupsFunction<String, SDGDataModel, Result>)(k,it) -> {
|
||||
Result r = new Result();
|
||||
r.setId(DHPUtils.generateUnresolvedIdentifier(k, DOI));
|
||||
SDGDataModel first = it.next();
|
||||
List<StructuredProperty>sbjs = new ArrayList<>();
|
||||
sbjs.add(getSubject(first.getSbj(), SDG_CLASS_ID, SDG_CLASS_NAME, UPDATE_SUBJECT_SDG_CLASS_ID));
|
||||
it.forEachRemaining(s -> sbjs.add(getSubject(s.getSbj(),SDG_CLASS_ID, SDG_CLASS_NAME, UPDATE_SUBJECT_SDG_CLASS_ID)));
|
||||
r.setSubject(sbjs);
|
||||
return r;
|
||||
},Encoders.bean(Result.class))
|
||||
.write()
|
||||
.mode(SaveMode.Overwrite)
|
||||
.option("compression", "gzip")
|
||||
.json(outputPath + "/sdg");
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
}
|
@ -1,6 +1,48 @@
|
||||
package eu.dnetlib.dhp.actionmanager.createunresolvedentities.model;
|
||||
|
||||
import com.opencsv.bean.CsvBindByPosition;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
public class SDGDataModel implements Serializable {
|
||||
public class SDGDataModel implements Serializable{
|
||||
|
||||
@CsvBindByPosition(position = 0)
|
||||
// @CsvBindByName(column = "doi")
|
||||
private String doi;
|
||||
|
||||
@CsvBindByPosition(position = 1)
|
||||
// @CsvBindByName(column = "sdg")
|
||||
private String sbj;
|
||||
|
||||
|
||||
public SDGDataModel() {
|
||||
|
||||
}
|
||||
|
||||
public SDGDataModel(String doi, String sbj) {
|
||||
this.doi = doi;
|
||||
this.sbj = sbj;
|
||||
|
||||
}
|
||||
|
||||
public static SDGDataModel newInstance(String d, String sbj) {
|
||||
return new SDGDataModel(d, sbj);
|
||||
}
|
||||
|
||||
public String getDoi() {
|
||||
return doi;
|
||||
}
|
||||
|
||||
public void setDoi(String doi) {
|
||||
this.doi = doi;
|
||||
}
|
||||
|
||||
|
||||
public String getSbj() {
|
||||
return sbj;
|
||||
}
|
||||
|
||||
public void setSbj(String sbj) {
|
||||
this.sbj = sbj;
|
||||
}
|
||||
}
|
||||
|
@ -0,0 +1,37 @@
|
||||
{"doi":"10.1001/amaguidesnewsletters.2019.mayjun02","sbj":"10. No inequality"}
|
||||
{"doi":"10.1001/amaguidesnewsletters.2019.novdec01","sbj":"10. No inequality"}
|
||||
{"doi":"10.1001/amaguidesnewsletters.2019.sepoct02","sbj":"3. Good health"}
|
||||
{"doi":"10.1001/amaguidesnewsletters.2019.sepoct02","sbj":"8. Economic growth"}
|
||||
{"doi":"10.1001/amaguidesnewsletters.2020.janfeb01","sbj":"8. Economic growth"}
|
||||
{"doi":"10.1001/amaguidesnewsletters.2020.janfeb02","sbj":"3. Good health"}
|
||||
{"doi":"10.1001/amaguidesnewsletters.2020.janfeb02","sbj":"8. Economic growth"}
|
||||
{"doi":"10.1001/amaguidesnewsletters.2020.julaug01","sbj":"3. Good health"}
|
||||
{"doi":"10.1001/amaguidesnewsletters.2020.marapr01","sbj":"3. Good health"}
|
||||
{"doi":"10.1001/amaguidesnewsletters.2020.mayjun01","sbj":"3. Good health"}
|
||||
{"doi":"10.1001/amaguidesnewsletters.2020.mayjun02","sbj":"16. Peace & justice"}
|
||||
{"doi":"10.1001/amaguidesnewsletters.2020.mayjun02","sbj":"10. No inequality"}
|
||||
{"doi":"10.1001/amaguidesnewsletters.2021.julaug01","sbj":"1. No poverty"}
|
||||
{"doi":"10.1001/amaguidesnewsletters.2021.mayjune01","sbj":"10. No inequality"}
|
||||
{"doi":"10.1001/amaguidesnewsletters.2021.mayjune02","sbj":"10. No inequality"}
|
||||
{"doi":"10.4336/2021.pfb.41e201902078","sbj":"15. Life on land"}
|
||||
{"doi":"10.4337/ejeep.2019.00045","sbj":"16. Peace & justice"}
|
||||
{"doi":"10.4337/ejeep.2019.00050","sbj":"1. No poverty"}
|
||||
{"doi":"10.4337/ejeep.2019.0045","sbj":"16. Peace & justice"}
|
||||
{"doi":"10.4337/ejeep.2019.0050","sbj":"1. No poverty"}
|
||||
{"doi":"10.4337/ejeep.2019.0051","sbj":"16. Peace & justice"}
|
||||
{"doi":"10.4337/ejeep.2019.0052","sbj":"16. Peace & justice"}
|
||||
{"doi":"10.4337/ejeep.2020.0058","sbj":"1. No poverty"}
|
||||
{"doi":"10.4337/ejeep.2020.0058","sbj":"10. No inequality"}
|
||||
{"doi":"10.4337/ejeep.2020.0060","sbj":"10. No inequality"}
|
||||
{"doi":"10.4337/ejeep.2020.0065","sbj":"16. Peace & justice"}
|
||||
{"doi":"10.4337/ejeep.2020.02.03","sbj":"16. Peace & justice"}
|
||||
{"doi":"10.4337/ejeep.2020.02.05","sbj":"8. Economic growth"}
|
||||
{"doi":"10.4337/ejeep.2020.02.06","sbj":"16. Peace & justice"}
|
||||
{"doi":"10.4337/ejeep.2020.02.09","sbj":"16. Peace & justice"}
|
||||
{"doi":"10.4337/roke.2020.01.01","sbj":"16. Peace & justice"}
|
||||
{"doi":"10.4337/roke.2020.01.03","sbj":"16. Peace & justice"}
|
||||
{"doi":"10.4337/roke.2020.01.05","sbj":"1. No poverty"}
|
||||
{"doi":"10.4337/roke.2020.01.05","sbj":"8. Economic growth"}
|
||||
{"doi":"10.4337/roke.2020.01.07","sbj":"8. Economic growth"}
|
||||
{"doi":"10.4337/roke.2020.02.03","sbj":"8. Economic growth"}
|
||||
{"doi":"10.3390/s18072310","sbj":"1. No poverty"}
|
@ -0,0 +1,37 @@
|
||||
10.1001/amaguidesnewsletters.2019.mayjun02,10. No inequality
|
||||
10.1001/amaguidesnewsletters.2019.novdec01,10. No inequality
|
||||
10.1001/amaguidesnewsletters.2019.sepoct02,3. Good health
|
||||
10.1001/amaguidesnewsletters.2019.sepoct02,8. Economic growth
|
||||
10.1001/amaguidesnewsletters.2020.janfeb01,8. Economic growth
|
||||
10.1001/amaguidesnewsletters.2020.janfeb02,3. Good health
|
||||
10.1001/amaguidesnewsletters.2020.janfeb02,8. Economic growth
|
||||
10.1001/amaguidesnewsletters.2020.julaug01,3. Good health
|
||||
10.1001/amaguidesnewsletters.2020.marapr01,3. Good health
|
||||
10.1001/amaguidesnewsletters.2020.mayjun01,3. Good health
|
||||
10.1001/amaguidesnewsletters.2020.mayjun02,16. Peace & justice
|
||||
10.1001/amaguidesnewsletters.2020.mayjun02,10. No inequality
|
||||
10.1001/amaguidesnewsletters.2021.julaug01,1. No poverty
|
||||
10.1001/amaguidesnewsletters.2021.mayjune01,10. No inequality
|
||||
10.1001/amaguidesnewsletters.2021.mayjune02,10. No inequality
|
||||
10.4336/2021.pfb.41e201902078,15. Life on land
|
||||
10.4337/ejeep.2019.00045,16. Peace & justice
|
||||
10.4337/ejeep.2019.00050,1. No poverty
|
||||
10.4337/ejeep.2019.0045,16. Peace & justice
|
||||
10.4337/ejeep.2019.0050,1. No poverty
|
||||
10.4337/ejeep.2019.0051,16. Peace & justice
|
||||
10.4337/ejeep.2019.0052,16. Peace & justice
|
||||
10.4337/ejeep.2020.0058,1. No poverty
|
||||
10.4337/ejeep.2020.0058,10. No inequality
|
||||
10.4337/ejeep.2020.0060,10. No inequality
|
||||
10.4337/ejeep.2020.0065,16. Peace & justice
|
||||
10.4337/ejeep.2020.02.03,16. Peace & justice
|
||||
10.4337/ejeep.2020.02.05,8. Economic growth
|
||||
10.4337/ejeep.2020.02.06,16. Peace & justice
|
||||
10.4337/ejeep.2020.02.09,16. Peace & justice
|
||||
10.4337/roke.2020.01.01,16. Peace & justice
|
||||
10.4337/roke.2020.01.03,16. Peace & justice
|
||||
10.4337/roke.2020.01.05,1. No poverty
|
||||
10.4337/roke.2020.01.05,8. Economic growth
|
||||
10.4337/roke.2020.01.07,8. Economic growth
|
||||
10.4337/roke.2020.02.03,8. Economic growth
|
||||
10.4337/roke.2020.02.04,1. No poverty
|
|
Loading…
Reference in New Issue