1
0
Fork 0

merged from branch beta the bulk tagging (single step, negative constraints), the cleanig worflow (single step, pid type based cleaning), instance level fulltext

This commit is contained in:
Claudio Atzori 2023-06-28 13:33:57 +02:00
parent 4ef0f2ec26
commit f3a85e224b
186 changed files with 16092 additions and 6039 deletions

View File

@ -10,6 +10,12 @@ public class Constants {
public static final Map<String, String> accessRightsCoarMap = Maps.newHashMap(); public static final Map<String, String> accessRightsCoarMap = Maps.newHashMap();
public static final Map<String, String> coarCodeLabelMap = Maps.newHashMap(); public static final Map<String, String> coarCodeLabelMap = Maps.newHashMap();
public static final String ROR_NS_PREFIX = "ror_________";
public static final String ROR_OPENAIRE_ID = "10|openaire____::993a7ae7a863813cf95028b50708e222";
public static final String ROR_DATASOURCE_NAME = "Research Organization Registry (ROR)";
public static String COAR_ACCESS_RIGHT_SCHEMA = "http://vocabularies.coar-repositories.org/documentation/access_rights/"; public static String COAR_ACCESS_RIGHT_SCHEMA = "http://vocabularies.coar-repositories.org/documentation/access_rights/";
private Constants() { private Constants() {

View File

@ -0,0 +1,100 @@
package eu.dnetlib.dhp.common;
/**
* This utility represent the Metadata Store information
* needed during the migration from mongo to HDFS to store
*/
public class MDStoreInfo {
private String mdstore;
private String currentId;
private Long latestTimestamp;
/**
* Instantiates a new Md store info.
*/
public MDStoreInfo() {
}
/**
* Instantiates a new Md store info.
*
* @param mdstore the mdstore
* @param currentId the current id
* @param latestTimestamp the latest timestamp
*/
public MDStoreInfo(String mdstore, String currentId, Long latestTimestamp) {
this.mdstore = mdstore;
this.currentId = currentId;
this.latestTimestamp = latestTimestamp;
}
/**
* Gets mdstore.
*
* @return the mdstore
*/
public String getMdstore() {
return mdstore;
}
/**
* Sets mdstore.
*
* @param mdstore the mdstore
* @return the mdstore
*/
public MDStoreInfo setMdstore(String mdstore) {
this.mdstore = mdstore;
return this;
}
/**
* Gets current id.
*
* @return the current id
*/
public String getCurrentId() {
return currentId;
}
/**
* Sets current id.
*
* @param currentId the current id
* @return the current id
*/
public MDStoreInfo setCurrentId(String currentId) {
this.currentId = currentId;
return this;
}
/**
* Gets latest timestamp.
*
* @return the latest timestamp
*/
public Long getLatestTimestamp() {
return latestTimestamp;
}
/**
* Sets latest timestamp.
*
* @param latestTimestamp the latest timestamp
* @return the latest timestamp
*/
public MDStoreInfo setLatestTimestamp(Long latestTimestamp) {
this.latestTimestamp = latestTimestamp;
return this;
}
@Override
public String toString() {
return "MDStoreInfo{" +
"mdstore='" + mdstore + '\'' +
", currentId='" + currentId + '\'' +
", latestTimestamp=" + latestTimestamp +
'}';
}
}

View File

@ -45,28 +45,17 @@ public class MakeTarArchive implements Serializable {
.map(Integer::valueOf) .map(Integer::valueOf)
.orElse(10); .orElse(10);
final boolean rename = Optional
.ofNullable(parser.get("rename"))
.map(Boolean::valueOf)
.orElse(Boolean.FALSE);
Configuration conf = new Configuration(); Configuration conf = new Configuration();
conf.set("fs.defaultFS", hdfsNameNode); conf.set("fs.defaultFS", hdfsNameNode);
FileSystem fileSystem = FileSystem.get(conf); FileSystem fileSystem = FileSystem.get(conf);
makeTArArchive(fileSystem, inputPath, outputPath, gBperSplit, rename); makeTArArchive(fileSystem, inputPath, outputPath, gBperSplit);
} }
public static void makeTArArchive(FileSystem fileSystem, String inputPath, String outputPath, int gBperSplit) public static void makeTArArchive(FileSystem fileSystem, String inputPath, String outputPath, int gBperSplit)
throws IOException { throws IOException {
makeTArArchive(fileSystem, inputPath, outputPath, gBperSplit, false);
}
public static void makeTArArchive(FileSystem fileSystem, String inputPath, String outputPath, int gBperSplit,
boolean rename)
throws IOException {
RemoteIterator<LocatedFileStatus> dirIterator = fileSystem.listLocatedStatus(new Path(inputPath)); RemoteIterator<LocatedFileStatus> dirIterator = fileSystem.listLocatedStatus(new Path(inputPath));
@ -77,7 +66,7 @@ public class MakeTarArchive implements Serializable {
String pathString = p.toString(); String pathString = p.toString();
String entity = pathString.substring(pathString.lastIndexOf("/") + 1); String entity = pathString.substring(pathString.lastIndexOf("/") + 1);
MakeTarArchive.tarMaxSize(fileSystem, pathString, outputPath + "/" + entity, entity, gBperSplit, rename); MakeTarArchive.tarMaxSize(fileSystem, pathString, outputPath + "/" + entity, entity, gBperSplit);
} }
} }
@ -90,8 +79,7 @@ public class MakeTarArchive implements Serializable {
return new TarArchiveOutputStream(fileSystem.create(hdfsWritePath).getWrappedStream()); return new TarArchiveOutputStream(fileSystem.create(hdfsWritePath).getWrappedStream());
} }
private static void write(FileSystem fileSystem, String inputPath, String outputPath, String dirName, private static void write(FileSystem fileSystem, String inputPath, String outputPath, String dirName)
boolean rename)
throws IOException { throws IOException {
Path hdfsWritePath = new Path(outputPath); Path hdfsWritePath = new Path(outputPath);
@ -107,20 +95,20 @@ public class MakeTarArchive implements Serializable {
new Path(inputPath), true); new Path(inputPath), true);
while (iterator.hasNext()) { while (iterator.hasNext()) {
writeCurrentFile(fileSystem, dirName, iterator, ar, 0, rename); writeCurrentFile(fileSystem, dirName, iterator, ar, 0);
} }
} }
} }
public static void tarMaxSize(FileSystem fileSystem, String inputPath, String outputPath, String dir_name, public static void tarMaxSize(FileSystem fileSystem, String inputPath, String outputPath, String dir_name,
int gBperSplit, boolean rename) throws IOException { int gBperSplit) throws IOException {
final long bytesPerSplit = 1024L * 1024L * 1024L * gBperSplit; final long bytesPerSplit = 1024L * 1024L * 1024L * gBperSplit;
long sourceSize = fileSystem.getContentSummary(new Path(inputPath)).getSpaceConsumed(); long sourceSize = fileSystem.getContentSummary(new Path(inputPath)).getSpaceConsumed();
if (sourceSize < bytesPerSplit) { if (sourceSize < bytesPerSplit) {
write(fileSystem, inputPath, outputPath + ".tar", dir_name, rename); write(fileSystem, inputPath, outputPath + ".tar", dir_name);
} else { } else {
int partNum = 0; int partNum = 0;
@ -133,8 +121,7 @@ public class MakeTarArchive implements Serializable {
long currentSize = 0; long currentSize = 0;
while (next && currentSize < bytesPerSplit) { while (next && currentSize < bytesPerSplit) {
currentSize = writeCurrentFile( currentSize = writeCurrentFile(fileSystem, dir_name, fileStatusListIterator, ar, currentSize);
fileSystem, dir_name, fileStatusListIterator, ar, currentSize, rename);
next = fileStatusListIterator.hasNext(); next = fileStatusListIterator.hasNext();
} }
@ -147,7 +134,7 @@ public class MakeTarArchive implements Serializable {
private static long writeCurrentFile(FileSystem fileSystem, String dirName, private static long writeCurrentFile(FileSystem fileSystem, String dirName,
RemoteIterator<LocatedFileStatus> fileStatusListIterator, RemoteIterator<LocatedFileStatus> fileStatusListIterator,
TarArchiveOutputStream ar, long currentSize, boolean rename) throws IOException { TarArchiveOutputStream ar, long currentSize) throws IOException {
LocatedFileStatus fileStatus = fileStatusListIterator.next(); LocatedFileStatus fileStatus = fileStatusListIterator.next();
Path p = fileStatus.getPath(); Path p = fileStatus.getPath();
@ -161,11 +148,6 @@ public class MakeTarArchive implements Serializable {
} }
name = tmp; name = tmp;
} }
if (rename) {
if (name.endsWith(".txt.gz"))
name = name.replace(".txt.gz", ".json.gz");
}
TarArchiveEntry entry = new TarArchiveEntry(dirName + "/" + name); TarArchiveEntry entry = new TarArchiveEntry(dirName + "/" + name);
entry.setSize(fileStatus.getLen()); entry.setSize(fileStatus.getLen());
currentSize += fileStatus.getLen(); currentSize += fileStatus.getLen();

View File

@ -1,12 +1,12 @@
package eu.dnetlib.dhp.common; package eu.dnetlib.dhp.common;
import static com.mongodb.client.model.Sorts.descending;
import java.io.Closeable; import java.io.Closeable;
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList; import java.util.*;
import java.util.HashMap; import java.util.stream.Collectors;
import java.util.Map;
import java.util.Optional;
import java.util.stream.StreamSupport; import java.util.stream.StreamSupport;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
@ -38,6 +38,26 @@ public class MdstoreClient implements Closeable {
this.db = getDb(client, dbName); this.db = getDb(client, dbName);
} }
private Long parseTimestamp(Document f) {
if (f == null || !f.containsKey("timestamp"))
return null;
Object ts = f.get("timestamp");
return Long.parseLong(ts.toString());
}
public Long getLatestTimestamp(final String collectionId) {
MongoCollection<Document> collection = db.getCollection(collectionId);
FindIterable<Document> result = collection.find().sort(descending("timestamp")).limit(1);
if (result == null) {
return null;
}
Document f = result.first();
return parseTimestamp(f);
}
public MongoCollection<Document> mdStore(final String mdId) { public MongoCollection<Document> mdStore(final String mdId) {
BasicDBObject query = (BasicDBObject) QueryBuilder.start("mdId").is(mdId).get(); BasicDBObject query = (BasicDBObject) QueryBuilder.start("mdId").is(mdId).get();
@ -54,6 +74,16 @@ public class MdstoreClient implements Closeable {
return getColl(db, currentId, true); return getColl(db, currentId, true);
} }
public List<MDStoreInfo> mdStoreWithTimestamp(final String mdFormat, final String mdLayout,
final String mdInterpretation) {
Map<String, String> res = validCollections(mdFormat, mdLayout, mdInterpretation);
return res
.entrySet()
.stream()
.map(e -> new MDStoreInfo(e.getKey(), e.getValue(), getLatestTimestamp(e.getValue())))
.collect(Collectors.toList());
}
public Map<String, String> validCollections( public Map<String, String> validCollections(
final String mdFormat, final String mdLayout, final String mdInterpretation) { final String mdFormat, final String mdLayout, final String mdInterpretation) {

View File

@ -9,13 +9,13 @@ import java.util.concurrent.TimeUnit;
import org.apache.http.HttpHeaders; import org.apache.http.HttpHeaders;
import org.apache.http.entity.ContentType; import org.apache.http.entity.ContentType;
import org.jetbrains.annotations.NotNull;
import com.google.gson.Gson; import com.google.gson.Gson;
import eu.dnetlib.dhp.common.api.zenodo.ZenodoModel; import eu.dnetlib.dhp.common.api.zenodo.ZenodoModel;
import eu.dnetlib.dhp.common.api.zenodo.ZenodoModelList; import eu.dnetlib.dhp.common.api.zenodo.ZenodoModelList;
import okhttp3.*; import okhttp3.*;
import org.jetbrains.annotations.NotNull;
public class ZenodoAPIClient implements Serializable { public class ZenodoAPIClient implements Serializable {
@ -155,7 +155,6 @@ public class ZenodoAPIClient implements Serializable {
conn.setDoOutput(true); conn.setDoOutput(true);
conn.setRequestMethod("PUT"); conn.setRequestMethod("PUT");
try (OutputStream os = conn.getOutputStream()) { try (OutputStream os = conn.getOutputStream()) {
byte[] input = metadata.getBytes("utf-8"); byte[] input = metadata.getBytes("utf-8");
os.write(input, 0, input.length); os.write(input, 0, input.length);
@ -169,7 +168,6 @@ public class ZenodoAPIClient implements Serializable {
return responseCode; return responseCode;
} }
private boolean checkOKStatus(int responseCode) { private boolean checkOKStatus(int responseCode) {
@ -233,7 +231,6 @@ public class ZenodoAPIClient implements Serializable {
conn.setDoOutput(true); conn.setDoOutput(true);
conn.setRequestMethod("POST"); conn.setRequestMethod("POST");
try (OutputStream os = conn.getOutputStream()) { try (OutputStream os = conn.getOutputStream()) {
byte[] input = json.getBytes("utf-8"); byte[] input = json.getBytes("utf-8");
os.write(input, 0, input.length); os.write(input, 0, input.length);
@ -296,7 +293,6 @@ public class ZenodoAPIClient implements Serializable {
ZenodoModel zenodoModel = new Gson().fromJson(body, ZenodoModel.class); ZenodoModel zenodoModel = new Gson().fromJson(body, ZenodoModel.class);
bucket = zenodoModel.getLinks().getBucket(); bucket = zenodoModel.getLinks().getBucket();
return responseCode; return responseCode;
} }
@ -331,8 +327,6 @@ public class ZenodoAPIClient implements Serializable {
conn.setDoOutput(true); conn.setDoOutput(true);
conn.setRequestMethod("GET"); conn.setRequestMethod("GET");
String body = getBody(conn); String body = getBody(conn);
int responseCode = conn.getResponseCode(); int responseCode = conn.getResponseCode();
@ -341,12 +335,8 @@ public class ZenodoAPIClient implements Serializable {
if (!checkOKStatus(responseCode)) if (!checkOKStatus(responseCode))
throw new IOException("Unexpected code " + responseCode + body); throw new IOException("Unexpected code " + responseCode + body);
return body; return body;
} }
private String getBucket(String inputUurl) throws IOException { private String getBucket(String inputUurl) throws IOException {
@ -370,8 +360,6 @@ public class ZenodoAPIClient implements Serializable {
return zenodoModel.getLinks().getBucket(); return zenodoModel.getLinks().getBucket();
} }
} }

View File

@ -4,6 +4,7 @@ package eu.dnetlib.dhp.common.vocabulary;
import java.io.Serializable; import java.io.Serializable;
import java.util.HashMap; import java.util.HashMap;
import java.util.Map; import java.util.Map;
import java.util.Objects;
import java.util.Optional; import java.util.Optional;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
@ -66,21 +67,39 @@ public class Vocabulary implements Serializable {
} }
public Qualifier getTermAsQualifier(final String termId) { public Qualifier getTermAsQualifier(final String termId) {
if (StringUtils.isBlank(termId)) { return getTermAsQualifier(termId, false);
}
public Qualifier getTermAsQualifier(final String termId, boolean strict) {
final VocabularyTerm term = getTerm(termId);
if (Objects.nonNull(term)) {
return OafMapperUtils.qualifier(term.getId(), term.getName(), getId(), getName());
} else if (Objects.isNull(term) && strict) {
return OafMapperUtils.unknown(getId(), getName()); return OafMapperUtils.unknown(getId(), getName());
} else if (termExists(termId)) {
final VocabularyTerm t = getTerm(termId);
return OafMapperUtils.qualifier(t.getId(), t.getName(), getId(), getName());
} else { } else {
return OafMapperUtils.qualifier(termId, termId, getId(), getName()); return OafMapperUtils.qualifier(termId, termId, getId(), getName());
} }
} }
public Qualifier getSynonymAsQualifier(final String syn) { public Qualifier getSynonymAsQualifier(final String syn) {
return getSynonymAsQualifier(syn, false);
}
public Qualifier getSynonymAsQualifier(final String syn, boolean strict) {
return Optional return Optional
.ofNullable(getTermBySynonym(syn)) .ofNullable(getTermBySynonym(syn))
.map(term -> getTermAsQualifier(term.getId())) .map(term -> getTermAsQualifier(term.getId(), strict))
.orElse(null); .orElse(null);
} }
public Qualifier lookup(String id) {
return lookup(id, false);
}
public Qualifier lookup(String id, boolean strict) {
return Optional
.ofNullable(getSynonymAsQualifier(id, strict))
.orElse(getTermAsQualifier(id, strict));
}
} }

View File

@ -81,6 +81,13 @@ public class VocabularyGroup implements Serializable {
vocs.put(id.toLowerCase(), new Vocabulary(id, name)); vocs.put(id.toLowerCase(), new Vocabulary(id, name));
} }
public Optional<Vocabulary> find(final String vocId) {
return Optional
.ofNullable(vocId)
.map(String::toLowerCase)
.map(vocs::get);
}
public void addTerm(final String vocId, final String id, final String name) { public void addTerm(final String vocId, final String id, final String name) {
if (vocabularyExists(vocId)) { if (vocabularyExists(vocId)) {
vocs.get(vocId.toLowerCase()).addTerm(id, name); vocs.get(vocId.toLowerCase()).addTerm(id, name);

View File

@ -0,0 +1,14 @@
package eu.dnetlib.dhp.schema.oaf.utils;
public class DoiCleaningRule {
public static String clean(final String doi) {
return doi
.toLowerCase()
.replaceAll("\\s", "")
.replaceAll("^doi:", "")
.replaceFirst(CleaningFunctions.DOI_PREFIX_REGEX, CleaningFunctions.DOI_PREFIX);
}
}

View File

@ -0,0 +1,23 @@
package eu.dnetlib.dhp.schema.oaf.utils;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class FundRefCleaningRule {
public static String clean(final String fundrefId) {
String s = fundrefId
.toLowerCase()
.replaceAll("\\s", "");
Matcher m = Pattern.compile("\\d+").matcher(s);
if (m.matches()) {
return m.group();
} else {
return "";
}
}
}

View File

@ -1,6 +1,8 @@
package eu.dnetlib.dhp.schema.oaf.utils; package eu.dnetlib.dhp.schema.oaf.utils;
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.getProvenance;
import java.time.LocalDate; import java.time.LocalDate;
import java.time.ZoneId; import java.time.ZoneId;
import java.time.format.DateTimeFormatter; import java.time.format.DateTimeFormatter;
@ -11,12 +13,15 @@ import java.util.stream.Collectors;
import java.util.stream.Stream; import java.util.stream.Stream;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Encoders;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.github.sisyphsu.dateparser.DateParserUtils; import com.github.sisyphsu.dateparser.DateParserUtils;
import com.google.common.collect.Lists; import com.google.common.collect.Lists;
import com.google.common.collect.Sets; import com.google.common.collect.Sets;
import eu.dnetlib.dhp.common.vocabulary.Vocabulary;
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.schema.common.ModelSupport;
@ -35,20 +40,137 @@ public class GraphCleaningFunctions extends CleaningFunctions {
public static final int TITLE_FILTER_RESIDUAL_LENGTH = 5; public static final int TITLE_FILTER_RESIDUAL_LENGTH = 5;
public static <T extends Oaf> T fixVocabularyNames(T value) { public static <T extends Oaf> T cleanContext(T value, String contextId, String verifyParam) {
if (value instanceof Datasource) { if (ModelSupport.isSubClass(value, Result.class)) {
// nothing to clean here final Result res = (Result) value;
} else if (value instanceof Project) { if (shouldCleanContext(res, verifyParam)) {
// nothing to clean here res
} else if (value instanceof Organization) { .setContext(
Organization o = (Organization) value; res
if (Objects.nonNull(o.getCountry())) { .getContext()
fixVocabName(o.getCountry(), ModelConstants.DNET_COUNTRY_TYPE); .stream()
.filter(c -> !StringUtils.startsWith(c.getId().toLowerCase(), contextId))
.collect(Collectors.toList()));
}
return (T) res;
} else {
return value;
}
} }
} else if (value instanceof Relation) {
// nothing to clean here
} else if (value instanceof Result) {
private static boolean shouldCleanContext(Result res, String verifyParam) {
boolean titleMatch = res
.getTitle()
.stream()
.filter(
t -> t
.getQualifier()
.getClassid()
.equalsIgnoreCase(ModelConstants.MAIN_TITLE_QUALIFIER.getClassid()))
.anyMatch(t -> t.getValue().toLowerCase().startsWith(verifyParam.toLowerCase()));
return titleMatch && Objects.nonNull(res.getContext());
}
public static <T extends Oaf> T cleanCountry(T value, String[] verifyParam, Set<String> hostedBy,
String collectedfrom, String country) {
if (ModelSupport.isSubClass(value, Result.class)) {
final Result res = (Result) value;
if (res.getInstance().stream().anyMatch(i -> hostedBy.contains(i.getHostedby().getKey())) ||
!res.getCollectedfrom().stream().anyMatch(cf -> cf.getValue().equals(collectedfrom))) {
return (T) res;
}
List<StructuredProperty> ids = getPidsAndAltIds(res).collect(Collectors.toList());
if (ids
.stream()
.anyMatch(
p -> p
.getQualifier()
.getClassid()
.equals(PidType.doi.toString()) && pidInParam(p.getValue(), verifyParam))) {
res
.setCountry(
res
.getCountry()
.stream()
.filter(
c -> toTakeCountry(c, country))
.collect(Collectors.toList()));
}
return (T) res;
} else {
return value;
}
}
private static <T extends Result> Stream<StructuredProperty> getPidsAndAltIds(T r) {
final Stream<StructuredProperty> resultPids = Optional
.ofNullable(r.getPid())
.map(Collection::stream)
.orElse(Stream.empty());
final Stream<StructuredProperty> instancePids = Optional
.ofNullable(r.getInstance())
.map(
instance -> instance
.stream()
.flatMap(
i -> Optional
.ofNullable(i.getPid())
.map(Collection::stream)
.orElse(Stream.empty())))
.orElse(Stream.empty());
final Stream<StructuredProperty> instanceAltIds = Optional
.ofNullable(r.getInstance())
.map(
instance -> instance
.stream()
.flatMap(
i -> Optional
.ofNullable(i.getAlternateIdentifier())
.map(Collection::stream)
.orElse(Stream.empty())))
.orElse(Stream.empty());
return Stream
.concat(
Stream.concat(resultPids, instancePids),
instanceAltIds);
}
private static boolean pidInParam(String value, String[] verifyParam) {
for (String s : verifyParam)
if (value.startsWith(s))
return true;
return false;
}
private static boolean toTakeCountry(Country c, String country) {
// If dataInfo is not set, or dataInfo.inferenceprovenance is not set or not present then it cannot be
// inserted via propagation
if (!Optional.ofNullable(c.getDataInfo()).isPresent())
return true;
if (!Optional.ofNullable(c.getDataInfo().getInferenceprovenance()).isPresent())
return true;
return !(c
.getClassid()
.equalsIgnoreCase(country) &&
c.getDataInfo().getInferenceprovenance().equals("propagation"));
}
public static <T extends Oaf> T fixVocabularyNames(T value) {
if (value instanceof OafEntity) {
OafEntity e = (OafEntity) value;
Optional
.ofNullable(e.getPid())
.ifPresent(pid -> pid.forEach(p -> fixVocabName(p.getQualifier(), ModelConstants.DNET_PID_TYPES)));
if (value instanceof Result) {
Result r = (Result) value; Result r = (Result) value;
fixVocabName(r.getLanguage(), ModelConstants.DNET_LANGUAGES); fixVocabName(r.getLanguage(), ModelConstants.DNET_LANGUAGES);
@ -62,6 +184,11 @@ public class GraphCleaningFunctions extends CleaningFunctions {
for (Instance i : r.getInstance()) { for (Instance i : r.getInstance()) {
fixVocabName(i.getAccessright(), ModelConstants.DNET_ACCESS_MODES); fixVocabName(i.getAccessright(), ModelConstants.DNET_ACCESS_MODES);
fixVocabName(i.getRefereed(), ModelConstants.DNET_REVIEW_LEVELS); fixVocabName(i.getRefereed(), ModelConstants.DNET_REVIEW_LEVELS);
Optional
.ofNullable(i.getPid())
.ifPresent(
pid -> pid.forEach(p -> fixVocabName(p.getQualifier(), ModelConstants.DNET_PID_TYPES)));
} }
} }
if (Objects.nonNull(r.getAuthor())) { if (Objects.nonNull(r.getAuthor())) {
@ -82,6 +209,19 @@ public class GraphCleaningFunctions extends CleaningFunctions {
} else if (value instanceof Software) { } else if (value instanceof Software) {
} }
} else if (value instanceof Datasource) {
// nothing to clean here
} else if (value instanceof Project) {
// nothing to clean here
} else if (value instanceof Organization) {
Organization o = (Organization) value;
if (Objects.nonNull(o.getCountry())) {
fixVocabName(o.getCountry(), ModelConstants.DNET_COUNTRY_TYPE);
}
}
} else if (value instanceof Relation) {
// nothing to clean here
} }
return value; return value;
@ -134,6 +274,14 @@ public class GraphCleaningFunctions extends CleaningFunctions {
} }
public static <T extends Oaf> T cleanup(T value, VocabularyGroup vocs) { public static <T extends Oaf> T cleanup(T value, VocabularyGroup vocs) {
if (value instanceof OafEntity) {
OafEntity e = (OafEntity) value;
if (Objects.nonNull(e.getPid())) {
e.setPid(processPidCleaning(e.getPid()));
}
if (value instanceof Datasource) { if (value instanceof Datasource) {
// nothing to clean here // nothing to clean here
} else if (value instanceof Project) { } else if (value instanceof Project) {
@ -143,19 +291,7 @@ public class GraphCleaningFunctions extends CleaningFunctions {
if (Objects.isNull(o.getCountry()) || StringUtils.isBlank(o.getCountry().getClassid())) { if (Objects.isNull(o.getCountry()) || StringUtils.isBlank(o.getCountry().getClassid())) {
o.setCountry(ModelConstants.UNKNOWN_COUNTRY); o.setCountry(ModelConstants.UNKNOWN_COUNTRY);
} }
} else if (value instanceof Relation) {
Relation r = (Relation) value;
Optional<String> validationDate = doCleanDate(r.getValidationDate());
if (validationDate.isPresent()) {
r.setValidationDate(validationDate.get());
r.setValidated(true);
} else {
r.setValidationDate(null);
r.setValidated(false);
}
} else if (value instanceof Result) { } else if (value instanceof Result) {
Result r = (Result) value; Result r = (Result) value;
if (Objects.nonNull(r.getDateofacceptance())) { if (Objects.nonNull(r.getDateofacceptance())) {
@ -191,8 +327,8 @@ public class GraphCleaningFunctions extends CleaningFunctions {
qualifier("und", "Undetermined", ModelConstants.DNET_LANGUAGES)); qualifier("und", "Undetermined", ModelConstants.DNET_LANGUAGES));
} }
if (Objects.nonNull(r.getSubject())) { if (Objects.nonNull(r.getSubject())) {
r List<Subject> subjects = Lists
.setSubject( .newArrayList(
r r
.getSubject() .getSubject()
.stream() .stream()
@ -200,8 +336,26 @@ public class GraphCleaningFunctions extends CleaningFunctions {
.filter(sp -> StringUtils.isNotBlank(sp.getValue())) .filter(sp -> StringUtils.isNotBlank(sp.getValue()))
.filter(sp -> Objects.nonNull(sp.getQualifier())) .filter(sp -> Objects.nonNull(sp.getQualifier()))
.filter(sp -> StringUtils.isNotBlank(sp.getQualifier().getClassid())) .filter(sp -> StringUtils.isNotBlank(sp.getQualifier().getClassid()))
.map(s -> {
if ("dnet:result_subject".equals(s.getQualifier().getClassid())) {
s.getQualifier().setClassid(ModelConstants.DNET_SUBJECT_TYPOLOGIES);
s.getQualifier().setClassname(ModelConstants.DNET_SUBJECT_TYPOLOGIES);
}
return s;
})
.map(GraphCleaningFunctions::cleanValue) .map(GraphCleaningFunctions::cleanValue)
.collect(Collectors.toList())); .collect(
Collectors
.toMap(
s -> Optional
.ofNullable(s.getQualifier())
.map(q -> q.getClassid() + s.getValue())
.orElse(s.getValue()),
Function.identity(),
(s1, s2) -> Collections
.min(Lists.newArrayList(s1, s2), new SubjectProvenanceComparator())))
.values());
r.setSubject(subjects);
} }
if (Objects.nonNull(r.getTitle())) { if (Objects.nonNull(r.getTitle())) {
r r
@ -250,9 +404,6 @@ public class GraphCleaningFunctions extends CleaningFunctions {
.map(GraphCleaningFunctions::cleanValue) .map(GraphCleaningFunctions::cleanValue)
.collect(Collectors.toList())); .collect(Collectors.toList()));
} }
if (Objects.nonNull(r.getPid())) {
r.setPid(processPidCleaning(r.getPid()));
}
if (Objects.isNull(r.getResourcetype()) || StringUtils.isBlank(r.getResourcetype().getClassid())) { if (Objects.isNull(r.getResourcetype()) || StringUtils.isBlank(r.getResourcetype().getClassid())) {
r r
.setResourcetype( .setResourcetype(
@ -261,13 +412,15 @@ public class GraphCleaningFunctions extends CleaningFunctions {
if (Objects.nonNull(r.getInstance())) { if (Objects.nonNull(r.getInstance())) {
for (Instance i : r.getInstance()) { for (Instance i : r.getInstance()) {
if (!vocs.termExists(ModelConstants.DNET_PUBLICATION_RESOURCE, i.getInstancetype().getClassid())) { if (!vocs
.termExists(ModelConstants.DNET_PUBLICATION_RESOURCE, i.getInstancetype().getClassid())) {
if (r instanceof Publication) { if (r instanceof Publication) {
i i
.setInstancetype( .setInstancetype(
OafMapperUtils OafMapperUtils
.qualifier( .qualifier(
"0038", "Other literature type", ModelConstants.DNET_PUBLICATION_RESOURCE, "0038", "Other literature type",
ModelConstants.DNET_PUBLICATION_RESOURCE,
ModelConstants.DNET_PUBLICATION_RESOURCE)); ModelConstants.DNET_PUBLICATION_RESOURCE));
} else if (r instanceof Dataset) { } else if (r instanceof Dataset) {
i i
@ -311,7 +464,8 @@ public class GraphCleaningFunctions extends CleaningFunctions {
}); });
}); });
if (Objects.isNull(i.getAccessright()) || StringUtils.isBlank(i.getAccessright().getClassid())) { if (Objects.isNull(i.getAccessright())
|| StringUtils.isBlank(i.getAccessright().getClassid())) {
i i
.setAccessright( .setAccessright(
accessRight( accessRight(
@ -321,7 +475,7 @@ public class GraphCleaningFunctions extends CleaningFunctions {
if (Objects.isNull(i.getHostedby()) || StringUtils.isBlank(i.getHostedby().getKey())) { if (Objects.isNull(i.getHostedby()) || StringUtils.isBlank(i.getHostedby().getKey())) {
i.setHostedby(ModelConstants.UNKNOWN_REPOSITORY); i.setHostedby(ModelConstants.UNKNOWN_REPOSITORY);
} }
if (Objects.isNull(i.getRefereed())) { if (Objects.isNull(i.getRefereed()) || StringUtils.isBlank(i.getRefereed().getClassid())) {
i.setRefereed(qualifier("0000", "Unknown", ModelConstants.DNET_REVIEW_LEVELS)); i.setRefereed(qualifier("0000", "Unknown", ModelConstants.DNET_REVIEW_LEVELS));
} }
if (Objects.nonNull(i.getDateofacceptance())) { if (Objects.nonNull(i.getDateofacceptance())) {
@ -334,7 +488,8 @@ public class GraphCleaningFunctions extends CleaningFunctions {
} }
} }
} }
if (Objects.isNull(r.getBestaccessright()) || StringUtils.isBlank(r.getBestaccessright().getClassid())) { if (Objects.isNull(r.getBestaccessright())
|| StringUtils.isBlank(r.getBestaccessright().getClassid())) {
Qualifier bestaccessrights = OafMapperUtils.createBestAccessRights(r.getInstance()); Qualifier bestaccessrights = OafMapperUtils.createBestAccessRights(r.getInstance());
if (Objects.isNull(bestaccessrights)) { if (Objects.isNull(bestaccessrights)) {
r r
@ -382,14 +537,7 @@ public class GraphCleaningFunctions extends CleaningFunctions {
.filter(p -> StringUtils.isNotBlank(p.getValue())) .filter(p -> StringUtils.isNotBlank(p.getValue()))
.map(p -> { .map(p -> {
// hack to distinguish orcid from orcid_pending // hack to distinguish orcid from orcid_pending
String pidProvenance = Optional String pidProvenance = getProvenance(p.getDataInfo());
.ofNullable(p.getDataInfo())
.map(
d -> Optional
.ofNullable(d.getProvenanceaction())
.map(Qualifier::getClassid)
.orElse(""))
.orElse("");
if (p if (p
.getQualifier() .getQualifier()
.getClassid() .getClassid()
@ -437,6 +585,20 @@ public class GraphCleaningFunctions extends CleaningFunctions {
} else if (value instanceof Software) { } else if (value instanceof Software) {
} }
}
} else if (value instanceof Relation) {
Relation r = (Relation) value;
Optional<String> validationDate = doCleanDate(r.getValidationDate());
if (validationDate.isPresent()) {
r.setValidationDate(validationDate.get());
r.setValidated(true);
} else {
r.setValidationDate(null);
r.setValidated(false);
}
} }
return value; return value;
@ -491,7 +653,7 @@ public class GraphCleaningFunctions extends CleaningFunctions {
.filter(sp -> !PID_BLACKLIST.contains(sp.getValue().trim().toLowerCase())) .filter(sp -> !PID_BLACKLIST.contains(sp.getValue().trim().toLowerCase()))
.filter(sp -> Objects.nonNull(sp.getQualifier())) .filter(sp -> Objects.nonNull(sp.getQualifier()))
.filter(sp -> StringUtils.isNotBlank(sp.getQualifier().getClassid())) .filter(sp -> StringUtils.isNotBlank(sp.getQualifier().getClassid()))
.map(CleaningFunctions::normalizePidValue) .map(PidCleaner::normalizePidValue)
.filter(CleaningFunctions::pidFilter) .filter(CleaningFunctions::pidFilter)
.collect(Collectors.toList()); .collect(Collectors.toList());
} }
@ -520,6 +682,11 @@ public class GraphCleaningFunctions extends CleaningFunctions {
return s; return s;
} }
protected static Subject cleanValue(Subject s) {
s.setValue(s.getValue().replaceAll(CLEANING_REGEX, " "));
return s;
}
protected static Field<String> cleanValue(Field<String> s) { protected static Field<String> cleanValue(Field<String> s) {
s.setValue(s.getValue().replaceAll(CLEANING_REGEX, " ")); s.setValue(s.getValue().replaceAll(CLEANING_REGEX, " "));
return s; return s;

View File

@ -0,0 +1,18 @@
package eu.dnetlib.dhp.schema.oaf.utils;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class GridCleaningRule {
public static String clean(String grid) {
String s = grid
.replaceAll("\\s", "")
.toLowerCase();
Matcher m = Pattern.compile("\\d{4,6}\\.[0-9a-z]{1,2}").matcher(s);
return m.matches() ? "grid." + m.group() : "";
}
}

View File

@ -0,0 +1,19 @@
package eu.dnetlib.dhp.schema.oaf.utils;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
// https://www.wikidata.org/wiki/Property:P213
public class ISNICleaningRule {
public static String clean(final String isni) {
Matcher m = Pattern.compile("([0]{4}) ?([0-9]{4}) ?([0-9]{4}) ?([0-9]{3}[0-9X])").matcher(isni);
if (m.matches()) {
return String.join("", m.group(1), m.group(2), m.group(3), m.group(4));
} else {
return "";
}
}
}

View File

@ -14,6 +14,7 @@ import java.util.stream.Collectors;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import eu.dnetlib.dhp.schema.common.AccessRightComparator; import eu.dnetlib.dhp.schema.common.AccessRightComparator;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.dhp.schema.oaf.*;
@ -141,7 +142,7 @@ public class OafMapperUtils {
} }
public static Qualifier unknown(final String schemeid, final String schemename) { public static Qualifier unknown(final String schemeid, final String schemename) {
return qualifier("UNKNOWN", "Unknown", schemeid, schemename); return qualifier(UNKNOWN, "Unknown", schemeid, schemename);
} }
public static AccessRight accessRight( public static AccessRight accessRight(
@ -189,6 +190,17 @@ public class OafMapperUtils {
return q; return q;
} }
public static Subject subject(
final String value,
final String classid,
final String classname,
final String schemeid,
final String schemename,
final DataInfo dataInfo) {
return subject(value, qualifier(classid, classname, schemeid, schemename), dataInfo);
}
public static StructuredProperty structuredProperty( public static StructuredProperty structuredProperty(
final String value, final String value,
final String classid, final String classid,
@ -200,6 +212,20 @@ public class OafMapperUtils {
return structuredProperty(value, qualifier(classid, classname, schemeid, schemename), dataInfo); return structuredProperty(value, qualifier(classid, classname, schemeid, schemename), dataInfo);
} }
public static Subject subject(
final String value,
final Qualifier qualifier,
final DataInfo dataInfo) {
if (value == null) {
return null;
}
final Subject s = new Subject();
s.setValue(value);
s.setQualifier(qualifier);
s.setDataInfo(dataInfo);
return s;
}
public static StructuredProperty structuredProperty( public static StructuredProperty structuredProperty(
final String value, final String value,
final Qualifier qualifier, final Qualifier qualifier,
@ -477,4 +503,15 @@ public class OafMapperUtils {
rel.setProperties(properties); rel.setProperties(properties);
return rel; return rel;
} }
public static String getProvenance(DataInfo dataInfo) {
return Optional
.ofNullable(dataInfo)
.map(
d -> Optional
.ofNullable(d.getProvenanceaction())
.map(Qualifier::getClassid)
.orElse(""))
.orElse("");
}
} }

View File

@ -0,0 +1,19 @@
package eu.dnetlib.dhp.schema.oaf.utils;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class PICCleaningRule {
public static String clean(final String pic) {
Matcher m = Pattern.compile("\\d{9}").matcher(pic);
if (m.matches()) {
return m.group();
} else {
return "";
}
}
}

View File

@ -0,0 +1,62 @@
package eu.dnetlib.dhp.schema.oaf.utils;
import java.util.Optional;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
public class PidCleaner {
/**
* Utility method that normalises PID values on a per-type basis.
* @param pid the PID whose value will be normalised.
* @return the PID containing the normalised value.
*/
public static StructuredProperty normalizePidValue(StructuredProperty pid) {
pid
.setValue(
normalizePidValue(
pid.getQualifier().getClassid(),
pid.getValue()));
return pid;
}
public static String normalizePidValue(String pidType, String pidValue) {
String value = Optional
.ofNullable(pidValue)
.map(String::trim)
.orElseThrow(() -> new IllegalArgumentException("PID value cannot be empty"));
switch (pidType) {
// TODO add cleaning for more PID types as needed
// Result
case "doi":
return DoiCleaningRule.clean(value);
case "pmid":
return PmidCleaningRule.clean(value);
case "pmc":
return PmcCleaningRule.clean(value);
case "handle":
case "arXiv":
return value;
// Organization
case "GRID":
return GridCleaningRule.clean(value);
case "ISNI":
return ISNICleaningRule.clean(value);
case "ROR":
return RorCleaningRule.clean(value);
case "PIC":
return PICCleaningRule.clean(value);
case "FundRef":
return FundRefCleaningRule.clean(value);
default:
return value;
}
}
}

View File

@ -0,0 +1,13 @@
package eu.dnetlib.dhp.schema.oaf.utils;
public class PmcCleaningRule {
public static String clean(String pmc) {
String s = pmc
.replaceAll("\\s", "")
.toUpperCase();
return s.matches("^PMC\\d{1,8}$") ? s : "";
}
}

View File

@ -0,0 +1,16 @@
package eu.dnetlib.dhp.schema.oaf.utils;
// https://researchguides.stevens.edu/c.php?g=442331&p=6577176
public class PmidCleaningRule {
public static String clean(String pmid) {
String s = pmid
.toLowerCase()
.replaceAll("\\s", "")
.trim()
.replaceAll("^0+", "");
return s.matches("^\\d{1,8}$") ? s : "";
}
}

View File

@ -0,0 +1,18 @@
package eu.dnetlib.dhp.schema.oaf.utils;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
// https://ror.readme.io/docs/ror-identifier-pattern
public class RorCleaningRule {
public static String clean(String ror) {
String s = ror
.replaceAll("\\s", "")
.toLowerCase();
Matcher m = Pattern.compile("0[a-hj-km-np-tv-z|0-9]{6}[0-9]{2}").matcher(s);
return m.matches() ? "https://ror.org/" + m.group() : "";
}
}

View File

@ -0,0 +1,46 @@
package eu.dnetlib.dhp.schema.oaf.utils;
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.getProvenance;
import static org.apache.commons.lang3.StringUtils.isBlank;
import java.util.Comparator;
import eu.dnetlib.dhp.schema.oaf.Subject;
public class SubjectProvenanceComparator implements Comparator<Subject> {
@Override
public int compare(Subject left, Subject right) {
String lProv = getProvenance(left.getDataInfo());
String rProv = getProvenance(right.getDataInfo());
if (isBlank(lProv) && isBlank(rProv))
return 0;
if (isBlank(lProv))
return 1;
if (isBlank(rProv))
return -1;
if (lProv.equals(rProv))
return 0;
if (lProv.toLowerCase().contains("crosswalk"))
return -1;
if (rProv.toLowerCase().contains("crosswalk"))
return 1;
if (lProv.toLowerCase().contains("user"))
return -1;
if (rProv.toLowerCase().contains("user"))
return 1;
if (lProv.toLowerCase().contains("propagation"))
return -1;
if (rProv.toLowerCase().contains("propagation"))
return 1;
if (lProv.toLowerCase().contains("iis"))
return -1;
if (rProv.toLowerCase().contains("iis"))
return 1;
return 0;
}
}

View File

@ -23,12 +23,6 @@
"paramLongName":"splitSize", "paramLongName":"splitSize",
"paramDescription": "the maximum size of the archive", "paramDescription": "the maximum size of the archive",
"paramRequired": false "paramRequired": false
},
{
"paramName":"rn",
"paramLongName":"rename",
"paramDescription": "if the file has to be renamed",
"paramRequired": false
} }
] ]

View File

@ -0,0 +1,38 @@
package eu.dnetlib.dhp.common;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.List;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
public class MdStoreClientTest {
@Test
@Disabled
public void testMongoCollection() throws IOException {
final MdstoreClient client = new MdstoreClient("mongodb://localhost:27017", "mdstore");
final ObjectMapper mapper = new ObjectMapper();
final List<MDStoreInfo> infos = client.mdStoreWithTimestamp("ODF", "store", "cleaned");
infos.forEach(System.out::println);
final String s = mapper.writeValueAsString(infos);
Path fileName = Paths.get("/Users/sandro/mdstore_info.json");
// Writing into the file
Files.write(fileName, s.getBytes(StandardCharsets.UTF_8));
}
}

View File

@ -1,100 +0,0 @@
package eu.dnetlib.dhp.oa.merge;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;
import java.util.stream.Collectors;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.schema.oaf.Author;
import eu.dnetlib.dhp.schema.oaf.Publication;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
import eu.dnetlib.pace.util.MapDocumentUtil;
import scala.Tuple2;
class AuthorMergerTest {
private String publicationsBasePath;
private List<List<Author>> authors;
@BeforeEach
public void setUp() throws Exception {
publicationsBasePath = Paths
.get(AuthorMergerTest.class.getResource("/eu/dnetlib/dhp/oa/merge").toURI())
.toFile()
.getAbsolutePath();
authors = readSample(publicationsBasePath + "/publications_with_authors.json", Publication.class)
.stream()
.map(p -> p._2().getAuthor())
.collect(Collectors.toList());
}
@Test
void mergeTest() { // used in the dedup: threshold set to 0.95
for (List<Author> authors1 : authors) {
System.out.println("List " + (authors.indexOf(authors1) + 1));
for (Author author : authors1) {
System.out.println(authorToString(author));
}
}
List<Author> merge = AuthorMerger.merge(authors);
System.out.println("Merge ");
for (Author author : merge) {
System.out.println(authorToString(author));
}
Assertions.assertEquals(7, merge.size());
}
public <T> List<Tuple2<String, T>> readSample(String path, Class<T> clazz) {
List<Tuple2<String, T>> res = new ArrayList<>();
BufferedReader reader;
try {
reader = new BufferedReader(new FileReader(path));
String line = reader.readLine();
while (line != null) {
res
.add(
new Tuple2<>(
MapDocumentUtil.getJPathString("$.id", line),
new ObjectMapper().readValue(line, clazz)));
// read next line
line = reader.readLine();
}
reader.close();
} catch (IOException e) {
e.printStackTrace();
}
return res;
}
public String authorToString(Author a) {
String print = "Fullname = ";
print += a.getFullname() + " pid = [";
if (a.getPid() != null)
for (StructuredProperty sp : a.getPid()) {
print += sp.toComparableString() + " ";
}
print += "]";
return print;
}
}

File diff suppressed because one or more lines are too long

View File

@ -13,6 +13,7 @@ import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty; import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
import eu.dnetlib.dhp.schema.oaf.Subject;
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
public class Constants { public class Constants {
@ -59,13 +60,13 @@ public class Constants {
.map((MapFunction<String, R>) value -> OBJECT_MAPPER.readValue(value, clazz), Encoders.bean(clazz)); .map((MapFunction<String, R>) value -> OBJECT_MAPPER.readValue(value, clazz), Encoders.bean(clazz));
} }
public static StructuredProperty getSubject(String sbj, String classid, String classname, public static Subject getSubject(String sbj, String classid, String classname,
String diqualifierclassid) { String diqualifierclassid) {
if (sbj == null || sbj.equals(NULL)) if (sbj == null || sbj.equals(NULL))
return null; return null;
StructuredProperty sp = new StructuredProperty(); Subject s = new Subject();
sp.setValue(sbj); s.setValue(sbj);
sp s
.setQualifier( .setQualifier(
OafMapperUtils OafMapperUtils
.qualifier( .qualifier(
@ -73,7 +74,7 @@ public class Constants {
classname, classname,
ModelConstants.DNET_SUBJECT_TYPOLOGIES, ModelConstants.DNET_SUBJECT_TYPOLOGIES,
ModelConstants.DNET_SUBJECT_TYPOLOGIES)); ModelConstants.DNET_SUBJECT_TYPOLOGIES));
sp s
.setDataInfo( .setDataInfo(
OafMapperUtils OafMapperUtils
.dataInfo( .dataInfo(
@ -89,7 +90,7 @@ public class Constants {
ModelConstants.DNET_PROVENANCE_ACTIONS), ModelConstants.DNET_PROVENANCE_ACTIONS),
"")); ""));
return sp; return s;
} }
} }

View File

@ -24,6 +24,7 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.Result; import eu.dnetlib.dhp.schema.oaf.Result;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty; import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
import eu.dnetlib.dhp.schema.oaf.Subject;
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
import eu.dnetlib.dhp.utils.DHPUtils; import eu.dnetlib.dhp.utils.DHPUtils;
@ -79,7 +80,7 @@ public class PrepareFOSSparkJob implements Serializable {
HashSet<String> level3 = new HashSet<>(); HashSet<String> level3 = new HashSet<>();
addLevels(level1, level2, level3, first); addLevels(level1, level2, level3, first);
it.forEachRemaining(v -> addLevels(level1, level2, level3, v)); it.forEachRemaining(v -> addLevels(level1, level2, level3, v));
List<StructuredProperty> sbjs = new ArrayList<>(); List<Subject> sbjs = new ArrayList<>();
level1.forEach(l -> sbjs.add(getSubject(l, FOS_CLASS_ID, FOS_CLASS_NAME, UPDATE_SUBJECT_FOS_CLASS_ID))); level1.forEach(l -> sbjs.add(getSubject(l, FOS_CLASS_ID, FOS_CLASS_NAME, UPDATE_SUBJECT_FOS_CLASS_ID)));
level2.forEach(l -> sbjs.add(getSubject(l, FOS_CLASS_ID, FOS_CLASS_NAME, UPDATE_SUBJECT_FOS_CLASS_ID))); level2.forEach(l -> sbjs.add(getSubject(l, FOS_CLASS_ID, FOS_CLASS_NAME, UPDATE_SUBJECT_FOS_CLASS_ID)));
level3.forEach(l -> sbjs.add(getSubject(l, FOS_CLASS_ID, FOS_CLASS_NAME, UPDATE_SUBJECT_FOS_CLASS_ID))); level3.forEach(l -> sbjs.add(getSubject(l, FOS_CLASS_ID, FOS_CLASS_NAME, UPDATE_SUBJECT_FOS_CLASS_ID)));

View File

@ -24,6 +24,7 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.Result; import eu.dnetlib.dhp.schema.oaf.Result;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty; import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
import eu.dnetlib.dhp.schema.oaf.Subject;
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
import eu.dnetlib.dhp.utils.DHPUtils; import eu.dnetlib.dhp.utils.DHPUtils;
@ -73,7 +74,7 @@ public class PrepareSDGSparkJob implements Serializable {
Result r = new Result(); Result r = new Result();
r.setId(DHPUtils.generateUnresolvedIdentifier(k, DOI)); r.setId(DHPUtils.generateUnresolvedIdentifier(k, DOI));
SDGDataModel first = it.next(); SDGDataModel first = it.next();
List<StructuredProperty> sbjs = new ArrayList<>(); List<Subject> sbjs = new ArrayList<>();
sbjs.add(getSubject(first.getSbj(), SDG_CLASS_ID, SDG_CLASS_NAME, UPDATE_SUBJECT_SDG_CLASS_ID)); sbjs.add(getSubject(first.getSbj(), SDG_CLASS_ID, SDG_CLASS_NAME, UPDATE_SUBJECT_SDG_CLASS_ID));
it it
.forEachRemaining( .forEachRemaining(

View File

@ -114,10 +114,10 @@ public class CreateActionSetSparkJob implements Serializable {
if (!citing.equals(cited)) { if (!citing.equals(cited)) {
relationList relationList
.addAll( .add(
getRelations( getRelation(
citing, citing,
cited)); cited, ModelConstants.CITES));
if (duplicate && value.getCiting().endsWith(".refs")) { if (duplicate && value.getCiting().endsWith(".refs")) {
citing = ID_PREFIX + IdentifierFactory citing = ID_PREFIX + IdentifierFactory
@ -125,7 +125,7 @@ public class CreateActionSetSparkJob implements Serializable {
CleaningFunctions CleaningFunctions
.normalizePidValue( .normalizePidValue(
"doi", value.getCiting().substring(0, value.getCiting().indexOf(".refs")))); "doi", value.getCiting().substring(0, value.getCiting().indexOf(".refs"))));
relationList.addAll(getRelations(citing, cited)); relationList.add(getRelation(citing, cited, ModelConstants.CITES));
} }
} }

View File

@ -266,11 +266,15 @@ public class PrepareProgramme {
String code = csvProgramme.getCode(); String code = csvProgramme.getCode();
if (!code.endsWith(".") && !code.contains("Euratom") if (!code.endsWith(".") && !code.contains("Euratom")
&& !code.equals("H2020-EC")) && !code.equals("H2020-EC") && !code.equals("H2020") &&
!code.equals("H2020-Topics"))
code += "."; code += ".";
if (map.containsKey(code)) {
csvProgramme.setClassification(map.get(code)._1()); csvProgramme.setClassification(map.get(code)._1());
csvProgramme.setClassification_short(map.get(code)._2()); csvProgramme.setClassification_short(map.get(code)._2());
} else
log.info("WARNING: No entry in map for code " + code);
return csvProgramme; return csvProgramme;
}).collect(); }).collect();

View File

@ -3,12 +3,23 @@ package eu.dnetlib.dhp.actionmanager.project;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import java.io.BufferedOutputStream;
import java.io.IOException;
import java.util.*; import java.util.*;
import java.util.zip.GZIPOutputStream;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.spark.SparkConf; import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.FlatMapFunction; import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.rdd.RDD;
import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders; import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SaveMode; import org.apache.spark.sql.SaveMode;
@ -19,6 +30,7 @@ import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.actionmanager.project.utils.model.CSVProject; import eu.dnetlib.dhp.actionmanager.project.utils.model.CSVProject;
import eu.dnetlib.dhp.actionmanager.project.utils.model.Project;
import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.HdfsSupport; import eu.dnetlib.dhp.common.HdfsSupport;
import scala.Tuple2; import scala.Tuple2;
@ -54,6 +66,9 @@ public class PrepareProjects {
final String projectPath = parser.get("projectPath"); final String projectPath = parser.get("projectPath");
log.info("projectPath {}: ", projectPath); log.info("projectPath {}: ", projectPath);
final String workingPath = parser.get("workingPath");
log.info("workingPath {}: ", workingPath);
final String outputPath = parser.get("outputPath"); final String outputPath = parser.get("outputPath");
log.info("outputPath {}: ", outputPath); log.info("outputPath {}: ", outputPath);
@ -76,7 +91,7 @@ public class PrepareProjects {
} }
private static void exec(SparkSession spark, String projectPath, String dbProjectPath, String outputPath) { private static void exec(SparkSession spark, String projectPath, String dbProjectPath, String outputPath) {
Dataset<CSVProject> project = readPath(spark, projectPath, CSVProject.class); Dataset<Project> project = readPath(spark, projectPath, Project.class);
Dataset<ProjectSubset> dbProjects = readPath(spark, dbProjectPath, ProjectSubset.class); Dataset<ProjectSubset> dbProjects = readPath(spark, dbProjectPath, ProjectSubset.class);
dbProjects dbProjects
@ -90,14 +105,14 @@ public class PrepareProjects {
} }
private static FlatMapFunction<Tuple2<ProjectSubset, CSVProject>, CSVProject> getTuple2CSVProjectFlatMapFunction() { private static FlatMapFunction<Tuple2<ProjectSubset, Project>, CSVProject> getTuple2CSVProjectFlatMapFunction() {
return value -> { return value -> {
Optional<CSVProject> csvProject = Optional.ofNullable(value._2());
List<CSVProject> csvProjectList = new ArrayList<>(); List<CSVProject> csvProjectList = new ArrayList<>();
if (csvProject.isPresent()) { if (Optional.ofNullable(value._2()).isPresent()) {
Project project = value._2();
String[] programme = csvProject.get().getProgramme().split(";"); String[] programme = project.getLegalBasis().split(";");
String topic = csvProject.get().getTopics(); String topic = project.getTopics();
Arrays Arrays
.stream(programme) .stream(programme)
@ -106,7 +121,7 @@ public class PrepareProjects {
proj.setTopics(topic); proj.setTopics(topic);
proj.setProgramme(p); proj.setProgramme(p);
proj.setId(csvProject.get().getId()); proj.setId(project.getId());
csvProjectList.add(proj); csvProjectList.add(proj);
}); });
} }

View File

@ -24,6 +24,7 @@ import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.actionmanager.project.utils.model.CSVProgramme; import eu.dnetlib.dhp.actionmanager.project.utils.model.CSVProgramme;
import eu.dnetlib.dhp.actionmanager.project.utils.model.CSVProject; import eu.dnetlib.dhp.actionmanager.project.utils.model.CSVProject;
import eu.dnetlib.dhp.actionmanager.project.utils.model.EXCELTopic; import eu.dnetlib.dhp.actionmanager.project.utils.model.EXCELTopic;
import eu.dnetlib.dhp.actionmanager.project.utils.model.JsonTopic;
import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.HdfsSupport; import eu.dnetlib.dhp.common.HdfsSupport;
import eu.dnetlib.dhp.schema.action.AtomicAction; import eu.dnetlib.dhp.schema.action.AtomicAction;
@ -110,7 +111,7 @@ public class SparkAtomicActionJob {
Dataset<CSVProject> project = readPath(spark, projectPatH, CSVProject.class); Dataset<CSVProject> project = readPath(spark, projectPatH, CSVProject.class);
Dataset<CSVProgramme> programme = readPath(spark, programmePath, CSVProgramme.class); Dataset<CSVProgramme> programme = readPath(spark, programmePath, CSVProgramme.class);
Dataset<EXCELTopic> topic = readPath(spark, topicPath, EXCELTopic.class); Dataset<JsonTopic> topic = readPath(spark, topicPath, JsonTopic.class);
Dataset<Project> aaproject = project Dataset<Project> aaproject = project
.joinWith(programme, project.col("programme").equalTo(programme.col("code")), "left") .joinWith(programme, project.col("programme").equalTo(programme.col("code")), "left")
@ -124,9 +125,7 @@ public class SparkAtomicActionJob {
Project pp = new Project(); Project pp = new Project();
pp pp
.setId( .setId(
createOpenaireId( csvProject.getId());
ModelSupport.entityIdPrefix.get("project"),
"corda__h2020", csvProject.getId()));
pp.setH2020topiccode(csvProject.getTopics()); pp.setH2020topiccode(csvProject.getTopics());
H2020Programme pm = new H2020Programme(); H2020Programme pm = new H2020Programme();
H2020Classification h2020classification = new H2020Classification(); H2020Classification h2020classification = new H2020Classification();
@ -144,10 +143,15 @@ public class SparkAtomicActionJob {
.filter(Objects::nonNull); .filter(Objects::nonNull);
aaproject aaproject
.joinWith(topic, aaproject.col("h2020topiccode").equalTo(topic.col("code")), "left") .joinWith(topic, aaproject.col("id").equalTo(topic.col("projectID")), "left")
.map((MapFunction<Tuple2<Project, EXCELTopic>, Project>) p -> { .map((MapFunction<Tuple2<Project, JsonTopic>, Project>) p -> {
Optional<EXCELTopic> op = Optional.ofNullable(p._2()); Optional<JsonTopic> op = Optional.ofNullable(p._2());
Project rp = p._1(); Project rp = p._1();
rp
.setId(
createOpenaireId(
ModelSupport.entityIdPrefix.get("project"),
"corda__h2020", rp.getId()));
op.ifPresent(excelTopic -> rp.setH2020topicdescription(excelTopic.getTitle())); op.ifPresent(excelTopic -> rp.setH2020topicdescription(excelTopic.getTitle()));
return rp; return rp;
}, Encoders.bean(Project.class)) }, Encoders.bean(Project.class))

View File

@ -22,6 +22,7 @@ import eu.dnetlib.dhp.actionmanager.project.utils.model.EXCELTopic;
/** /**
* Reads a generic excel file and maps it into classes that mirror its schema * Reads a generic excel file and maps it into classes that mirror its schema
*/ */
@Deprecated
public class EXCELParser { public class EXCELParser {
public <R> List<R> parse(InputStream file, String classForName, String sheetName) public <R> List<R> parse(InputStream file, String classForName, String sheetName)

View File

@ -0,0 +1,101 @@
package eu.dnetlib.dhp.actionmanager.project.utils;
import java.io.BufferedWriter;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.Serializable;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.actionmanager.project.PrepareProjects;
import eu.dnetlib.dhp.actionmanager.project.utils.model.Project;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
/**
* @author miriam.baglioni
* @Date 28/02/23
*/
public class ExtractFromZip implements Serializable {
private static final Logger log = LoggerFactory.getLogger(PrepareProjects.class);
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
public static void main(String[] args) throws Exception {
String jsonConfiguration = IOUtils
.toString(
PrepareProjects.class
.getResourceAsStream(
"/eu/dnetlib/dhp/actionmanager/project/extract_fromzip_parameters.json"));
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
parser.parseArgument(args);
final String inputPath = parser.get("inputPath");
log.info("inputPath {}: ", inputPath);
final String outputPath = parser.get("outputPath");
log.info("outputPath {}: ", outputPath);
final String hdfsNameNode = parser.get("hdfsNameNode");
log.info("hdfsNameNode {}", hdfsNameNode);
Configuration conf = new Configuration();
conf.set("fs.defaultFS", hdfsNameNode);
FileSystem fs = FileSystem.get(conf);
doExtract(inputPath, outputPath, fs);
}
private static void doExtract(String inputFile, String workingPath, FileSystem fileSystem)
throws IOException {
final Path path = new Path(inputFile);
FSDataInputStream project_zip = fileSystem.open(path);
try (ZipInputStream zis = new ZipInputStream(project_zip)) {
ZipEntry entry = null;
while ((entry = zis.getNextEntry()) != null) {
if (!entry.isDirectory()) {
String fileName = entry.getName();
byte buffer[] = new byte[1024];
int count;
try (
FSDataOutputStream out = fileSystem
.create(new Path(workingPath + fileName))) {
while ((count = zis.read(buffer, 0, buffer.length)) != -1)
out.write(buffer, 0, count);
}
}
}
}
}
}

View File

@ -6,7 +6,9 @@ import java.util.Optional;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.collection.GetCSV; import eu.dnetlib.dhp.common.collection.GetCSV;
@ -40,8 +42,11 @@ public class ReadCSV {
conf.set("fs.defaultFS", hdfsNameNode); conf.set("fs.defaultFS", hdfsNameNode);
FileSystem fileSystem = FileSystem.get(conf); FileSystem fileSystem = FileSystem.get(conf);
FSDataInputStream inputStream = fileSystem.open(new Path(fileURL));
BufferedReader reader = new BufferedReader( BufferedReader reader = new BufferedReader(
new InputStreamReader(new HttpConnector2().getInputSourceAsStream(fileURL))); new InputStreamReader(inputStream));
GetCSV.getCsv(fileSystem, reader, hdfsPath, classForName, del); GetCSV.getCsv(fileSystem, reader, hdfsPath, classForName, del);

View File

@ -0,0 +1,90 @@
package eu.dnetlib.dhp.actionmanager.project.utils;
import java.io.*;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.actionmanager.project.PrepareProjects;
import eu.dnetlib.dhp.actionmanager.project.utils.model.Project;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
/**
* @author miriam.baglioni
* @Date 28/02/23
*/
public class ReadProjects implements Serializable {
private static final Logger log = LoggerFactory.getLogger(ReadProjects.class);
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
public static void main(String[] args) throws Exception {
String jsonConfiguration = IOUtils
.toString(
PrepareProjects.class
.getResourceAsStream(
"/eu/dnetlib/dhp/actionmanager/project/read_parameters.json"));
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
parser.parseArgument(args);
final String inputPath = parser.get("inputPath");
log.info("inputPath {}: ", inputPath);
final String outputPath = parser.get("outputPath");
log.info("outputPath {}: ", outputPath);
final String hdfsNameNode = parser.get("hdfsNameNode");
log.info("hdfsNameNode {}", hdfsNameNode);
Configuration conf = new Configuration();
conf.set("fs.defaultFS", hdfsNameNode);
FileSystem fs = FileSystem.get(conf);
readProjects(inputPath, outputPath, fs);
}
public static void readProjects(String inputFile, String workingPath, FileSystem fs) throws IOException {
Path hdfsreadpath = new Path(inputFile);
FSDataInputStream inputStream = fs.open(hdfsreadpath);
ArrayList<Project> projects = OBJECT_MAPPER
.readValue(
IOUtils.toString(inputStream, "UTF-8"),
new TypeReference<List<Project>>() {
});
Path hdfsWritePath = new Path(workingPath);
if (fs.exists(hdfsWritePath)) {
fs.delete(hdfsWritePath, false);
}
FSDataOutputStream fos = fs.create(hdfsWritePath);
try (BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(fos, StandardCharsets.UTF_8))) {
for (Project p : projects) {
writer.write(OBJECT_MAPPER.writeValueAsString(p));
writer.newLine();
}
}
}
}

View File

@ -0,0 +1,92 @@
package eu.dnetlib.dhp.actionmanager.project.utils;
import java.io.BufferedWriter;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.Serializable;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.actionmanager.project.PrepareProjects;
import eu.dnetlib.dhp.actionmanager.project.utils.model.JsonTopic;
import eu.dnetlib.dhp.actionmanager.project.utils.model.Project;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
/**
* @author miriam.baglioni
* @Date 28/02/23
*/
public class ReadTopics implements Serializable {
private static final Logger log = LoggerFactory.getLogger(ReadTopics.class);
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
public static void main(String[] args) throws Exception {
String jsonConfiguration = IOUtils
.toString(
PrepareProjects.class
.getResourceAsStream(
"/eu/dnetlib/dhp/actionmanager/project/read_parameters.json"));
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
parser.parseArgument(args);
final String inputPath = parser.get("inputPath");
log.info("inputPath {}: ", inputPath);
final String outputPath = parser.get("outputPath");
log.info("outputPath {}: ", outputPath);
final String hdfsNameNode = parser.get("hdfsNameNode");
log.info("hdfsNameNode {}", hdfsNameNode);
Configuration conf = new Configuration();
conf.set("fs.defaultFS", hdfsNameNode);
FileSystem fs = FileSystem.get(conf);
readTopics(inputPath, outputPath, fs);
}
public static void readTopics(String inputFile, String workingPath, FileSystem fs) throws IOException {
Path hdfsreadpath = new Path(inputFile);
FSDataInputStream inputStream = fs.open(hdfsreadpath);
ArrayList<JsonTopic> topics = OBJECT_MAPPER
.readValue(
IOUtils.toString(inputStream, "UTF-8"),
new TypeReference<List<JsonTopic>>() {
});
Path hdfsWritePath = new Path(workingPath);
if (fs.exists(hdfsWritePath)) {
fs.delete(hdfsWritePath, false);
}
FSDataOutputStream fos = fs.create(hdfsWritePath);
try (BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(fos, StandardCharsets.UTF_8))) {
for (JsonTopic p : topics) {
writer.write(OBJECT_MAPPER.writeValueAsString(p));
writer.newLine();
}
}
}
}

View File

@ -13,7 +13,7 @@ public class CSVProject implements Serializable {
@CsvBindByName(column = "id") @CsvBindByName(column = "id")
private String id; private String id;
@CsvBindByName(column = "programme") @CsvBindByName(column = "legalBasis")
private String programme; private String programme;
@CsvBindByName(column = "topics") @CsvBindByName(column = "topics")

View File

@ -6,6 +6,7 @@ import java.io.Serializable;
/** /**
* the model class for the topic excel file * the model class for the topic excel file
*/ */
@Deprecated
public class EXCELTopic implements Serializable { public class EXCELTopic implements Serializable {
private String rcn; private String rcn;
private String language; private String language;
@ -17,9 +18,27 @@ public class EXCELTopic implements Serializable {
private String title; private String title;
private String shortTitle; private String shortTitle;
private String objective; private String objective;
private String subjects; private String keywords;
private String legalBasis; private String legalBasis;
private String call; private String call;
private String id;
private String contentUpdateDate;
public String getContentUpdateDate() {
return contentUpdateDate;
}
public void setContentUpdateDate(String contentUpdateDate) {
this.contentUpdateDate = contentUpdateDate;
}
public String getId() {
return id;
}
public void setId(String id) {
this.id = id;
}
public String getRcn() { public String getRcn() {
return rcn; return rcn;
@ -101,12 +120,12 @@ public class EXCELTopic implements Serializable {
this.objective = objective; this.objective = objective;
} }
public String getSubjects() { public String getKeywords() {
return subjects; return keywords;
} }
public void setSubjects(String subjects) { public void setKeywords(String keywords) {
this.subjects = subjects; this.keywords = keywords;
} }
public String getLegalBasis() { public String getLegalBasis() {

View File

@ -0,0 +1,38 @@
package eu.dnetlib.dhp.actionmanager.project.utils.model;
import java.io.Serializable;
/**
* @author miriam.baglioni
* @Date 28/02/23
*/
public class JsonTopic implements Serializable {
private String projectID;
private String title;
private String topic;
public String getProjectID() {
return projectID;
}
public void setProjectID(String projectID) {
this.projectID = projectID;
}
public String getTitle() {
return title;
}
public void setTitle(String title) {
this.title = title;
}
public String getTopic() {
return topic;
}
public void setTopic(String topic) {
this.topic = topic;
}
}

View File

@ -0,0 +1,191 @@
package eu.dnetlib.dhp.actionmanager.project.utils.model;
import java.io.Serializable;
/**
* @author miriam.baglioni
* @Date 24/02/23
*/
public class Project implements Serializable {
private String acronym;
private String contentUpdateDate;
private String ecMaxContribution;
private String ecSignatureDate;
private String endDate;
private String frameworkProgramme;
private String fundingScheme;
private String grantDoi;
private String id;
private String legalBasis;
private String masterCall;
private String nature;
private String objective;
private String rcn;
private String startDate;
private String status;
private String subCall;
private String title;
private String topics;
private String totalCost;
public String getAcronym() {
return acronym;
}
public void setAcronym(String acronym) {
this.acronym = acronym;
}
public String getContentUpdateDate() {
return contentUpdateDate;
}
public void setContentUpdateDate(String contentUpdateDate) {
this.contentUpdateDate = contentUpdateDate;
}
public String getEcMaxContribution() {
return ecMaxContribution;
}
public void setEcMaxContribution(String ecMaxContribution) {
this.ecMaxContribution = ecMaxContribution;
}
public String getEcSignatureDate() {
return ecSignatureDate;
}
public void setEcSignatureDate(String ecSignatureDate) {
this.ecSignatureDate = ecSignatureDate;
}
public String getEndDate() {
return endDate;
}
public void setEndDate(String endDate) {
this.endDate = endDate;
}
public String getFrameworkProgramme() {
return frameworkProgramme;
}
public void setFrameworkProgramme(String frameworkProgramme) {
this.frameworkProgramme = frameworkProgramme;
}
public String getFundingScheme() {
return fundingScheme;
}
public void setFundingScheme(String fundingScheme) {
this.fundingScheme = fundingScheme;
}
public String getGrantDoi() {
return grantDoi;
}
public void setGrantDoi(String grantDoi) {
this.grantDoi = grantDoi;
}
public String getId() {
return id;
}
public void setId(String id) {
this.id = id;
}
public String getLegalBasis() {
return legalBasis;
}
public void setLegalBasis(String legalBasis) {
this.legalBasis = legalBasis;
}
public String getMasterCall() {
return masterCall;
}
public void setMasterCall(String masterCall) {
this.masterCall = masterCall;
}
public String getNature() {
return nature;
}
public void setNature(String nature) {
this.nature = nature;
}
public String getObjective() {
return objective;
}
public void setObjective(String objective) {
this.objective = objective;
}
public String getRcn() {
return rcn;
}
public void setRcn(String rcn) {
this.rcn = rcn;
}
public String getStartDate() {
return startDate;
}
public void setStartDate(String startDate) {
this.startDate = startDate;
}
public String getStatus() {
return status;
}
public void setStatus(String status) {
this.status = status;
}
public String getSubCall() {
return subCall;
}
public void setSubCall(String subCall) {
this.subCall = subCall;
}
public String getTitle() {
return title;
}
public void setTitle(String title) {
this.title = title;
}
public String getTopics() {
return topics;
}
public void setTopics(String topics) {
this.topics = topics;
}
public String getTotalCost() {
return totalCost;
}
public void setTotalCost(String totalCost) {
this.totalCost = totalCost;
}
}

View File

@ -40,6 +40,7 @@ import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.actionmanager.ror.model.ExternalIdType; import eu.dnetlib.dhp.actionmanager.ror.model.ExternalIdType;
import eu.dnetlib.dhp.actionmanager.ror.model.RorOrganization; import eu.dnetlib.dhp.actionmanager.ror.model.RorOrganization;
import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.Constants;
import eu.dnetlib.dhp.common.HdfsSupport; import eu.dnetlib.dhp.common.HdfsSupport;
import eu.dnetlib.dhp.schema.action.AtomicAction; import eu.dnetlib.dhp.schema.action.AtomicAction;
import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.common.ModelConstants;
@ -59,10 +60,8 @@ public class GenerateRorActionSetJob {
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
private static final String ROR_NS_PREFIX = "ror_________";
private static final List<KeyValue> ROR_COLLECTED_FROM = listKeyValues( private static final List<KeyValue> ROR_COLLECTED_FROM = listKeyValues(
"10|openaire____::993a7ae7a863813cf95028b50708e222", "ROR"); Constants.ROR_OPENAIRE_ID, Constants.ROR_DATASOURCE_NAME);
private static final DataInfo ROR_DATA_INFO = dataInfo( private static final DataInfo ROR_DATA_INFO = dataInfo(
false, "", false, false, ENTITYREGISTRY_PROVENANCE_ACTION, "0.92"); false, "", false, false, ENTITYREGISTRY_PROVENANCE_ACTION, "0.92");
@ -126,7 +125,7 @@ public class GenerateRorActionSetJob {
final Organization o = new Organization(); final Organization o = new Organization();
o.setId(calculateOpenaireId(r.getId())); o.setId(calculateOpenaireId(r.getId()));
o.setOriginalId(Arrays.asList(String.format("%s::%s", ROR_NS_PREFIX, r.getId()))); o.setOriginalId(Arrays.asList(String.format("%s::%s", Constants.ROR_NS_PREFIX, r.getId())));
o.setCollectedfrom(ROR_COLLECTED_FROM); o.setCollectedfrom(ROR_COLLECTED_FROM);
o.setPid(pids(r)); o.setPid(pids(r));
o.setDateofcollection(now.toString()); o.setDateofcollection(now.toString());
@ -170,7 +169,7 @@ public class GenerateRorActionSetJob {
} }
private static String calculateOpenaireId(final String rorId) { private static String calculateOpenaireId(final String rorId) {
return String.format("20|%s::%s", ROR_NS_PREFIX, DHPUtils.md5(rorId)); return String.format("20|%s::%s", Constants.ROR_NS_PREFIX, DHPUtils.md5(rorId));
} }
private static List<StructuredProperty> pids(final RorOrganization r) { private static List<StructuredProperty> pids(final RorOrganization r) {

View File

@ -86,7 +86,7 @@
<spark xmlns="uri:oozie:spark-action:0.2"> <spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master> <master>yarn</master>
<mode>cluster</mode> <mode>cluster</mode>
<name>Produces the unresolved from bip finder!</name> <name>Produces the unresolved from BIP! Finder</name>
<class>eu.dnetlib.dhp.actionmanager.createunresolvedentities.PrepareBipFinder</class> <class>eu.dnetlib.dhp.actionmanager.createunresolvedentities.PrepareBipFinder</class>
<jar>dhp-aggregation-${projectVersion}.jar</jar> <jar>dhp-aggregation-${projectVersion}.jar</jar>
<spark-opts> <spark-opts>
@ -135,7 +135,7 @@
<spark xmlns="uri:oozie:spark-action:0.2"> <spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master> <master>yarn</master>
<mode>cluster</mode> <mode>cluster</mode>
<name>Produces the unresolved from FOS!</name> <name>Produces the unresolved from FOS</name>
<class>eu.dnetlib.dhp.actionmanager.createunresolvedentities.PrepareFOSSparkJob</class> <class>eu.dnetlib.dhp.actionmanager.createunresolvedentities.PrepareFOSSparkJob</class>
<jar>dhp-aggregation-${projectVersion}.jar</jar> <jar>dhp-aggregation-${projectVersion}.jar</jar>
<spark-opts> <spark-opts>
@ -185,7 +185,7 @@
<spark xmlns="uri:oozie:spark-action:0.2"> <spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master> <master>yarn</master>
<mode>cluster</mode> <mode>cluster</mode>
<name>Produces the unresolved from FOS!</name> <name>Produces the unresolved from FOS</name>
<class>eu.dnetlib.dhp.actionmanager.createunresolvedentities.PrepareSDGSparkJob</class> <class>eu.dnetlib.dhp.actionmanager.createunresolvedentities.PrepareSDGSparkJob</class>
<jar>dhp-aggregation-${projectVersion}.jar</jar> <jar>dhp-aggregation-${projectVersion}.jar</jar>
<spark-opts> <spark-opts>

View File

@ -0,0 +1,23 @@
[
{
"paramName": "ip",
"paramLongName": "inputPath",
"paramDescription": "the path where the projects are stored ",
"paramRequired": true
},
{
"paramName": "op",
"paramLongName": "outputPath",
"paramDescription": "the path for the extracted folder",
"paramRequired": true
},
{
"paramName": "hnn",
"paramLongName": "hdfsNameNode",
"paramDescription": "the hdfs namenode",
"paramRequired": true
}
]

View File

@ -0,0 +1,3 @@
#!/bin/bash
hdfs dfs -rm $2
curl -LSs $1 | hdfs dfs -put - $2

View File

@ -1,27 +1,9 @@
<workflow-app name="H2020Classification" xmlns="uri:oozie:workflow:0.5"> <workflow-app name="H2020Classification" xmlns="uri:oozie:workflow:0.5">
<parameters> <parameters>
<property>
<name>projectFileURL</name>
<description>the url where to get the projects file</description>
</property>
<property>
<name>programmeFileURL</name>
<description>the url where to get the programme file</description>
</property>
<property>
<name>topicFileURL</name>
<description>the url where to get the topic file</description>
</property>
<property> <property>
<name>outputPath</name> <name>outputPath</name>
<description>path where to store the action set</description> <description>path where to store the action set</description>
</property> </property>
<property>
<name>sheetName</name>
<description>the name of the sheet to read</description>
</property>
</parameters> </parameters>
<start to="deleteoutputpath"/> <start to="deleteoutputpath"/>
@ -35,40 +17,103 @@
<delete path='${workingDir}'/> <delete path='${workingDir}'/>
<mkdir path='${workingDir}'/> <mkdir path='${workingDir}'/>
</fs> </fs>
<ok to="fork_get_info"/> <ok to="fork_download_info"/>
<error to="Kill"/> <error to="Kill"/>
</action> </action>
<fork name="fork_download_info">
<fork name="fork_get_info">
<path start="fork_get_projects"/> <path start="fork_get_projects"/>
<path start="get_programme_file"/> <path start="download_programme_file"/>
<path start="get_topic_file"/>
</fork> </fork>
<fork name="fork_get_projects"> <fork name="fork_get_projects">
<path start="get_project_file"/> <path start="download_projects"/>
<path start="read_projects"/> <path start="read_projects_from_db"/>
</fork> </fork>
<action name="get_project_file"> <action name="download_projects">
<java> <shell xmlns="uri:oozie:shell-action:0.2">
<main-class>eu.dnetlib.dhp.actionmanager.project.utils.ReadCSV</main-class> <job-tracker>${jobTracker}</job-tracker>
<arg>--hdfsNameNode</arg><arg>${nameNode}</arg> <name-node>${nameNode}</name-node>
<arg>--fileURL</arg><arg>${projectFileURL}</arg> <configuration>
<arg>--hdfsPath</arg><arg>${workingDir}/projects</arg> <property>
<arg>--classForName</arg><arg>eu.dnetlib.dhp.actionmanager.project.utils.model.CSVProject</arg> <name>mapred.job.queue.name</name>
</java> <value>${queueName}</value>
<ok to="wait_projects"/> </property>
</configuration>
<exec>download.sh</exec>
<argument>${downloadH2020Projects}</argument>
<argument>${projectPath}</argument>
<env-var>HADOOP_USER_NAME=${wf:user()}</env-var>
<file>download.sh</file>
<capture-output/>
</shell>
<ok to="extract_projects"/>
<error to="Kill"/> <error to="Kill"/>
</action> </action>
<action name="get_programme_file"> <action name="extract_projects">
<java>
<main-class>eu.dnetlib.dhp.actionmanager.project.utils.ExtractFromZip</main-class>
<arg>--hdfsNameNode</arg><arg>${nameNode}</arg>
<arg>--inputPath</arg><arg>${projectPath}</arg>
<arg>--outputPath</arg><arg>${workingDir}/</arg>
</java>
<ok to="read_from_folder"/>
<error to="Kill"/>
</action>
<fork name="read_from_folder">
<path start="read_projects"/>
<path start="read_topic_file"/>
</fork>
<action name="read_projects">
<java>
<main-class>eu.dnetlib.dhp.actionmanager.project.utils.ReadProjects</main-class>
<arg>--hdfsNameNode</arg><arg>${nameNode}</arg>
<arg>--inputPath</arg><arg>${workingDir}/json/project.json</arg>
<arg>--outputPath</arg><arg>${workingDir}/projects</arg>
</java>
<ok to="wait_read_from_folder"/>
<error to="Kill"/>
</action>
<action name="download_programme_file">
<shell xmlns="uri:oozie:shell-action:0.2">
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<configuration>
<property>
<name>mapred.job.queue.name</name>
<value>${queueName}</value>
</property>
</configuration>
<exec>download.sh</exec>
<argument>${downloadH2020Programme}</argument>
<argument>${programmePath}</argument>
<env-var>HADOOP_USER_NAME=${wf:user()}</env-var>
<file>download.sh</file>
<capture-output/>
</shell>
<ok to="extract_programme"/>
<error to="Kill"/>
</action>
<action name="extract_programme">
<java>
<main-class>eu.dnetlib.dhp.actionmanager.project.utils.ExtractFromZip</main-class>
<arg>--hdfsNameNode</arg><arg>${nameNode}</arg>
<arg>--inputPath</arg><arg>${programmePath}</arg>
<arg>--outputPath</arg><arg>${workingDir}/downloadedProgramme/</arg>
</java>
<ok to="read_programme"/>
<error to="Kill"/>
</action>
<action name="read_programme">
<java> <java>
<main-class>eu.dnetlib.dhp.actionmanager.project.utils.ReadCSV</main-class> <main-class>eu.dnetlib.dhp.actionmanager.project.utils.ReadCSV</main-class>
<arg>--hdfsNameNode</arg><arg>${nameNode}</arg> <arg>--hdfsNameNode</arg><arg>${nameNode}</arg>
<arg>--fileURL</arg><arg>${programmeFileURL}</arg> <arg>--fileURL</arg><arg>${workingDir}/downloadedProgramme/csv/programme.csv</arg>
<arg>--hdfsPath</arg><arg>${workingDir}/programme</arg> <arg>--hdfsPath</arg><arg>${workingDir}/programme</arg>
<arg>--classForName</arg><arg>eu.dnetlib.dhp.actionmanager.project.utils.model.CSVProgramme</arg> <arg>--classForName</arg><arg>eu.dnetlib.dhp.actionmanager.project.utils.model.CSVProgramme</arg>
</java> </java>
@ -76,20 +121,18 @@
<error to="Kill"/> <error to="Kill"/>
</action> </action>
<action name="get_topic_file"> <action name="read_topic_file">
<java> <java>
<main-class>eu.dnetlib.dhp.actionmanager.project.utils.ReadExcel</main-class> <main-class>eu.dnetlib.dhp.actionmanager.project.utils.ReadTopics</main-class>
<arg>--hdfsNameNode</arg><arg>${nameNode}</arg> <arg>--hdfsNameNode</arg><arg>${nameNode}</arg>
<arg>--fileURL</arg><arg>${topicFileURL}</arg> <arg>--inputPath</arg><arg>${workingDir}/json/topics.json</arg>
<arg>--hdfsPath</arg><arg>${workingDir}/topic</arg> <arg>--outputPath</arg><arg>${workingDir}/topic</arg>
<arg>--sheetName</arg><arg>${sheetName}</arg>
<arg>--classForName</arg><arg>eu.dnetlib.dhp.actionmanager.project.utils.model.EXCELTopic</arg>
</java> </java>
<ok to="wait"/> <ok to="wait_read_from_folder"/>
<error to="Kill"/> <error to="Kill"/>
</action> </action>
<action name="read_projects"> <action name="read_projects_from_db">
<java> <java>
<main-class>eu.dnetlib.dhp.actionmanager.project.ReadProjectsFromDB</main-class> <main-class>eu.dnetlib.dhp.actionmanager.project.ReadProjectsFromDB</main-class>
<arg>--hdfsPath</arg><arg>${workingDir}/dbProjects</arg> <arg>--hdfsPath</arg><arg>${workingDir}/dbProjects</arg>
@ -123,9 +166,11 @@
<arg>--outputPath</arg><arg>${workingDir}/preparedProgramme</arg> <arg>--outputPath</arg><arg>${workingDir}/preparedProgramme</arg>
</spark> </spark>
<ok to="wait"/> <ok to="wait"/>
<!-- <ok to="End"/>-->
<error to="Kill"/> <error to="Kill"/>
</action> </action>
<join name="wait_read_from_folder" to="wait_projects"/>
<join name="wait" to="create_updates"/> <join name="wait" to="create_updates"/>
<join name="wait_projects" to="prepare_project"/> <join name="wait_projects" to="prepare_project"/>
@ -153,6 +198,7 @@
<arg>--dbProjectPath</arg><arg>${workingDir}/dbProjects</arg> <arg>--dbProjectPath</arg><arg>${workingDir}/dbProjects</arg>
</spark> </spark>
<ok to="wait"/> <ok to="wait"/>
<!-- <ok to="End"/>-->
<error to="Kill"/> <error to="Kill"/>
</action> </action>

View File

@ -0,0 +1,23 @@
[
{
"paramName": "ip",
"paramLongName": "inputPath",
"paramDescription": "the path where the projects are stored ",
"paramRequired": true
},
{
"paramName": "op",
"paramLongName": "outputPath",
"paramDescription": "the path for the extracted folder",
"paramRequired": true
},
{
"paramName": "hnn",
"paramLongName": "hdfsNameNode",
"paramDescription": "the hdfs namenode",
"paramRequired": true
}
]

View File

@ -78,16 +78,6 @@ object DataciteModelConstants {
OafMapperUtils.keyValue(ModelConstants.DATACITE_ID, DATACITE_NAME) OafMapperUtils.keyValue(ModelConstants.DATACITE_ID, DATACITE_NAME)
val subRelTypeMapping: Map[String, OAFRelations] = Map( val subRelTypeMapping: Map[String, OAFRelations] = Map(
ModelConstants.REFERENCES -> OAFRelations(
ModelConstants.REFERENCES,
ModelConstants.IS_REFERENCED_BY,
ModelConstants.RELATIONSHIP
),
ModelConstants.IS_REFERENCED_BY -> OAFRelations(
ModelConstants.IS_REFERENCED_BY,
ModelConstants.REFERENCES,
ModelConstants.RELATIONSHIP
),
ModelConstants.IS_SUPPLEMENTED_BY -> OAFRelations( ModelConstants.IS_SUPPLEMENTED_BY -> OAFRelations(
ModelConstants.IS_SUPPLEMENTED_BY, ModelConstants.IS_SUPPLEMENTED_BY,
ModelConstants.IS_SUPPLEMENT_TO, ModelConstants.IS_SUPPLEMENT_TO,
@ -163,16 +153,6 @@ object DataciteModelConstants {
ModelConstants.IS_SOURCE_OF, ModelConstants.IS_SOURCE_OF,
ModelConstants.VERSION ModelConstants.VERSION
), ),
ModelConstants.CITES -> OAFRelations(
ModelConstants.CITES,
ModelConstants.IS_CITED_BY,
ModelConstants.CITATION
),
ModelConstants.IS_CITED_BY -> OAFRelations(
ModelConstants.IS_CITED_BY,
ModelConstants.CITES,
ModelConstants.CITATION
),
ModelConstants.IS_VARIANT_FORM_OF -> OAFRelations( ModelConstants.IS_VARIANT_FORM_OF -> OAFRelations(
ModelConstants.IS_VARIANT_FORM_OF, ModelConstants.IS_VARIANT_FORM_OF,
ModelConstants.IS_DERIVED_FROM, ModelConstants.IS_DERIVED_FROM,

View File

@ -19,7 +19,7 @@ import java.time.chrono.ThaiBuddhistDate
import java.time.format.DateTimeFormatter import java.time.format.DateTimeFormatter
import java.util.{Date, Locale} import java.util.{Date, Locale}
import scala.collection.JavaConverters._ import scala.collection.JavaConverters._
import scala.io.{Codec, Source} import scala.io.Source
object DataciteToOAFTransformation { object DataciteToOAFTransformation {
@ -49,7 +49,7 @@ object DataciteToOAFTransformation {
/** This method should skip record if json contains invalid text /** This method should skip record if json contains invalid text
* defined in file datacite_filter * defined in file datacite_filter
* *
* @param record : unparsed datacite record * @param record : not parsed Datacite record
* @param json : parsed record * @param json : parsed record
* @return True if the record should be skipped * @return True if the record should be skipped
*/ */
@ -98,6 +98,10 @@ object DataciteToOAFTransformation {
} }
/** This utility method indicates whether the embargo date has been reached
* @param embargo_end_date
* @return True if the embargo date has been reached, false otherwise
*/
def embargo_end(embargo_end_date: String): Boolean = { def embargo_end(embargo_end_date: String): Boolean = {
val dt = LocalDate.parse(embargo_end_date, DateTimeFormatter.ofPattern("[yyyy-MM-dd]")) val dt = LocalDate.parse(embargo_end_date, DateTimeFormatter.ofPattern("[yyyy-MM-dd]"))
val td = LocalDate.now() val td = LocalDate.now()
@ -142,6 +146,21 @@ object DataciteToOAFTransformation {
} }
} }
/** *
* Use the vocabulary dnet:publication_resource to find a synonym to one of these terms and get the instance.type.
* Using the dnet:result_typologies vocabulary, we look up the instance.type synonym
* to generate one of the following main entities:
* - publication
* - dataset
* - software
* - otherresearchproduct
*
* @param resourceType
* @param resourceTypeGeneral
* @param schemaOrg
* @param vocabularies
* @return
*/
def getTypeQualifier( def getTypeQualifier(
resourceType: String, resourceType: String,
resourceTypeGeneral: String, resourceTypeGeneral: String,
@ -252,7 +271,7 @@ object DataciteToOAFTransformation {
.exists(i => i.getHostedby != null && "figshare".equalsIgnoreCase(i.getHostedby.getValue)) .exists(i => i.getHostedby != null && "figshare".equalsIgnoreCase(i.getHostedby.getValue))
if (hosted_by_figshare) { if (hosted_by_figshare) {
r.getInstance().asScala.foreach(i => i.setAccessright(ModelConstants.OPEN_ACCESS_RIGHT())) r.getInstance().asScala.foreach(i => i.setAccessright(ModelConstants.OPEN_ACCESS_RIGHT()))
val l: List[StructuredProperty] = List() val l: List[Subject] = List()
r.setSubject(l.asJava) r.setSubject(l.asJava)
} }
} }
@ -330,6 +349,7 @@ object DataciteToOAFTransformation {
if (result == null) if (result == null)
return List() return List()
// DOI is mapped on a PID inside a Instance object
val doi_q = OafMapperUtils.qualifier( val doi_q = OafMapperUtils.qualifier(
"doi", "doi",
"doi", "doi",
@ -338,6 +358,8 @@ object DataciteToOAFTransformation {
) )
val pid = OafMapperUtils.structuredProperty(doi, doi_q, dataInfo) val pid = OafMapperUtils.structuredProperty(doi, doi_q, dataInfo)
result.setPid(List(pid).asJava) result.setPid(List(pid).asJava)
// This identifiere will be replaced in a second moment using the PID logic generation
result.setId(OafMapperUtils.createOpenaireId(50, s"datacite____::$doi", true)) result.setId(OafMapperUtils.createOpenaireId(50, s"datacite____::$doi", true))
result.setOriginalId(List(doi).asJava) result.setOriginalId(List(doi).asJava)
@ -386,6 +408,10 @@ object DataciteToOAFTransformation {
a a
} }
if (authors == null || authors.isEmpty || !authors.exists(a => a != null))
return List()
result.setAuthor(authors.asJava)
val titles: List[TitleType] = (json \\ "titles").extractOrElse[List[TitleType]](List()) val titles: List[TitleType] = (json \\ "titles").extractOrElse[List[TitleType]](List())
result.setTitle( result.setTitle(
@ -409,10 +435,6 @@ object DataciteToOAFTransformation {
.asJava .asJava
) )
if (authors == null || authors.isEmpty || !authors.exists(a => a != null))
return List()
result.setAuthor(authors.asJava)
val dates = (json \\ "dates").extract[List[DateType]] val dates = (json \\ "dates").extract[List[DateType]]
val publication_year = (json \\ "publicationYear").extractOrElse[String](null) val publication_year = (json \\ "publicationYear").extractOrElse[String](null)
@ -492,7 +514,7 @@ object DataciteToOAFTransformation {
subjects subjects
.filter(s => s.subject.nonEmpty) .filter(s => s.subject.nonEmpty)
.map(s => .map(s =>
OafMapperUtils.structuredProperty( OafMapperUtils.subject(
s.subject.get, s.subject.get,
SUBJ_CLASS, SUBJ_CLASS,
SUBJ_CLASS, SUBJ_CLASS,
@ -623,7 +645,7 @@ object DataciteToOAFTransformation {
id: String, id: String,
date: String date: String
): List[Relation] = { ): List[Relation] = {
rels val bidirectionalRels: List[Relation] = rels
.filter(r => .filter(r =>
subRelTypeMapping subRelTypeMapping
.contains(r.relationType) && (r.relatedIdentifierType.equalsIgnoreCase("doi") || .contains(r.relationType) && (r.relatedIdentifierType.equalsIgnoreCase("doi") ||
@ -631,27 +653,49 @@ object DataciteToOAFTransformation {
r.relatedIdentifierType.equalsIgnoreCase("arxiv")) r.relatedIdentifierType.equalsIgnoreCase("arxiv"))
) )
.map(r => { .map(r => {
val subRelType = subRelTypeMapping(r.relationType).relType
val target = DHPUtils.generateUnresolvedIdentifier(r.relatedIdentifier, r.relatedIdentifierType)
relation(id, target, subRelType, r.relationType, date)
})
val citationRels: List[Relation] = rels
.filter(r =>
(r.relatedIdentifierType.equalsIgnoreCase("doi") ||
r.relatedIdentifierType.equalsIgnoreCase("pmid") ||
r.relatedIdentifierType.equalsIgnoreCase("arxiv")) &&
(r.relationType.toLowerCase.contains("cite") || r.relationType.toLowerCase.contains("reference"))
)
.map(r => {
r.relationType match {
case ModelConstants.CITES | ModelConstants.REFERENCES =>
val target = DHPUtils.generateUnresolvedIdentifier(r.relatedIdentifier, r.relatedIdentifierType)
relation(id, target, ModelConstants.CITATION, ModelConstants.CITES, date)
case ModelConstants.IS_CITED_BY | ModelConstants.IS_REFERENCED_BY =>
val source = DHPUtils.generateUnresolvedIdentifier(r.relatedIdentifier, r.relatedIdentifierType)
relation(source, id, ModelConstants.CITATION, ModelConstants.CITES, date)
}
})
citationRels ::: bidirectionalRels
}
def relation(source: String, target: String, subRelType: String, relClass: String, date: String): Relation = {
val rel = new Relation val rel = new Relation
rel.setCollectedfrom(List(DATACITE_COLLECTED_FROM).asJava) rel.setCollectedfrom(List(DATACITE_COLLECTED_FROM).asJava)
rel.setDataInfo(dataInfo) rel.setDataInfo(dataInfo)
val subRelType = subRelTypeMapping(r.relationType).relType
rel.setRelType(REL_TYPE_VALUE) rel.setRelType(REL_TYPE_VALUE)
rel.setSubRelType(subRelType) rel.setSubRelType(subRelType)
rel.setRelClass(r.relationType) rel.setRelClass(relClass)
val dateProps: KeyValue = OafMapperUtils.keyValue(DATE_RELATION_KEY, date) val dateProps: KeyValue = OafMapperUtils.keyValue(DATE_RELATION_KEY, date)
rel.setProperties(List(dateProps).asJava) rel.setProperties(List(dateProps).asJava)
rel.setSource(id) rel.setSource(source)
rel.setTarget( rel.setTarget(target)
DHPUtils.generateUnresolvedIdentifier(r.relatedIdentifier, r.relatedIdentifierType)
)
rel.setCollectedfrom(List(DATACITE_COLLECTED_FROM).asJava) rel.setCollectedfrom(List(DATACITE_COLLECTED_FROM).asJava)
rel.getCollectedfrom.asScala.map(c => c.getValue).toList rel.getCollectedfrom.asScala.map(c => c.getValue).toList
rel rel
})
} }
def generateDSId(input: String): String = { def generateDSId(input: String): String = {

View File

@ -281,7 +281,7 @@ object BioDBToOAF {
d.setSubject( d.setSubject(
subjects subjects
.map(s => .map(s =>
OafMapperUtils.structuredProperty( OafMapperUtils.subject(
s, s,
SUBJ_CLASS, SUBJ_CLASS,
SUBJ_CLASS, SUBJ_CLASS,

View File

@ -265,8 +265,8 @@ object PubMedToOaf {
result.setLanguage(term) result.setLanguage(term)
} }
val subjects: List[StructuredProperty] = article.getSubjects.asScala.map(s => val subjects: List[Subject] = article.getSubjects.asScala.map(s =>
OafMapperUtils.structuredProperty( OafMapperUtils.subject(
s.getValue, s.getValue,
SUBJ_CLASS, SUBJ_CLASS,
SUBJ_CLASS, SUBJ_CLASS,

View File

@ -72,7 +72,7 @@ public class ProduceTest {
JavaRDD<Result> tmp = getResultJavaRDD(); JavaRDD<Result> tmp = getResultJavaRDD();
List<StructuredProperty> sbjs = tmp List<Subject> sbjs = tmp
.filter(row -> row.getSubject() != null && row.getSubject().size() > 0) .filter(row -> row.getSubject() != null && row.getSubject().size() > 0)
.flatMap(row -> row.getSubject().iterator()) .flatMap(row -> row.getSubject().iterator())
.collect(); .collect();
@ -169,7 +169,7 @@ public class ProduceTest {
.getSubject() .getSubject()
.size()); .size());
List<StructuredProperty> sbjs = tmp List<Subject> sbjs = tmp
.filter(row -> row.getId().equals(doi)) .filter(row -> row.getId().equals(doi))
.flatMap(row -> row.getSubject().iterator()) .flatMap(row -> row.getSubject().iterator())
.collect(); .collect();
@ -396,7 +396,7 @@ public class ProduceTest {
.getSubject() .getSubject()
.size()); .size());
List<StructuredProperty> sbjs = tmp List<Subject> sbjs = tmp
.filter(row -> row.getId().equals(doi)) .filter(row -> row.getId().equals(doi))
.flatMap(row -> row.getSubject().iterator()) .flatMap(row -> row.getSubject().iterator())
.collect(); .collect();
@ -508,7 +508,7 @@ public class ProduceTest {
.getSubject() .getSubject()
.size()); .size());
List<StructuredProperty> sbjs = tmp List<Subject> sbjs = tmp
.filter(row -> row.getId().equals(doi)) .filter(row -> row.getId().equals(doi))
.flatMap(row -> row.getSubject().iterator()) .flatMap(row -> row.getSubject().iterator())
.collect(); .collect();
@ -537,7 +537,7 @@ public class ProduceTest {
JavaRDD<Result> tmp = getResultJavaRDDPlusSDG(); JavaRDD<Result> tmp = getResultJavaRDDPlusSDG();
List<StructuredProperty> sbjs_sdg = tmp List<Subject> sbjs_sdg = tmp
.filter(row -> row.getSubject() != null && row.getSubject().size() > 0) .filter(row -> row.getSubject() != null && row.getSubject().size() > 0)
.flatMap(row -> row.getSubject().iterator()) .flatMap(row -> row.getSubject().iterator())
.filter(sbj -> sbj.getQualifier().getClassid().equals(Constants.SDG_CLASS_ID)) .filter(sbj -> sbj.getQualifier().getClassid().equals(Constants.SDG_CLASS_ID))

View File

@ -99,7 +99,7 @@ public class CreateOpenCitationsASTest {
.map(value -> OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class)) .map(value -> OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class))
.map(aa -> ((Relation) aa.getPayload())); .map(aa -> ((Relation) aa.getPayload()));
assertEquals(62, tmp.count()); assertEquals(31, tmp.count());
// tmp.foreach(r -> System.out.println(OBJECT_MAPPER.writeValueAsString(r))); // tmp.foreach(r -> System.out.println(OBJECT_MAPPER.writeValueAsString(r)));
@ -131,7 +131,7 @@ public class CreateOpenCitationsASTest {
.map(value -> OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class)) .map(value -> OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class))
.map(aa -> ((Relation) aa.getPayload())); .map(aa -> ((Relation) aa.getPayload()));
assertEquals(46, tmp.count()); assertEquals(23, tmp.count());
// tmp.foreach(r -> System.out.println(OBJECT_MAPPER.writeValueAsString(r))); // tmp.foreach(r -> System.out.println(OBJECT_MAPPER.writeValueAsString(r)));
@ -241,7 +241,7 @@ public class CreateOpenCitationsASTest {
assertEquals("resultResult", r.getRelType()); assertEquals("resultResult", r.getRelType());
}); });
assertEquals(23, tmp.filter(r -> r.getRelClass().equals("Cites")).count()); assertEquals(23, tmp.filter(r -> r.getRelClass().equals("Cites")).count());
assertEquals(23, tmp.filter(r -> r.getRelClass().equals("IsCitedBy")).count()); assertEquals(0, tmp.filter(r -> r.getRelClass().equals("IsCitedBy")).count());
} }
@ -318,15 +318,15 @@ public class CreateOpenCitationsASTest {
JavaRDD<Relation> check = tmp.filter(r -> r.getSource().equals(doi1) || r.getTarget().equals(doi1)); JavaRDD<Relation> check = tmp.filter(r -> r.getSource().equals(doi1) || r.getTarget().equals(doi1));
assertEquals(10, check.count()); assertEquals(5, check.count());
check.foreach(r -> { // check.foreach(r -> {
if (r.getSource().equals(doi2) || r.getSource().equals(doi3) || r.getSource().equals(doi4) || // if (r.getSource().equals(doi2) || r.getSource().equals(doi3) || r.getSource().equals(doi4) ||
r.getSource().equals(doi5) || r.getSource().equals(doi6)) { // r.getSource().equals(doi5) || r.getSource().equals(doi6)) {
assertEquals(ModelConstants.IS_CITED_BY, r.getRelClass()); // assertEquals(ModelConstants.IS_CITED_BY, r.getRelClass());
assertEquals(doi1, r.getTarget()); // assertEquals(doi1, r.getTarget());
} // }
}); // });
assertEquals(5, check.filter(r -> r.getSource().equals(doi1)).count()); assertEquals(5, check.filter(r -> r.getSource().equals(doi1)).count());
check.filter(r -> r.getSource().equals(doi1)).foreach(r -> assertEquals(ModelConstants.CITES, r.getRelClass())); check.filter(r -> r.getSource().equals(doi1)).foreach(r -> assertEquals(ModelConstants.CITES, r.getRelClass()));

View File

@ -1,6 +1,8 @@
package eu.dnetlib.dhp.actionmanager.project; package eu.dnetlib.dhp.actionmanager.project;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException; import java.io.IOException;
import java.nio.file.Files; import java.nio.file.Files;
import java.nio.file.Path; import java.nio.file.Path;
@ -16,6 +18,7 @@ import eu.dnetlib.dhp.actionmanager.project.utils.EXCELParser;
import eu.dnetlib.dhp.common.collection.CollectorException; import eu.dnetlib.dhp.common.collection.CollectorException;
import eu.dnetlib.dhp.common.collection.HttpConnector2; import eu.dnetlib.dhp.common.collection.HttpConnector2;
@Deprecated
@Disabled @Disabled
public class EXCELParserTest { public class EXCELParserTest {
@ -43,4 +46,21 @@ public class EXCELParserTest {
Assertions.assertEquals(3878, pl.size()); Assertions.assertEquals(3878, pl.size());
} }
@Test
void test2() throws IOException, ClassNotFoundException, InvalidFormatException, IllegalAccessException,
InstantiationException {
;
EXCELParser excelParser = new EXCELParser();
List<Object> pl = excelParser
.parse(
new FileInputStream(
getClass().getResource("/eu/dnetlib/dhp/actionmanager/project/h2020_topic.xlsx").getPath()),
"eu.dnetlib.dhp.actionmanager.project.utils.model.EXCELTopic",
"DATA");
Assertions.assertEquals(3905, pl.size());
}
} }

View File

@ -73,7 +73,7 @@ public class PrepareH2020ProgrammeTest {
"-isSparkSessionManaged", "-isSparkSessionManaged",
Boolean.FALSE.toString(), Boolean.FALSE.toString(),
"-programmePath", "-programmePath",
getClass().getResource("/eu/dnetlib/dhp/actionmanager/project/whole_programme.json.gz").getPath(), getClass().getResource("/eu/dnetlib/dhp/actionmanager/project/h2020_programme.json.gz").getPath(),
"-outputPath", "-outputPath",
workingDir.toString() + "/preparedProgramme" workingDir.toString() + "/preparedProgramme"
}); });
@ -84,7 +84,7 @@ public class PrepareH2020ProgrammeTest {
.textFile(workingDir.toString() + "/preparedProgramme") .textFile(workingDir.toString() + "/preparedProgramme")
.map(item -> OBJECT_MAPPER.readValue(item, CSVProgramme.class)); .map(item -> OBJECT_MAPPER.readValue(item, CSVProgramme.class));
Assertions.assertEquals(277, tmp.count()); Assertions.assertEquals(279, tmp.count());
Dataset<CSVProgramme> verificationDataset = spark.createDataset(tmp.rdd(), Encoders.bean(CSVProgramme.class)); Dataset<CSVProgramme> verificationDataset = spark.createDataset(tmp.rdd(), Encoders.bean(CSVProgramme.class));

View File

@ -4,12 +4,14 @@ package eu.dnetlib.dhp.actionmanager.project;
import java.io.IOException; import java.io.IOException;
import java.nio.file.Files; import java.nio.file.Files;
import java.nio.file.Path; import java.nio.file.Path;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.io.FileUtils; import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf; import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.ForeachFunction;
import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders; import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SparkSession; import org.apache.spark.sql.SparkSession;
@ -20,9 +22,12 @@ import org.junit.jupiter.api.Test;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.actionmanager.project.utils.model.CSVProject; import eu.dnetlib.dhp.actionmanager.project.utils.model.CSVProject;
import eu.dnetlib.dhp.actionmanager.project.utils.model.Project;
public class PrepareProjectTest { public class PrepareProjectTest {
@ -74,7 +79,7 @@ public class PrepareProjectTest {
"-isSparkSessionManaged", "-isSparkSessionManaged",
Boolean.FALSE.toString(), Boolean.FALSE.toString(),
"-projectPath", "-projectPath",
getClass().getResource("/eu/dnetlib/dhp/actionmanager/project/projects_subset.json").getPath(), getClass().getResource("/eu/dnetlib/dhp/actionmanager/project/projects_nld.json.gz").getPath(),
"-outputPath", "-outputPath",
workingDir.toString() + "/preparedProjects", workingDir.toString() + "/preparedProjects",
"-dbProjectPath", "-dbProjectPath",
@ -94,6 +99,12 @@ public class PrepareProjectTest {
Assertions.assertEquals(0, verificationDataset.filter("length(id) = 0").count()); Assertions.assertEquals(0, verificationDataset.filter("length(id) = 0").count());
Assertions.assertEquals(0, verificationDataset.filter("length(programme) = 0").count()); Assertions.assertEquals(0, verificationDataset.filter("length(programme) = 0").count());
Assertions.assertEquals(0, verificationDataset.filter("length(topics) = 0").count());
CSVProject project = tmp.filter(p -> p.getId().equals("886828")).first();
Assertions.assertEquals("H2020-EU.2.3.", project.getProgramme());
Assertions.assertEquals("EIC-SMEInst-2018-2020", project.getTopics());
} }
} }

View File

@ -1,12 +1,10 @@
package eu.dnetlib.dhp.actionmanager.project; package eu.dnetlib.dhp.actionmanager.project;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertTrue; import static org.junit.jupiter.api.Assertions.assertTrue;
import java.io.BufferedReader; import java.io.*;
import java.io.File;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.file.Files; import java.nio.file.Files;
import org.apache.commons.io.FileUtils; import org.apache.commons.io.FileUtils;
@ -24,7 +22,7 @@ import eu.dnetlib.dhp.common.collection.CollectorException;
import eu.dnetlib.dhp.common.collection.GetCSV; import eu.dnetlib.dhp.common.collection.GetCSV;
import eu.dnetlib.dhp.common.collection.HttpConnector2; import eu.dnetlib.dhp.common.collection.HttpConnector2;
public class DownloadCsvTest { public class ReadProgrammeTest {
private static String workingDir; private static String workingDir;
@ -33,22 +31,25 @@ public class DownloadCsvTest {
@BeforeAll @BeforeAll
public static void beforeAll() throws IOException { public static void beforeAll() throws IOException {
workingDir = Files workingDir = Files
.createTempDirectory(DownloadCsvTest.class.getSimpleName()) .createTempDirectory(ReadProgrammeTest.class.getSimpleName())
.toString(); .toString();
fs = FileSystem.getLocal(new Configuration()); fs = FileSystem.getLocal(new Configuration());
} }
@Disabled @AfterAll
@Test public static void cleanup() {
void getProgrammeFileTest() throws Exception { FileUtils.deleteQuietly(new File(workingDir));
}
String fileURL = "https://cordis.europa.eu/data/reference/cordisref-h2020programmes.csv"; @Test
void getLocalProgrammeFileTest() throws Exception {
GetCSV GetCSV
.getCsv( .getCsv(
fs, new BufferedReader( fs, new BufferedReader(
new InputStreamReader(new HttpConnector2().getInputSourceAsStream(fileURL))), new FileReader(
getClass().getResource("/eu/dnetlib/dhp/actionmanager/project/h2020_programme.csv").getPath())),
workingDir + "/programme", workingDir + "/programme",
CSVProgramme.class.getName(), ';'); CSVProgramme.class.getName(), ';');
@ -56,10 +57,11 @@ public class DownloadCsvTest {
String line; String line;
int count = 0; int count = 0;
ObjectMapper OBJECT_MAPPER = new ObjectMapper();
while ((line = in.readLine()) != null) { while ((line = in.readLine()) != null) {
CSVProgramme csvp = new ObjectMapper().readValue(line, CSVProgramme.class); CSVProgramme csvp = OBJECT_MAPPER.readValue(line, CSVProgramme.class);
if (count == 0) { if (count == 528) {
assertTrue(csvp.getCode().equals("H2020-EU.5.f.")); assertEquals("H2020-EU.5.f.", csvp.getCode());
assertTrue( assertTrue(
csvp csvp
.getTitle() .getTitle()
@ -69,8 +71,8 @@ public class DownloadCsvTest {
assertTrue(csvp.getShortTitle().equals("")); assertTrue(csvp.getShortTitle().equals(""));
assertTrue(csvp.getLanguage().equals("en")); assertTrue(csvp.getLanguage().equals("en"));
} }
if (count == 28) { if (count == 11) {
assertTrue(csvp.getCode().equals("H2020-EU.3.5.4.")); assertEquals("H2020-EU.3.5.4.", csvp.getCode());
assertTrue( assertTrue(
csvp csvp
.getTitle() .getTitle()
@ -79,7 +81,7 @@ public class DownloadCsvTest {
assertTrue(csvp.getShortTitle().equals("A green economy and society through eco-innovation")); assertTrue(csvp.getShortTitle().equals("A green economy and society through eco-innovation"));
assertTrue(csvp.getLanguage().equals("de")); assertTrue(csvp.getLanguage().equals("de"));
} }
if (count == 229) { if (count == 34) {
assertTrue(csvp.getCode().equals("H2020-EU.3.2.")); assertTrue(csvp.getCode().equals("H2020-EU.3.2."));
assertTrue( assertTrue(
csvp csvp
@ -95,54 +97,7 @@ public class DownloadCsvTest {
count += 1; count += 1;
} }
Assertions.assertEquals(767, count); assertEquals(769, count);
}
@Disabled
@Test
void getProjectFileTest() throws IOException, CollectorException, ClassNotFoundException {
String fileURL = "https://cordis.europa.eu/data/cordis-h2020projects.csv";
GetCSV
.getCsv(
fs,
new BufferedReader(new InputStreamReader(new HttpConnector2().getInputSourceAsStream(fileURL))),
workingDir + "/projects",
CSVProject.class.getName(), ';');
BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(new Path(workingDir + "/projects"))));
String line;
int count = 0;
while ((line = in.readLine()) != null) {
CSVProject csvp = new ObjectMapper().readValue(line, CSVProject.class);
if (count == 0) {
assertTrue(csvp.getId().equals("771736"));
assertTrue(csvp.getProgramme().equals("H2020-EU.1.1."));
assertTrue(csvp.getTopics().equals("ERC-2017-COG"));
}
if (count == 22882) {
assertTrue(csvp.getId().equals("752903"));
assertTrue(csvp.getProgramme().equals("H2020-EU.1.3.2."));
assertTrue(csvp.getTopics().equals("MSCA-IF-2016"));
}
if (count == 223023) {
assertTrue(csvp.getId().equals("861952"));
assertTrue(csvp.getProgramme().equals("H2020-EU.4.e."));
assertTrue(csvp.getTopics().equals("SGA-SEWP-COST-2019"));
}
assertTrue(csvp.getId() != null);
assertTrue(csvp.getProgramme().startsWith("H2020"));
count += 1;
}
Assertions.assertEquals(34957, count);
}
@AfterAll
public static void cleanup() {
FileUtils.deleteQuietly(new File(workingDir));
} }
} }

View File

@ -0,0 +1,104 @@
package eu.dnetlib.dhp.actionmanager.project;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocalFileSystem;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.SparkSession;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.actionmanager.project.utils.ReadProjects;
import eu.dnetlib.dhp.actionmanager.project.utils.model.CSVProject;
import eu.dnetlib.dhp.actionmanager.project.utils.model.Project;
/**
* @author miriam.baglioni
* @Date 01/03/23
*/
public class ReadProjectsTest {
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
private static Path workingDir;
private static LocalFileSystem fs;
private static SparkSession spark;
private static final Logger log = LoggerFactory
.getLogger(ReadProjectsTest.class);
@BeforeAll
public static void beforeAll() throws IOException {
workingDir = Files
.createTempDirectory(ReadProjectsTest.class.getSimpleName());
fs = FileSystem.getLocal(new Configuration());
SparkConf conf = new SparkConf();
conf.setAppName(PrepareProjectTest.class.getSimpleName());
conf.setMaster("local[*]");
conf.set("spark.driver.host", "localhost");
conf.set("hive.metastore.local", "true");
conf.set("spark.ui.enabled", "false");
conf.set("spark.sql.warehouse.dir", workingDir.toString());
conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString());
spark = SparkSession
.builder()
.appName(PrepareProjectTest.class.getSimpleName())
.config(conf)
.getOrCreate();
}
@AfterAll
public static void afterAll() throws IOException {
FileUtils.deleteDirectory(workingDir.toFile());
spark.stop();
}
@Test
void readProjects() throws IOException {
String projects = getClass()
.getResource("/eu/dnetlib/dhp/actionmanager/project/projects.json")
.getPath();
ReadProjects.readProjects(projects, workingDir.toString() + "/projects", fs);
final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());
JavaRDD<Project> tmp = sc
.textFile(workingDir.toString() + "/projects")
.map(item -> OBJECT_MAPPER.readValue(item, Project.class));
Assertions.assertEquals(19, tmp.count());
Project project = tmp.filter(p -> p.getAcronym().equals("GiSTDS")).first();
Assertions.assertEquals("2022-10-08 18:28:27", project.getContentUpdateDate());
Assertions.assertEquals("894593", project.getId());
Assertions.assertEquals("H2020-EU.1.3.", project.getLegalBasis());
Assertions.assertEquals("MSCA-IF-2019", project.getTopics());
// tmp.foreach(p -> System.out.println(OBJECT_MAPPER.writeValueAsString(p)));
}
}

View File

@ -0,0 +1,99 @@
package eu.dnetlib.dhp.actionmanager.project;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import org.apache.commons.io.FileUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocalFileSystem;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.SparkSession;
import org.junit.jupiter.api.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.actionmanager.project.utils.ReadProjects;
import eu.dnetlib.dhp.actionmanager.project.utils.ReadTopics;
import eu.dnetlib.dhp.actionmanager.project.utils.model.JsonTopic;
import eu.dnetlib.dhp.actionmanager.project.utils.model.Project;
/**
* @author miriam.baglioni
* @Date 01/03/23
*/
public class ReadTopicTest {
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
private static Path workingDir;
private static LocalFileSystem fs;
private static SparkSession spark;
private static final Logger log = LoggerFactory
.getLogger(ReadTopicTest.class);
@BeforeAll
public static void beforeAll() throws IOException {
workingDir = Files
.createTempDirectory(ReadTopicTest.class.getSimpleName());
fs = FileSystem.getLocal(new Configuration());
SparkConf conf = new SparkConf();
conf.setAppName(PrepareProjectTest.class.getSimpleName());
conf.setMaster("local[*]");
conf.set("spark.driver.host", "localhost");
conf.set("hive.metastore.local", "true");
conf.set("spark.ui.enabled", "false");
conf.set("spark.sql.warehouse.dir", workingDir.toString());
conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString());
spark = SparkSession
.builder()
.appName(PrepareProjectTest.class.getSimpleName())
.config(conf)
.getOrCreate();
}
@AfterAll
public static void afterAll() throws IOException {
FileUtils.deleteDirectory(workingDir.toFile());
spark.stop();
}
@Disabled
@Test
void readTopics() throws IOException {
String topics = getClass()
.getResource("/eu/dnetlib/dhp/actionmanager/project/topics.json")
.getPath();
ReadTopics.readTopics(topics, workingDir.toString() + "/topics", fs);
final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());
JavaRDD<JsonTopic> tmp = sc
.textFile(workingDir.toString() + "/topics")
.map(item -> OBJECT_MAPPER.readValue(item, JsonTopic.class));
// Assertions.assertEquals(16, tmp.count());
JsonTopic topic = tmp.filter(t -> t.getProjectID().equals("886988")).first();
Assertions.assertEquals("Individual Fellowships", topic.getTitle());
Assertions.assertEquals("MSCA-IF-2019", topic.getTopic());
// tmp.foreach(p -> System.out.println(OBJECT_MAPPER.writeValueAsString(p)));
}
}

View File

@ -11,6 +11,7 @@ import org.apache.hadoop.io.Text;
import org.apache.spark.SparkConf; import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.ForeachFunction;
import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders; import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.Row; import org.apache.spark.sql.Row;
@ -78,12 +79,12 @@ public class SparkUpdateProjectTest {
"-programmePath", "-programmePath",
getClass() getClass()
.getResource( .getResource(
"/eu/dnetlib/dhp/actionmanager/project/preparedProgramme_whole.json") "/eu/dnetlib/dhp/actionmanager/project/prepared_h2020_programme.json.gz")
.getPath(), .getPath(),
"-projectPath", "-projectPath",
getClass().getResource("/eu/dnetlib/dhp/actionmanager/project/prepared_projects.json").getPath(), getClass().getResource("/eu/dnetlib/dhp/actionmanager/project/prepared_projects.json.gz").getPath(),
"-topicPath", "-topicPath",
getClass().getResource("/eu/dnetlib/dhp/actionmanager/project/topic.json.gz").getPath(), getClass().getResource("/eu/dnetlib/dhp/actionmanager/project/topics_nld.json.gz").getPath(),
"-outputPath", "-outputPath",
workingDir.toString() + "/actionSet" workingDir.toString() + "/actionSet"
}); });
@ -266,6 +267,7 @@ public class SparkUpdateProjectTest {
.get(1) .get(1)
.getString(0) .getString(0)
.equals("H2020-EU.2.1.4.")); .equals("H2020-EU.2.1.4."));
Assertions Assertions
.assertTrue( .assertTrue(
execverification execverification

File diff suppressed because one or more lines are too long

View File

@ -1,277 +0,0 @@
{"code":"H2020-EU.5.g.","title":"Take due and proportional precautions in research and innovation activities by anticipating and assessing potential environmental, health and safety impacts","shortTitle":"","language":"en","classification":"SCIENCE WITH AND FOR SOCIETY | Take due and proportional precautions in research and innovation activities by anticipating and assessing potential environmental, health and safety impacts","classification_short":"Science with and for Society | Take due and proportional precautions in research and innovation activities by anticipating and assessing potential environmental, health and safety impacts"}
{"code":"H2020-EU.3.4.2.1.","title":"A substantial reduction of traffic congestion","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | Better mobility, less congestion, more safety and security | A substantial reduction of traffic congestion","classification_short":"Societal Challenges | Transport | Mobility, safety and security | A substantial reduction of traffic congestion"}
{"code":"H2020-EU.3.4.5.4.","title":"ITD Airframe","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | CLEANSKY2 | ITD Airframe","classification_short":"Societal Challenges | Transport | CLEANSKY2 | ITD Airframe"}
{"code":"H2020-EU.3.3.8.1.","title":"Increase the electrical efficiency and the durability of the different fuel cells used for power production to levels which can compete with conventional technologies, while reducing costs","shortTitle":"","language":"en","classification":"Societal challenges | Secure, clean and efficient energy | FCH2 (energy objectives) | Increase the electrical efficiency and the durability of the different fuel cells used for power production to levels which can compete with conventional technologies, while reducing costs","classification_short":"Societal Challenges | Energy | FCH2 (energy objectives) | Increase the electrical efficiency and the durability of the different fuel cells used for power production to levels which can compete with conventional technologies, while reducing costs"}
{"code":"H2020-EU.3.7.1.","title":"Fight crime, illegal trafficking and terrorism, including understanding and tackling terrorist ideas and beliefs","shortTitle":"","language":"en","classification":"Societal challenges | Secure societies - Protecting freedom and security of Europe and its citizens | Fight crime, illegal trafficking and terrorism, including understanding and tackling terrorist ideas and beliefs","classification_short":"Societal Challenges | Secure societies | Fight crime, illegal trafficking and terrorism, including understanding and tackling terrorist ideas and beliefs"}
{"code":"H2020-EU.3.4.1.1.","title":"Making aircraft, vehicles and vessels cleaner and quieter will improve environmental performance and reduce perceived noise and vibration","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | Resource efficient transport that respects the environment | Making aircraft, vehicles and vessels cleaner and quieter will improve environmental performance and reduce perceived noise and vibration","classification_short":"Societal Challenges | Transport | Resource efficient transport that respects the environment | Making aircraft, vehicles and vessels cleaner and quieter will improve environmental performance and reduce perceived noise and vibration"}
{"code":"H2020-EU.1.4.3.","title":"Reinforcing European research infrastructure policy and international cooperation","shortTitle":"Research infrastructure policy and international cooperation","language":"en","classification":"Excellent science | Research Infrastructures | Reinforcing European research infrastructure policy and international cooperation","classification_short":"Excellent Science | Research Infrastructures | Research infrastructure policy and international cooperation"}
{"code":"H2020-EU.1.4.","title":"EXCELLENT SCIENCE - Research Infrastructures","shortTitle":"Research Infrastructures","language":"en","classification":"Excellent science | Research Infrastructures","classification_short":"Excellent Science | Research Infrastructures"}
{"code":"H2020-EU.3.4.6.1.","title":"Reduce the production cost of fuel cell systems to be used in transport applications, while increasing their lifetime to levels which can compete with conventional technologies","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | FCH2 (transport objectives) | Reduce the production cost of fuel cell systems to be used in transport applications, while increasing their lifetime to levels which can compete with conventional technologies","classification_short":"Societal Challenges | Transport | FCH2 (transport objectives) | Reduce the production cost of fuel cell systems to be used in transport applications, while increasing their lifetime to levels which can compete with conventional technologies"}
{"code":"H2020-EU.3.4.5.5.","title":"ITD Engines","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | CLEANSKY2 | ITD Engines","classification_short":"Societal Challenges | Transport | CLEANSKY2 | ITD Engines"}
{"code":"H2020-EU.2.1.1.7.3.","title":"Multi-disciplinary approaches for smart systems, supported by developments in holistic design and advanced manufacturing to realise self-reliant and adaptable smart systems having sophisticated interfaces and offering complex functionalities based on, for example, the seamless integration of sensing, actuating, processing, energy provision and networking","shortTitle":"","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Information and Communication Technologies (ICT) | ECSEL | Multi-disciplinary approaches for smart systems, supported by developments in holistic design and advanced manufacturing to realise self-reliant and adaptable smart systems having sophisticated interfaces and offering complex functionalities based on, for example, the seamless integration of sensing, actuating, processing, energy provision and networking","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Information and Communication Technologies | ECSEL | Multi-disciplinary approaches for smart systems, supported by developments in holistic design and advanced manufacturing to realise self-reliant and adaptable smart systems having sophisticated interfaces and offering complex functionalities based on, for example, the seamless integration of sensing, actuating, processing, energy provision and networking"}
{"code":"H2020-EU.3.1.6.1.","title":"Promoting integrated care","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Health care provision and integrated care | Promoting integrated care","classification_short":"Societal Challenges | Health | Health care provision and integrated care | Promoting integrated care"}
{"code":"H2020-EU.3.7.6.","title":"Ensure privacy and freedom, including in the Internet and enhance the societal, legal and ethical understanding of all areas of security, risk and management","shortTitle":"","language":"en","classification":"Societal challenges | Secure societies - Protecting freedom and security of Europe and its citizens | Ensure privacy and freedom, including in the Internet and enhance the societal, legal and ethical understanding of all areas of security, risk and management","classification_short":"Societal Challenges | Secure societies | Ensure privacy and freedom, including in the Internet and enhance the societal, legal and ethical understanding of all areas of security, risk and management"}
{"code":"H2020-EU.3.4.2.3.","title":"Developing new concepts of freight transport and logistics","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | Better mobility, less congestion, more safety and security | Developing new concepts of freight transport and logistics","classification_short":"Societal Challenges | Transport | Mobility, safety and security | Developing new concepts of freight transport and logistics"}
{"code":"H2020-EU.3.3.2.1.","title":"Develop the full potential of wind energy","shortTitle":"","language":"en","classification":"Societal challenges | Secure, clean and efficient energy | Low-cost, low-carbon energy supply | Develop the full potential of wind energy","classification_short":"Societal Challenges | Energy | Low-cost, low-carbon energy supply | Develop the full potential of wind energy"}
{"code":"H2020-EU.3.2.5.","title":"Cross-cutting marine and maritime research","shortTitle":"Cross-cutting marine and maritime research","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Cross-cutting marine and maritime research","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Cross-cutting marine and maritime research"}
{"code":"H2020-EU.3.4.7.","title":"SESAR JU","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | SESAR JU","classification_short":"Societal Challenges | Transport | SESAR JU"}
{"code":"H2020-EU.2.1.3.3.","title":"Management of materials components","shortTitle":"Management of materials components","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Advanced materials | Management of materials components","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Advanced materials | Management of materials components"}
{"code":"H2020-EU.3.3.3.","title":"Alternative fuels and mobile energy sources","shortTitle":"Alternative fuels and mobile energy sources","language":"en","classification":"Societal challenges | Secure, clean and efficient energy | Alternative fuels and mobile energy sources","classification_short":"Societal Challenges | Energy | Alternative fuels and mobile energy sources"}
{"code":"H2020-EU.7.","title":"THE EUROPEAN INSTITUTE OF INNOVATION AND TECHNOLOGY (EIT)","shortTitle":"European Institute of Innovation and Technology (EIT)","language":"en","classification":"THE EUROPEAN INSTITUTE OF INNOVATION AND TECHNOLOGY (EIT)","classification_short":"European Institute of Innovation and Technology (EIT)"}
{"code":"H2020-EU.3.5.4.1.","title":"Strengthen eco-innovative technologies, processes, services and products including exploring ways to reduce the quantities of raw materials in production and consumption, and overcoming barriers in this context and boost their market uptake","shortTitle":"","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | Enabling the transition towards a green economy and society through eco-innovation | Strengthen eco-innovative technologies, processes, services and products including exploring ways to reduce the quantities of raw materials in production and consumption, and overcoming barriers in this context and boost their market uptake","classification_short":"Societal Challenges | Climate and environment | A green economy and society through eco-innovation | Strengthen eco-innovative technologies, processes, services and products including exploring ways to reduce the quantities of raw materials in production and consumption, and overcoming barriers in this context and boost their market uptake"}
{"code":"H2020-EU.3.1.4.","title":"Active ageing and self-management of health","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Active ageing and self-management of health","classification_short":"Societal Challenges | Health | Active ageing and self-management of health"}
{"code":"H2020-EU.1.","title":"Excellent science","shortTitle":"Excellent Science","language":"en","classification":"Excellent science","classification_short":"Excellent Science"}
{"code":"H2020-EU.3.5.6.1.","title":"Identifying resilience levels via observations, monitoring and modelling","shortTitle":"","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | Cultural heritage | Identifying resilience levels via observations, monitoring and modelling","classification_short":"Societal Challenges | Climate and environment | Cultural heritage | Identifying resilience levels via observations, monitoring and modelling"}
{"code":"H2020-EU.3.2.4.3.","title":"Supporting market development for bio-based products and processes","shortTitle":"","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Sustainable and competitive bio-based industries and supporting the development of a European bioeconomy | Supporting market development for bio-based products and processes","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Bio-based industries and supporting bio-economy | Supporting market development for bio-based products and processes"}
{"code":"H2020-EU.2.1.6.1.","title":"Enabling European competitiveness, non-dependence and innovation of the European space sector","shortTitle":"Competitiveness, non-dependence and innovation","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Space | Enabling European competitiveness, non-dependence and innovation of the European space sector","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Space | Competitiveness, non-dependence and innovation"}
{"code":"H2020-EU.4.b.","title":"Twinning of research institutions","shortTitle":"Twinning of research institutions","language":"en","classification":"SPREADING EXCELLENCE AND WIDENING PARTICIPATION | Twinning of research institutions","classification_short":"Spreading excellence and widening participation | Twinning of research institutions"}
{"code":"H2020-EU.3.1.7.6.","title":"Psychiatric diseases","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Innovative Medicines Initiative 2 (IMI2) | Psychiatric diseases","classification_short":"Societal Challenges | Health | Innovative Medicines Initiative 2 (IMI2) | Psychiatric diseases"}
{"code":"H2020-EU.3.1.2.2.","title":"Improving diagnosis and prognosis","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Preventing disease | Improving diagnosis and prognosis","classification_short":"Societal Challenges | Health | Preventing disease | Improving diagnosis and prognosis"}
{"code":"H2020-EU.3.4.5.3.","title":"IADP Fast Rotorcraft","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | CLEANSKY2 | IADP Fast Rotorcraft","classification_short":"Societal Challenges | Transport | CLEANSKY2 | IADP Fast Rotorcraft"}
{"code":"H2020-EU.3.1.3.1.","title":"Treating disease, including developing regenerative medicine","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Treating and managing disease | Treating disease, including developing regenerative medicine","classification_short":"Societal Challenges | Health | Treating and managing disease | Treating disease, including developing regenerative medicine"}
{"code":"H2020-EU.3.4.3.3.","title":"Advanced production processes","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | Global leadership for the European transport industry | Advanced production processes","classification_short":"Societal Challenges | Transport | Global leadership for the European transport industry | Advanced production processes"}
{"code":"H2020-EU.3.1.7.","title":"Innovative Medicines Initiative 2 (IMI2)","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Innovative Medicines Initiative 2 (IMI2)","classification_short":"Societal Challenges | Health | Innovative Medicines Initiative 2 (IMI2)"}
{"code":"H2020-EU.3.6.3.2.","title":"Research into European countries' and regions' history, literature, art, philosophy and religions and how these have informed contemporary European diversity","shortTitle":"","language":"en","classification":"Societal challenges | Europe In A Changing World - Inclusive, Innovative And Reflective Societies | Reflective societies - cultural heritage and European identity | Research into European countries' and regions' history, literature, art, philosophy and religions and how these have informed contemporary European diversity","classification_short":"Societal Challenges | Inclusive, innovative and reflective societies | Reflective societies | Research into European countries' and regions' history, literature, art, philosophy and religions and how these have informed contemporary European diversity"}
{"code":"H2020-EU.3.5.1.2.","title":"Assess impacts, vulnerabilities and develop innovative cost-effective adaptation and risk prevention and management measures","shortTitle":"","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | Fighting and adapting to climate change | Assess impacts, vulnerabilities and develop innovative cost-effective adaptation and risk prevention and management measures","classification_short":"Societal Challenges | Climate and environment | Fighting and adapting to climate change | Assess impacts, vulnerabilities and develop innovative cost-effective adaptation and risk prevention and management measures"}
{"code":"H2020-EU.3.6.1.","title":"Inclusive societies","shortTitle":"Inclusive societies","language":"en","classification":"Societal challenges | Europe In A Changing World - Inclusive, Innovative And Reflective Societies | Inclusive societies","classification_short":"Societal Challenges | Inclusive, innovative and reflective societies | Inclusive societies"}
{"code":"H2020-EU.3.2.","title":"SOCIETAL CHALLENGES - Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy","shortTitle":"Food, agriculture, forestry, marine research and bioeconomy","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy"}
{"code":"H2020-EU.2.1.6.1.2.","title":"Boost innovation between space and non-space sectors","shortTitle":"","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Space | Enabling European competitiveness, non-dependence and innovation of the European space sector | Boost innovation between space and non-space sectors","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Space | Competitiveness, non-dependence and innovation | Boost innovation between space and non-space sectors"}
{"code":"H2020-EU.2.1.3.","title":"INDUSTRIAL LEADERSHIP - Leadership in enabling and industrial technologies - Advanced materials","shortTitle":"Advanced materials","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Advanced materials","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Advanced materials"}
{"code":"H2020-EU.2.1.2.3.","title":"Developing the societal dimension of nanotechnology","shortTitle":"Societal dimension of nanotechnology","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Nanotechnologies | Developing the societal dimension of nanotechnology","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Nanotechnologies | Societal dimension of nanotechnology"}
{"code":"H2020-EU.4.","title":"SPREADING EXCELLENCE AND WIDENING PARTICIPATION","shortTitle":"Spreading excellence and widening participation","language":"en","classification":"SPREADING EXCELLENCE AND WIDENING PARTICIPATION","classification_short":"Spreading excellence and widening participation"}
{"code":"H2020-EU.3.6.1.2.","title":"Trusted organisations, practices, services and policies that are necessary to build resilient, inclusive, participatory, open and creative societies in Europe, in particular taking into account migration, integration and demographic change","shortTitle":"","language":"en","classification":"Societal challenges | Europe In A Changing World - Inclusive, Innovative And Reflective Societies | Inclusive societies | Trusted organisations, practices, services and policies that are necessary to build resilient, inclusive, participatory, open and creative societies in Europe, in particular taking into account migration, integration and demographic change","classification_short":"Societal Challenges | Inclusive, innovative and reflective societies | Inclusive societies | Trusted organisations, practices, services and policies that are necessary to build resilient, inclusive, participatory, open and creative societies in Europe, in particular taking into account migration, integration and demographic change"}
{"code":"H2020-EU.3.4.2.","title":"Better mobility, less congestion, more safety and security","shortTitle":"Mobility, safety and security","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | Better mobility, less congestion, more safety and security","classification_short":"Societal Challenges | Transport | Mobility, safety and security"}
{"code":"H2020-EU.3.1.7.13.","title":"Other","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Innovative Medicines Initiative 2 (IMI2) | Other","classification_short":"Societal Challenges | Health | Innovative Medicines Initiative 2 (IMI2) | Other"}
{"code":"H2020-EU.3.3.3.3.","title":"New alternative fuels","shortTitle":"","language":"en","classification":"Societal challenges | Secure, clean and efficient energy | Alternative fuels and mobile energy sources | New alternative fuels","classification_short":"Societal Challenges | Energy | Alternative fuels and mobile energy sources | New alternative fuels"}
{"code":"H2020-EU.2.1.3.5.","title":"Materials for creative industries, including heritage","shortTitle":"Materials for creative industries, including heritage","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Advanced materials | Materials for creative industries, including heritage","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Advanced materials | Materials for creative industries, including heritage"}
{"code":"H2020-EU.3.3.3.2.","title":"Reducing time to market for hydrogen and fuel cells technologies","shortTitle":"","language":"en","classification":"Societal challenges | Secure, clean and efficient energy | Alternative fuels and mobile energy sources | Reducing time to market for hydrogen and fuel cells technologies","classification_short":"Societal Challenges | Energy | Alternative fuels and mobile energy sources | Reducing time to market for hydrogen and fuel cells technologies"}
{"code":"H2020-EU.5.d.","title":"Encourage citizens to engage in science through formal and informal science education, and promote the diffusion of science-based activities, namely in science centres and through other appropriate channels","shortTitle":"","language":"en","classification":"SCIENCE WITH AND FOR SOCIETY | Encourage citizens to engage in science through formal and informal science education, and promote the diffusion of science-based activities, namely in science centres and through other appropriate channels","classification_short":"Science with and for Society | Encourage citizens to engage in science through formal and informal science education, and promote the diffusion of science-based activities, namely in science centres and through other appropriate channels"}
{"code":"H2020-EU.3.1.","title":"SOCIETAL CHALLENGES - Health, demographic change and well-being","shortTitle":"Health","language":"en","classification":"Societal challenges | Health, demographic change and well-being","classification_short":"Societal Challenges | Health"}
{"code":"H2020-EU.3.5.3.1.","title":"Improve the knowledge base on the availability of raw materials","shortTitle":"","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | Ensuring the sustainable supply of non-energy and non-agricultural raw materials | Improve the knowledge base on the availability of raw materials","classification_short":"Societal Challenges | Climate and environment | Supply of non-energy and non-agricultural raw materials | Improve the knowledge base on the availability of raw materials"}
{"code":"H2020-EU.3.2.1.4.","title":"Sustainable forestry","shortTitle":"","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Sustainable agriculture and forestry | Sustainable forestry","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Sustainable agriculture and forestry | Sustainable forestry"}
{"code":"H2020-EU.3.3.","title":"SOCIETAL CHALLENGES - Secure, clean and efficient energy","shortTitle":"Energy","language":"en","classification":"Societal challenges | Secure, clean and efficient energy","classification_short":"Societal Challenges | Energy"}
{"code":"H2020-EU.3.4.8.1.","title":"Innovation Programme 1 (IP1): Cost-efficient and reliable trains","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | Shift2Rail JU | Innovation Programme 1 (IP1): Cost-efficient and reliable trains","classification_short":"Societal Challenges | Transport | Shift2Rail JU | Innovation Programme 1 (IP1): Cost-efficient and reliable trains"}
{"code":"H2020-EU.2.3.2.1.","title":"Support for research intensive SMEs","shortTitle":"Support for research intensive SMEs","language":"en","classification":"Industrial leadership | Innovation In SMEs | Specific support | Support for research intensive SMEs","classification_short":"Industrial Leadership | Innovation in SMEs | Specific support | Support for research intensive SMEs"}
{"code":"H2020-EU.2.1.3.2.","title":"Materials development and transformation","shortTitle":"Materials development and transformation","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Advanced materials | Materials development and transformation","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Advanced materials | Materials development and transformation"}
{"code":"H2020-EU.1.4.1.3.","title":"Development, deployment and operation of ICT-based e-infrastructures","shortTitle":"","language":"en","classification":"Excellent science | Research Infrastructures | Developing the European research infrastructures for 2020 and beyond | Development, deployment and operation of ICT-based e-infrastructures","classification_short":"Excellent Science | Research Infrastructures | Research infrastructures for 2020 and beyond | Development, deployment and operation of ICT-based e-infrastructures"}
{"code":"H2020-EU.3.5.4.2.","title":"Support innovative policies and societal changes","shortTitle":"","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | Enabling the transition towards a green economy and society through eco-innovation | Support innovative policies and societal changes","classification_short":"Societal Challenges | Climate and environment | A green economy and society through eco-innovation | Support innovative policies and societal changes"}
{"code":"H2020-EU.2.1.3.6.","title":"Metrology, characterisation, standardisation and quality control","shortTitle":"Metrology, characterisation, standardisation and quality control","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Advanced materials | Metrology, characterisation, standardisation and quality control","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Advanced materials | Metrology, characterisation, standardisation and quality control"}
{"code":"H2020-EU.3.4.5.8.","title":"ECO Transverse Area","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | CLEANSKY2 | ECO Transverse Area","classification_short":"Societal Challenges | Transport | CLEANSKY2 | ECO Transverse Area"}
{"code":"H2020-EU.5.f.","title":"Develop the governance for the advancement of responsible research and innovation by all stakeholders, which is sensitive to society needs and demands and promote an ethics framework for research and innovation","shortTitle":"","language":"en","classification":"SCIENCE WITH AND FOR SOCIETY | Develop the governance for the advancement of responsible research and innovation by all stakeholders, which is sensitive to society needs and demands and promote an ethics framework for research and innovation","classification_short":"Science with and for Society | Develop the governance for the advancement of responsible research and innovation by all stakeholders, which is sensitive to society needs and demands and promote an ethics framework for research and innovation"}
{"code":"H2020-EU.5.h.","title":"Improving knowledge on science communication in order to improve the quality and effectiveness of interactions between scientists, general media and the public","shortTitle":"","language":"en","classification":"SCIENCE WITH AND FOR SOCIETY | Improving knowledge on science communication in order to improve the quality and effectiveness of interactions between scientists, general media and the public","classification_short":"Science with and for Society | Improving knowledge on science communication in order to improve the quality and effectiveness of interactions between scientists, general media and the public"}
{"code":"H2020-EU.2.1.1.7.1.","title":"Design technologies, process and integration, equipment, materials and manufacturing for micro- and nanoelectronics while targeting miniaturisation, diversification and differentiation, heterogeneous integration","shortTitle":"","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Information and Communication Technologies (ICT) | ECSEL | Design technologies, process and integration, equipment, materials and manufacturing for micro- and nanoelectronics while targeting miniaturisation, diversification and differentiation, heterogeneous integration","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Information and Communication Technologies | ECSEL | Design technologies, process and integration, equipment, materials and manufacturing for micro- and nanoelectronics while targeting miniaturisation, diversification and differentiation, heterogeneous integration"}
{"code":"H2020-EU.3.7.5.","title":"Increase Europe's resilience to crises and disasters","shortTitle":"","language":"en","classification":"Societal challenges | Secure societies - Protecting freedom and security of Europe and its citizens | Increase Europe's resilience to crises and disasters","classification_short":"Societal Challenges | Secure societies | Increase Europe's resilience to crises and disasters"}
{"code":"H2020-EU.1.4.2.2.","title":"Strengthening the human capital of research infrastructures","shortTitle":"","language":"en","classification":"Excellent science | Research Infrastructures | Fostering the innovation potential of research infrastructures and their human resources | Strengthening the human capital of research infrastructures","classification_short":"Excellent Science | Research Infrastructures | Research infrastructures and their human resources | Strengthening the human capital of research infrastructures"}
{"code":"H2020-EU.3.4.1.2.","title":"Developing smart equipment, infrastructures and services","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | Resource efficient transport that respects the environment | Developing smart equipment, infrastructures and services","classification_short":"Societal Challenges | Transport | Resource efficient transport that respects the environment | Developing smart equipment, infrastructures and services"}
{"code":"H2020-EU.2.3.2.2.","title":"Enhancing the innovation capacity of SMEs","shortTitle":"Enhancing the innovation capacity of SMEs","language":"en","classification":"Industrial leadership | Innovation In SMEs | Specific support | Enhancing the innovation capacity of SMEs","classification_short":"Industrial Leadership | Innovation in SMEs | Specific support | Enhancing the innovation capacity of SMEs"}
{"code":"H2020-EU.1.3.5.","title":"Specific support and policy actions","shortTitle":"MSCA Specific support","language":"en","classification":"Excellent science | Marie Skłodowska-Curie Actions | Specific support and policy actions","classification_short":"Excellent Science | Marie-Sklodowska-Curie Actions | MSCA Specific support"}
{"code":"H2020-EU.3.2.3.3.","title":"Boosting marine and maritime innovation through biotechnology","shortTitle":"","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Unlocking the potential of aquatic living resources | Boosting marine and maritime innovation through biotechnology","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Potential of aquatic living resources | Boosting marine and maritime innovation through biotechnology"}
{"code":"H2020-EU.3.2.1.2.","title":"Providing ecosystems services and public goods","shortTitle":"","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Sustainable agriculture and forestry | Providing ecosystems services and public goods","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Sustainable agriculture and forestry | Providing ecosystems services and public goods"}
{"code":"H2020-EU.2.3.2.3.","title":"Supporting market-driven innovation","shortTitle":"Supporting market-driven innovation","language":"en","classification":"Industrial leadership | Innovation In SMEs | Specific support | Supporting market-driven innovation","classification_short":"Industrial Leadership | Innovation in SMEs | Specific support | Supporting market-driven innovation"}
{"code":"H2020-EU.5.a.","title":"Make scientific and technological careers attractive to young students, and forster sustainable interaction between schools, research institutions, industry and civil society organisations","shortTitle":"","language":"en","classification":"SCIENCE WITH AND FOR SOCIETY | Make scientific and technological careers attractive to young students, and forster sustainable interaction between schools, research institutions, industry and civil society organisations","classification_short":"Science with and for Society | Make scientific and technological careers attractive to young students, and forster sustainable interaction between schools, research institutions, industry and civil society organisations"}
{"code":"H2020-EU.3.1.7.9.","title":"Ageing-associated diseases","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Innovative Medicines Initiative 2 (IMI2) | Ageing-associated diseases","classification_short":"Societal Challenges | Health | Innovative Medicines Initiative 2 (IMI2) | Ageing-associated diseases"}
{"code":"H2020-EU.2.2.1.","title":"The Debt facility providing debt finance for R&I: 'Union loan and guarantee service for research and innovation'","shortTitle":"Debt facility","language":"en","classification":"Industrial leadership | Access to risk finance | The Debt facility providing debt finance for R&I: 'Union loan and guarantee service for research and innovation'","classification_short":"Industrial Leadership | Access to risk finance | Debt facility"}
{"code":"H2020-Euratom-1.8.","title":"Ensure availability and use of research infrastructures of pan_european relevance","shortTitle":"","language":"en","classification":"Euratom | Indirect actions | Ensure availability and use of research infrastructures of pan_european relevance","classification_short":"Euratom | Indirect actions | Ensure availability and use of research infrastructures of pan_european relevance"}
{"code":"H2020-EU.3.2.2.1.","title":"Informed consumer choices","shortTitle":"","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Sustainable and competitive agri-food sector for a safe and healthy diet | Informed consumer choices","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Sustainable and competitive agri-food sector for a safe and healthy diet | Informed consumer choices"}
{"code":"H2020-EU.3.7.","title":"Secure societies - Protecting freedom and security of Europe and its citizens","shortTitle":"Secure societies","language":"en","classification":"Societal challenges | Secure societies - Protecting freedom and security of Europe and its citizens","classification_short":"Societal Challenges | Secure societies"}
{"code":"H2020-EU.1.3.4.","title":"Increasing structural impact by co-funding activities","shortTitle":"MSCA Co-funding","language":"en","classification":"Excellent science | Marie Skłodowska-Curie Actions | Increasing structural impact by co-funding activities","classification_short":"Excellent Science | Marie-Sklodowska-Curie Actions | MSCA Co-funding"}
{"code":"H2020-EU.2.1.","title":"INDUSTRIAL LEADERSHIP - Leadership in enabling and industrial technologies","shortTitle":"Leadership in enabling and industrial technologies (LEIT)","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT)"}
{"code":"H2020-EU.2.1.3.4.","title":"Materials for a sustainable, resource-efficient and low-emission industry","shortTitle":"Materials for a resource-efficient and low-emission industry","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Advanced materials | Materials for a sustainable, resource-efficient and low-emission industry","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Advanced materials | Materials for a resource-efficient and low-emission industry"}
{"code":"H2020-EU.3.4.5.7.","title":"Small Air Transport (SAT) Transverse Area","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | CLEANSKY2 | Small Air Transport (SAT) Transverse Area","classification_short":"Societal Challenges | Transport | CLEANSKY2 | Small Air Transport (SAT) Transverse Area"}
{"code":"H2020-EU.3.4.8.3.","title":"Innovation Programme 3: Cost Efficient and Reliable High Capacity Infrastructure","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | Shift2Rail JU | Innovation Programme 3: Cost Efficient and Reliable High Capacity Infrastructure","classification_short":"Societal Challenges | Transport | Shift2Rail JU | Innovation Programme 3: Cost Efficient and Reliable High Capacity Infrastructure"}
{"code":"H2020-Euratom-1.1.","title":"Support safe operation of nuclear systems","shortTitle":"","language":"en","classification":"Euratom | Indirect actions | Support safe operation of nuclear systems","classification_short":"Euratom | Indirect actions | Support safe operation of nuclear systems"}
{"code":"H2020-EU.2.3.1.","title":" Mainstreaming SME support, especially through a dedicated instrument","shortTitle":"Mainstreaming SME support","language":"en","classification":"Industrial leadership | Innovation In SMEs | Mainstreaming SME support, especially through a dedicated instrument","classification_short":"Industrial Leadership | Innovation in SMEs | Mainstreaming SME support"}
{"code":"H2020-EU.1.4.3.1.","title":"Reinforcing European policy for research infrastructures","shortTitle":"","language":"en","classification":"Excellent science | Research Infrastructures | Reinforcing European research infrastructure policy and international cooperation | Reinforcing European policy for research infrastructures","classification_short":"Excellent Science | Research Infrastructures | Research infrastructure policy and international cooperation | Reinforcing European policy for research infrastructures"}
{"code":"H2020-Euratom-1.3.","title":"Support the development and sustainability of nuclear competences at Union level","shortTitle":"","language":"en","classification":"Euratom | Indirect actions | Support the development and sustainability of nuclear competences at Union level","classification_short":"Euratom | Indirect actions | Support the development and sustainability of nuclear competences at Union level"}
{"code":"H2020-EU.3.1.7.1.","title":"Antimicrobial resistance","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Innovative Medicines Initiative 2 (IMI2) | Antimicrobial resistance","classification_short":"Societal Challenges | Health | Innovative Medicines Initiative 2 (IMI2) | Antimicrobial resistance"}
{"code":"H2020-EU.3.7.4.","title":"Improve cyber security","shortTitle":"","language":"en","classification":"Societal challenges | Secure societies - Protecting freedom and security of Europe and its citizens | Improve cyber security","classification_short":"Societal Challenges | Secure societies | Improve cyber security"}
{"code":"H2020-EU.2.1.1.7.2.","title":"Processes, methods, tools and platforms, reference designs and architectures, for software and/or control-intensive embedded/cyber-physical systems, addressing seamless connectivity and interoperability, functional safety, high availability, and security for professional and consumer type applications, and connected services","shortTitle":"","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Information and Communication Technologies (ICT) | ECSEL | Processes, methods, tools and platforms, reference designs and architectures, for software and/or control-intensive embedded/cyber-physical systems, addressing seamless connectivity and interoperability, functional safety, high availability, and security for professional and consumer type applications, and connected services","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Information and Communication Technologies | ECSEL | Processes, methods, tools and platforms, reference designs and architectures, for software and/or control-intensive embedded/cyber-physical systems, addressing seamless connectivity and interoperability, functional safety, high availability, and security for professional and consumer type applications, and connected services"}
{"code":"H2020-EU.3.5.4.","title":"Enabling the transition towards a green economy and society through eco-innovation","shortTitle":"A green economy and society through eco-innovation","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | Enabling the transition towards a green economy and society through eco-innovation","classification_short":"Societal Challenges | Climate and environment | A green economy and society through eco-innovation"}
{"code":"H2020-EU.3.5.3.2.","title":"Promote the sustainable supply and use of raw materials, including mineral resources, from land and sea, covering exploration, extraction, processing, re-use, recycling and recovery","shortTitle":"","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | Ensuring the sustainable supply of non-energy and non-agricultural raw materials | Promote the sustainable supply and use of raw materials, including mineral resources, from land and sea, covering exploration, extraction, processing, re-use, recycling and recovery","classification_short":"Societal Challenges | Climate and environment | Supply of non-energy and non-agricultural raw materials | Promote the sustainable supply and use of raw materials, including mineral resources, from land and sea, covering exploration, extraction, processing, re-use, recycling and recovery"}
{"code":"H2020-EU.3.4.5.10.","title":"Thematic Topics","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | CLEANSKY2 | Thematic Topics","classification_short":"Societal Challenges | Transport | CLEANSKY2 | Thematic Topics"}
{"code":"H2020-EU.3.1.5.1.","title":"Improving halth information and better use of health data","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Methods and data | Improving halth information and better use of health data","classification_short":"Societal Challenges | Health | Methods and data | Improving halth information and better use of health data"}
{"code":"H2020-EU.3.3.3.1.","title":"Make bio-energy more competitive and sustainable","shortTitle":"","language":"en","classification":"Societal challenges | Secure, clean and efficient energy | Alternative fuels and mobile energy sources | Make bio-energy more competitive and sustainable","classification_short":"Societal Challenges | Energy | Alternative fuels and mobile energy sources | Make bio-energy more competitive and sustainable"}
{"code":"H2020-EU.3.6.2.1.","title":"Strengthen the evidence base and support for the Innovation Union and ERA","shortTitle":"","language":"en","classification":"Societal challenges | Europe In A Changing World - Inclusive, Innovative And Reflective Societies | Innovative societies | Strengthen the evidence base and support for the Innovation Union and ERA","classification_short":"Societal Challenges | Inclusive, innovative and reflective societies | Innovative societies | Strengthen the evidence base and support for the Innovation Union and ERA"}
{"code":"H2020-EU.3.1.7.12.","title":"Vaccine","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Innovative Medicines Initiative 2 (IMI2) | Vaccine","classification_short":"Societal Challenges | Health | Innovative Medicines Initiative 2 (IMI2) | Vaccine"}
{"code":"H2020-EU.3.5.4.3.","title":"Measure and assess progress towards a green economy","shortTitle":"","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | Enabling the transition towards a green economy and society through eco-innovation | Measure and assess progress towards a green economy","classification_short":"Societal Challenges | Climate and environment | A green economy and society through eco-innovation | Measure and assess progress towards a green economy"}
{"code":"H2020-EU.3.4.8.5.","title":"Innovation Programme 5: Technologies for sustainable and attractive European rail freight","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | Shift2Rail JU | Innovation Programme 5: Technologies for sustainable and attractive European rail freight","classification_short":"Societal Challenges | Transport | Shift2Rail JU | Innovation Programme 5: Technologies for sustainable and attractive European rail freight"}
{"code":"H2020-EU.3.5.4.4.","title":"Foster resource efficiency through digital systems","shortTitle":"","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | Enabling the transition towards a green economy and society through eco-innovation | Foster resource efficiency through digital systems","classification_short":"Societal Challenges | Climate and environment | A green economy and society through eco-innovation | Foster resource efficiency through digital systems"}
{"code":"H2020-EU.3.3.8.3.","title":"Demonstrate on a large scale the feasibility of using hydrogen to support integration of renewable energy sources into the energy systems, including through its use as a competitive energy storage medium for electricity produced from renewable energy sources","shortTitle":"","language":"en","classification":"Societal challenges | Secure, clean and efficient energy | FCH2 (energy objectives) | Demonstrate on a large scale the feasibility of using hydrogen to support integration of renewable energy sources into the energy systems, including through its use as a competitive energy storage medium for electricity produced from renewable energy sources","classification_short":"Societal Challenges | Energy | FCH2 (energy objectives) | Demonstrate on a large scale the feasibility of using hydrogen to support integration of renewable energy sources into the energy systems, including through its use as a competitive energy storage medium for electricity produced from renewable energy sources"}
{"code":"H2020-Euratom","title":"Euratom","shortTitle":"","language":"en","classification":"Euratom","classification_short":"Euratom"}
{"code":"H2020-EU.3.5.6.2.","title":"Providing for a better understanding on how communities perceive and respond to climate change and seismic and volcanic hazards","shortTitle":"","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | Cultural heritage | Providing for a better understanding on how communities perceive and respond to climate change and seismic and volcanic hazards","classification_short":"Societal Challenges | Climate and environment | Cultural heritage | Providing for a better understanding on how communities perceive and respond to climate change and seismic and volcanic hazards"}
{"code":"H2020-EU.3.2.5.2.","title":"Develop the potential of marine resources through an integrated approach","shortTitle":"","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Cross-cutting marine and maritime research | Develop the potential of marine resources through an integrated approach","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Cross-cutting marine and maritime research | Develop the potential of marine resources through an integrated approach"}
{"code":"H2020-EU.2.1.1.5.","title":"Advanced interfaces and robots: Robotics and smart spaces","shortTitle":"","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Information and Communication Technologies (ICT) | Advanced interfaces and robots: Robotics and smart spaces","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Information and Communication Technologies | Advanced interfaces and robots: Robotics and smart spaces"}
{"code":"H2020-EU.3.3.5.","title":"New knowledge and technologies","shortTitle":"New knowledge and technologies","language":"en","classification":"Societal challenges | Secure, clean and efficient energy | New knowledge and technologies","classification_short":"Societal Challenges | Energy | New knowledge and technologies"}
{"code":"H2020-EU.1.2.2.","title":"FET Proactive","shortTitle":"FET Proactive","language":"en","classification":"Excellent science | Future and Emerging Technologies (FET) | FET Proactive","classification_short":"Excellent Science | Future and Emerging Technologies (FET) | FET Proactive"}
{"code":"H2020-EU.3.6.1.3.","title":"Europe's role as a global actor, notably regarding human rights and global justice","shortTitle":"","language":"en","classification":"Societal challenges | Europe In A Changing World - Inclusive, Innovative And Reflective Societies | Inclusive societies | Europe's role as a global actor, notably regarding human rights and global justice","classification_short":"Societal Challenges | Inclusive, innovative and reflective societies | Inclusive societies | Europe's role as a global actor, notably regarding human rights and global justice"}
{"code":"H2020-EU.2.1.4.1.","title":"Boosting cutting-edge biotechnologies as a future innovation driver","shortTitle":"Cutting-edge biotechnologies as future innovation driver","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Biotechnology | Boosting cutting-edge biotechnologies as a future innovation driver","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Biotechnology | Cutting-edge biotechnologies as future innovation driver"}
{"code":"H2020-EU.3.1.3.","title":"Treating and managing disease","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Treating and managing disease","classification_short":"Societal Challenges | Health | Treating and managing disease"}
{"code":"H2020-EU.3.3.4.","title":"A single, smart European electricity grid","shortTitle":"A single, smart European electricity grid","language":"en","classification":"Societal challenges | Secure, clean and efficient energy | A single, smart European electricity grid","classification_short":"Societal Challenges | Energy | A single, smart European electricity grid"}
{"code":"H2020-EU.3.2.6.","title":"Bio-based Industries Joint Technology Initiative (BBI-JTI)","shortTitle":"","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Bio-based Industries Joint Technology Initiative (BBI-JTI)","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Bio-based Industries Joint Technology Initiative (BBI-JTI)"}
{"code":"H2020-EU.1.3.2.","title":"Nurturing excellence by means of cross-border and cross-sector mobility","shortTitle":"MSCA Mobility","language":"en","classification":"Excellent science | Marie Skłodowska-Curie Actions | Nurturing excellence by means of cross-border and cross-sector mobility","classification_short":"Excellent Science | Marie-Sklodowska-Curie Actions | MSCA Mobility"}
{"code":"H2020-EU.2.1.3.7.","title":"Optimisation of the use of materials","shortTitle":"Optimisation of the use of materials","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Advanced materials | Optimisation of the use of materials","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Advanced materials | Optimisation of the use of materials"}
{"code":"H2020-EU.2.1.2.4.","title":"Efficient and sustainable synthesis and manufacturing of nanomaterials, components and systems","shortTitle":"Synthesis and manufacturing of nanomaterials, components and systems","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Nanotechnologies | Efficient and sustainable synthesis and manufacturing of nanomaterials, components and systems","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Nanotechnologies | Synthesis and manufacturing of nanomaterials, components and systems"}
{"code":"H2020-EU.1.4.1.","title":"Developing the European research infrastructures for 2020 and beyond","shortTitle":"Research infrastructures for 2020 and beyond","language":"en","classification":"Excellent science | Research Infrastructures | Developing the European research infrastructures for 2020 and beyond","classification_short":"Excellent Science | Research Infrastructures | Research infrastructures for 2020 and beyond"}
{"code":"H2020-EU.3.1.1.1.","title":"Understanding the determinants of health, improving health promotion and disease prevention","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Understanding health, wellbeing and disease | Understanding the determinants of health, improving health promotion and disease prevention","classification_short":"Societal Challenges | Health | Understanding health, wellbeing and disease | Understanding the determinants of health, improving health promotion and disease prevention"}
{"code":"H2020-EU.5.c.","title":"Integrate society in science and innovation issues, policies and activities in order to integrate citizens' interests and values and to increase the quality, relevance, social acceptability and sustainability of research and innovation outcomes in various fields of activity from social innovation to areas such as biotechnology and nanotechnology","shortTitle":"","language":"en","classification":"SCIENCE WITH AND FOR SOCIETY | Integrate society in science and innovation issues, policies and activities in order to integrate citizens' interests and values and to increase the quality, relevance, social acceptability and sustainability of research and innovation outcomes in various fields of activity from social innovation to areas such as biotechnology and nanotechnology","classification_short":"Science with and for Society | Integrate society in science and innovation issues, policies and activities in order to integrate citizens' interests and values and to increase the quality, relevance, social acceptability and sustainability of research and innovation outcomes in various fields of activity from social innovation to areas such as biotechnology and nanotechnology"}
{"code":"H2020-EU.5.","title":"SCIENCE WITH AND FOR SOCIETY","shortTitle":"Science with and for Society","language":"en","classification":"SCIENCE WITH AND FOR SOCIETY","classification_short":"Science with and for Society"}
{"code":"H2020-EU.3.5.3.3.","title":"Find alternatives for critical raw materials","shortTitle":"","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | Ensuring the sustainable supply of non-energy and non-agricultural raw materials | Find alternatives for critical raw materials","classification_short":"Societal Challenges | Climate and environment | Supply of non-energy and non-agricultural raw materials | Find alternatives for critical raw materials"}
{"code":"H2020-EU.3.2.3.1.","title":"Developing sustainable and environmentally-friendly fisheries","shortTitle":"","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Unlocking the potential of aquatic living resources | Developing sustainable and environmentally-friendly fisheries","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Potential of aquatic living resources | Developing sustainable and environmentally-friendly fisheries"}
{"code":"H2020-EU.2.1.2.","title":"INDUSTRIAL LEADERSHIP - Leadership in enabling and industrial technologies Nanotechnologies","shortTitle":"Nanotechnologies","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Nanotechnologies","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Nanotechnologies"}
{"code":"H2020-EU.3.4.3.2.","title":"On board, smart control systems","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | Global leadership for the European transport industry | On board, smart control systems","classification_short":"Societal Challenges | Transport | Global leadership for the European transport industry | On board, smart control systems"}
{"code":"H2020-EU.3.2.4.1.","title":"Fostering the bio-economy for bio-based industries","shortTitle":"","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Sustainable and competitive bio-based industries and supporting the development of a European bioeconomy | Fostering the bio-economy for bio-based industries","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Bio-based industries and supporting bio-economy | Fostering the bio-economy for bio-based industries"}
{"code":"H2020-EU.3.1.6.2.","title":"Optimising the efficiency and effectiveness of healthcare provision and reducing inequalities by evidence based decision making and dissemination of best practice, and innovative technologies and approaches","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Health care provision and integrated care | Optimising the efficiency and effectiveness of healthcare provision and reducing inequalities by evidence based decision making and dissemination of best practice, and innovative technologies and approaches","classification_short":"Societal Challenges | Health | Health care provision and integrated care | Optimising the efficiency and effectiveness of healthcare provision and reducing inequalities by evidence based decision making and dissemination of best practice, and innovative technologies and approaches"}
{"code":"H2020-EU.2.1.5.","title":"INDUSTRIAL LEADERSHIP - Leadership in enabling and industrial technologies - Advanced manufacturing and processing","shortTitle":"Advanced manufacturing and processing","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Advanced manufacturing and processing","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Advanced manufacturing and processing"}
{"code":"H2020-EU.3.5.2.2.","title":"Developing integrated approaches to address water-related challenges and the transition to sustainable management and use of water resources and services","shortTitle":"","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | Protection of the environment, sustainable management of natural resources, water, biodiversity and ecosystems | Developing integrated approaches to address water-related challenges and the transition to sustainable management and use of water resources and services","classification_short":"Societal Challenges | Climate and environment | Protection of the environment | Developing integrated approaches to address water-related challenges and the transition to sustainable management and use of water resources and services"}
{"code":"H2020-EU.3.1.7.3.","title":"Cardiovascular diseases","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Innovative Medicines Initiative 2 (IMI2) | Cardiovascular diseases","classification_short":"Societal Challenges | Health | Innovative Medicines Initiative 2 (IMI2) | Cardiovascular diseases"}
{"code":"H2020-EU.3.3.8.2.","title":"Increase the energy efficiency of production of hydrogen mainly from water electrolysis and renewable sources while reducing operating and capital costs, so that the combined system of the hydrogen production and the conversion using the fuel cell system can compete with the alternatives for electricity production available on the market","shortTitle":"","language":"en","classification":"Societal challenges | Secure, clean and efficient energy | FCH2 (energy objectives) | Increase the energy efficiency of production of hydrogen mainly from water electrolysis and renewable sources while reducing operating and capital costs, so that the combined system of the hydrogen production and the conversion using the fuel cell system can compete with the alternatives for electricity production available on the market","classification_short":"Societal Challenges | Energy | FCH2 (energy objectives) | Increase the energy efficiency of production of hydrogen mainly from water electrolysis and renewable sources while reducing operating and capital costs, so that the combined system of the hydrogen production and the conversion using the fuel cell system can compete with the alternatives for electricity production available on the market"}
{"code":"H2020-EU.2.1.6.3.","title":"Enabling exploitation of space data","shortTitle":"Enabling exploitation of space data","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Space | Enabling exploitation of space data","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Space | Enabling exploitation of space data"}
{"code":"H2020-EU.2.1.2.5.","title":"Developing and standardisation of capacity-enhancing techniques, measuring methods and equipment","shortTitle":"Capacity-enhancing techniques, measuring methods and equipment","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Nanotechnologies | Developing and standardisation of capacity-enhancing techniques, measuring methods and equipment","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Nanotechnologies | Capacity-enhancing techniques, measuring methods and equipment"}
{"code":"H2020-EU.3.6.2.","title":"Innovative societies","shortTitle":"Innovative societies","language":"en","classification":"Societal challenges | Europe In A Changing World - Inclusive, Innovative And Reflective Societies | Innovative societies","classification_short":"Societal Challenges | Inclusive, innovative and reflective societies | Innovative societies"}
{"code":"H2020-EU.3.1.2.1.","title":"Developing effective prevention and screening programmes and improving the assessment of disease susceptibility","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Preventing disease | Developing effective prevention and screening programmes and improving the assessment of disease susceptibility","classification_short":"Societal Challenges | Health | Preventing disease | Developing effective prevention and screening programmes and improving the assessment of disease susceptibility"}
{"code":"H2020-EU.3.6.1.4.","title":"The promotion of sustainable and inclusive environments through innovative spatial and urban planning and design","shortTitle":"","language":"en","classification":"Societal challenges | Europe In A Changing World - Inclusive, Innovative And Reflective Societies | Inclusive societies | The promotion of sustainable and inclusive environments through innovative spatial and urban planning and design","classification_short":"Societal Challenges | Inclusive, innovative and reflective societies | Inclusive societies | The promotion of sustainable and inclusive environments through innovative spatial and urban planning and design"}
{"code":"H2020-EU.3.3.2.4.","title":"Develop geothermal, hydro, marine and other renewable energy options","shortTitle":"","language":"en","classification":"Societal challenges | Secure, clean and efficient energy | Low-cost, low-carbon energy supply | Develop geothermal, hydro, marine and other renewable energy options","classification_short":"Societal Challenges | Energy | Low-cost, low-carbon energy supply | Develop geothermal, hydro, marine and other renewable energy options"}
{"code":"H2020-EU.5.b.","title":"Promote gender equality in particular by supporting structural change in the organisation of research institutions and in the content and design of research activities","shortTitle":"","language":"en","classification":"SCIENCE WITH AND FOR SOCIETY | Promote gender equality in particular by supporting structural change in the organisation of research institutions and in the content and design of research activities","classification_short":"Science with and for Society | Promote gender equality in particular by supporting structural change in the organisation of research institutions and in the content and design of research activities"}
{"code":"H2020-EU.1.3.3.","title":"Stimulating innovation by means of cross-fertilisation of knowledge","shortTitle":"MSCA Knowledge","language":"en","classification":"Excellent science | Marie Skłodowska-Curie Actions | Stimulating innovation by means of cross-fertilisation of knowledge","classification_short":"Excellent Science | Marie-Sklodowska-Curie Actions | MSCA Knowledge"}
{"code":"H2020-EU.3.1.4.2.","title":"Individual awareness and empowerment for self-management of health","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Active ageing and self-management of health | Individual awareness and empowerment for self-management of health","classification_short":"Societal Challenges | Health | Active ageing and self-management of health | Individual awareness and empowerment for self-management of health"}
{"code":"H2020-EU.3.1.7.8.","title":"Immune-mediated diseases","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Innovative Medicines Initiative 2 (IMI2) | Immune-mediated diseases","classification_short":"Societal Challenges | Health | Innovative Medicines Initiative 2 (IMI2) | Immune-mediated diseases"}
{"code":"H2020-EU.3.4.","title":"SOCIETAL CHALLENGES - Smart, Green And Integrated Transport","shortTitle":"Transport","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport","classification_short":"Societal Challenges | Transport"}
{"code":"H2020-EU.3.2.6.1.","title":"Sustainable and competitive bio-based industries and supporting the development of a European bio-economy","shortTitle":"","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Bio-based Industries Joint Technology Initiative (BBI-JTI) | Sustainable and competitive bio-based industries and supporting the development of a European bio-economy","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Bio-based Industries Joint Technology Initiative (BBI-JTI) | Sustainable and competitive bio-based industries and supporting the development of a European bio-economy"}
{"code":"H2020-EU.2.1.2.1.","title":"Developing next generation nanomaterials, nanodevices and nanosystems ","shortTitle":"Next generation nanomaterials, nanodevices and nanosystems","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Nanotechnologies | Developing next generation nanomaterials, nanodevices and nanosystems ","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Nanotechnologies | Next generation nanomaterials, nanodevices and nanosystems"}
{"code":"H2020-Euratom-1.5.","title":"Move toward demonstration of feasibility of fusion as a power source by exploiting existing and future fusion facilities","shortTitle":"","language":"en","classification":"Euratom | Indirect actions | Move toward demonstration of feasibility of fusion as a power source by exploiting existing and future fusion facilities","classification_short":"Euratom | Indirect actions | Move toward demonstration of feasibility of fusion as a power source by exploiting existing and future fusion facilities"}
{"code":"H2020-EU.3.5.","title":"SOCIETAL CHALLENGES - Climate action, Environment, Resource Efficiency and Raw Materials","shortTitle":"Climate and environment","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials","classification_short":"Societal Challenges | Climate and environment"}
{"code":"H2020-EU.2.1.1.6.","title":"Micro- and nanoelectronics and photonics: Key enabling technologies related to micro- and nanoelectronics and to photonics, covering also quantum technologies","shortTitle":"","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Information and Communication Technologies (ICT) | Micro- and nanoelectronics and photonics: Key enabling technologies related to micro- and nanoelectronics and to photonics, covering also quantum technologies","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Information and Communication Technologies | Micro- and nanoelectronics and photonics: Key enabling technologies related to micro- and nanoelectronics and to photonics, covering also quantum technologies"}
{"code":"H2020-EU.3.4.2.4.","title":"Reducing accident rates, fatalities and casualties and improving security","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | Better mobility, less congestion, more safety and security | Reducing accident rates, fatalities and casualties and improving security","classification_short":"Societal Challenges | Transport | Mobility, safety and security | Reducing accident rates, fatalities and casualties and improving security"}
{"code":"H2020-EU.3.6.2.2.","title":"Explore new forms of innovation, with special emphasis on social innovation and creativity and understanding how all forms of innovation are developed, succeed or fail","shortTitle":"","language":"en","classification":"Societal challenges | Europe In A Changing World - Inclusive, Innovative And Reflective Societies | Innovative societies | Explore new forms of innovation, with special emphasis on social innovation and creativity and understanding how all forms of innovation are developed, succeed or fail","classification_short":"Societal Challenges | Inclusive, innovative and reflective societies | Innovative societies | Explore new forms of innovation, with special emphasis on social innovation and creativity and understanding how all forms of innovation are developed, succeed or fail"}
{"code":"H2020-EU.3.5.1.1.","title":"Improve the understanding of climate change and the provision of reliable climate projections","shortTitle":"","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | Fighting and adapting to climate change | Improve the understanding of climate change and the provision of reliable climate projections","classification_short":"Societal Challenges | Climate and environment | Fighting and adapting to climate change | Improve the understanding of climate change and the provision of reliable climate projections"}
{"code":"H2020-EU.3.4.3.4.","title":"Exploring entirely new transport concepts","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | Global leadership for the European transport industry | Exploring entirely new transport concepts","classification_short":"Societal Challenges | Transport | Global leadership for the European transport industry | Exploring entirely new transport concepts"}
{"code":"H2020-EU.3.5.2.1.","title":"Further our understanding of biodiversity and the functioning of ecosystems, their interactions with social systems and their role in sustaining the economy and human well-being","shortTitle":"","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | Protection of the environment, sustainable management of natural resources, water, biodiversity and ecosystems | Further our understanding of biodiversity and the functioning of ecosystems, their interactions with social systems and their role in sustaining the economy and human well-being","classification_short":"Societal Challenges | Climate and environment | Protection of the environment | Further our understanding of biodiversity and the functioning of ecosystems, their interactions with social systems and their role in sustaining the economy and human well-being"}
{"code":"H2020-EU.3.2.2.3.","title":"A sustainable and competitive agri-food industry","shortTitle":"","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Sustainable and competitive agri-food sector for a safe and healthy diet | A sustainable and competitive agri-food industry","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Sustainable and competitive agri-food sector for a safe and healthy diet | A sustainable and competitive agri-food industry"}
{"code":"H2020-EU.1.4.1.1.","title":"Developing new world-class research infrastructures","shortTitle":"","language":"en","classification":"Excellent science | Research Infrastructures | Developing the European research infrastructures for 2020 and beyond | Developing new world-class research infrastructures","classification_short":"Excellent Science | Research Infrastructures | Research infrastructures for 2020 and beyond | Developing new world-class research infrastructures"}
{"code":"H2020-EU.3.1.2.3.","title":"Developing better preventive and therapeutic vaccines","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Preventing disease | Developing better preventive and therapeutic vaccines","classification_short":"Societal Challenges | Health | Preventing disease | Developing better preventive and therapeutic vaccines"}
{"code":"H2020-EU.1.4.3.2.","title":"Facilitate strategic international cooperation","shortTitle":"","language":"en","classification":"Excellent science | Research Infrastructures | Reinforcing European research infrastructure policy and international cooperation | Facilitate strategic international cooperation","classification_short":"Excellent Science | Research Infrastructures | Research infrastructure policy and international cooperation | Facilitate strategic international cooperation"}
{"code":"H2020-EU.3.5.2.","title":"Protection of the environment, sustainable management of natural resources, water, biodiversity and ecosystems","shortTitle":"Protection of the environment","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | Protection of the environment, sustainable management of natural resources, water, biodiversity and ecosystems","classification_short":"Societal Challenges | Climate and environment | Protection of the environment"}
{"code":"H2020-Euratom-1.9.","title":"European Fusion Development Agreement","shortTitle":"","language":"en","classification":"Euratom | Indirect actions | European Fusion Development Agreement","classification_short":"Euratom | Indirect actions | European Fusion Development Agreement"}
{"code":"H2020-EU.3.2.1.1.","title":"Increasing production efficiency and coping with climate change, while ensuring sustainability and resilience","shortTitle":"","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Sustainable agriculture and forestry | Increasing production efficiency and coping with climate change, while ensuring sustainability and resilience","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Sustainable agriculture and forestry | Increasing production efficiency and coping with climate change, while ensuring sustainability and resilience"}
{"code":"H2020-EU.3.2.2.2.","title":"Healthy and safe foods and diets for all","shortTitle":"","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Sustainable and competitive agri-food sector for a safe and healthy diet | Healthy and safe foods and diets for all","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Sustainable and competitive agri-food sector for a safe and healthy diet | Healthy and safe foods and diets for all"}
{"code":"H2020-EU.2.1.4.2.","title":"Bio-technology based industrial products and processes","shortTitle":"Bio-technology based industrial products and processes","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Biotechnology | Bio-technology based industrial products and processes","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Biotechnology | Bio-technology based industrial products and processes"}
{"code":"H2020-EU.3.4.5.1.","title":"IADP Large Passenger Aircraft","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | CLEANSKY2 | IADP Large Passenger Aircraft","classification_short":"Societal Challenges | Transport | CLEANSKY2 | IADP Large Passenger Aircraft"}
{"code":"H2020-EU.3.1.1.3.","title":"Improving surveillance and preparedness","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Understanding health, wellbeing and disease | Improving surveillance and preparedness","classification_short":"Societal Challenges | Health | Understanding health, wellbeing and disease | Improving surveillance and preparedness"}
{"code":"H2020-EU.2.1.6.","title":"INDUSTRIAL LEADERSHIP - Leadership in enabling and industrial technologies Space","shortTitle":"Space","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Space","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Space"}
{"code":"H2020-EU.3.1.5.2.","title":"Improving scientific tools and methods to support policy making and regulatory needs","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Methods and data | Improving scientific tools and methods to support policy making and regulatory needs","classification_short":"Societal Challenges | Health | Methods and data | Improving scientific tools and methods to support policy making and regulatory needs"}
{"code":"H2020-EU.3.","title":"Societal challenges","shortTitle":"Societal Challenges","language":"en","classification":"Societal challenges","classification_short":"Societal Challenges"}
{"code":"H2020-EU.1.3.","title":"EXCELLENT SCIENCE - Marie Skłodowska-Curie Actions","shortTitle":"Marie-Sklodowska-Curie Actions","language":"en","classification":"Excellent science | Marie Skłodowska-Curie Actions","classification_short":"Excellent Science | Marie-Sklodowska-Curie Actions"}
{"code":"H2020-EU.4.f.","title":"Strengthening the administrative and operational capacity of transnational networks of National Contact Points","shortTitle":"","language":"en","classification":"SPREADING EXCELLENCE AND WIDENING PARTICIPATION | Strengthening the administrative and operational capacity of transnational networks of National Contact Points","classification_short":"Spreading excellence and widening participation | Strengthening the administrative and operational capacity of transnational networks of National Contact Points"}
{"code":"H2020-EU.1.2.","title":"EXCELLENT SCIENCE - Future and Emerging Technologies (FET)","shortTitle":"Future and Emerging Technologies (FET)","language":"en","classification":"Excellent science | Future and Emerging Technologies (FET)","classification_short":"Excellent Science | Future and Emerging Technologies (FET)"}
{"code":"H2020-EU.3.3.1.1.","title":"Bring to mass market technologies and services for a smart and efficient energy use","shortTitle":"","language":"en","classification":"Societal challenges | Secure, clean and efficient energy | Reducing energy consumption and carbon foorpint by smart and sustainable use | Bring to mass market technologies and services for a smart and efficient energy use","classification_short":"Societal Challenges | Energy | Reducing energy consumption and carbon footprint | Bring to mass market technologies and services for a smart and efficient energy use"}
{"code":"H2020-EU.3.3.2.2.","title":"Develop efficient, reliable and cost-competitive solar energy systems","shortTitle":"","language":"en","classification":"Societal challenges | Secure, clean and efficient energy | Low-cost, low-carbon energy supply | Develop efficient, reliable and cost-competitive solar energy systems","classification_short":"Societal Challenges | Energy | Low-cost, low-carbon energy supply | Develop efficient, reliable and cost-competitive solar energy systems"}
{"code":"H2020-EU.4.c.","title":"Establishing ERA Chairs","shortTitle":"ERA chairs","language":"en","classification":"SPREADING EXCELLENCE AND WIDENING PARTICIPATION | Establishing ERA Chairs","classification_short":"Spreading excellence and widening participation | ERA chairs"}
{"code":"H2020-EU.3.4.5","title":"CLEANSKY2","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | CLEANSKY2","classification_short":"Societal Challenges | Transport | CLEANSKY2"}
{"code":"H2020-EU.3.4.5.2.","title":"IADP Regional Aircraft","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | CLEANSKY2 | IADP Regional Aircraft","classification_short":"Societal Challenges | Transport | CLEANSKY2 | IADP Regional Aircraft"}
{"code":"H2020-EU.3.5.1.","title":"Fighting and adapting to climate change","shortTitle":"Fighting and adapting to climate change","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | Fighting and adapting to climate change","classification_short":"Societal Challenges | Climate and environment | Fighting and adapting to climate change"}
{"code":"H2020-EU.3.3.1.","title":"Reducing energy consumption and carbon foorpint by smart and sustainable use","shortTitle":"Reducing energy consumption and carbon footprint","language":"en","classification":"Societal challenges | Secure, clean and efficient energy | Reducing energy consumption and carbon foorpint by smart and sustainable use","classification_short":"Societal Challenges | Energy | Reducing energy consumption and carbon footprint"}
{"code":"H2020-EU.3.4.1.","title":"Resource efficient transport that respects the environment","shortTitle":"Resource efficient transport that respects the environment","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | Resource efficient transport that respects the environment","classification_short":"Societal Challenges | Transport | Resource efficient transport that respects the environment"}
{"code":"H2020-EU.3.2.6.2.","title":"Fostering the bio-economy for bio-based industrie","shortTitle":"","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Bio-based Industries Joint Technology Initiative (BBI-JTI) | Fostering the bio-economy for bio-based industrie","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Bio-based Industries Joint Technology Initiative (BBI-JTI) | Fostering the bio-economy for bio-based industrie"}
{"code":"H2020-EU.3.4.7.1","title":"Exploratory Research","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | SESAR JU | Exploratory Research","classification_short":"Societal Challenges | Transport | SESAR JU | Exploratory Research"}
{"code":"H2020-EU.1.2.1.","title":"FET Open","shortTitle":"FET Open","language":"en","classification":"Excellent science | Future and Emerging Technologies (FET) | FET Open","classification_short":"Excellent Science | Future and Emerging Technologies (FET) | FET Open"}
{"code":"H2020-EU.3.4.3.1.","title":"Developing the next generation of transport means as the way to secure market share in the future","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | Global leadership for the European transport industry | Developing the next generation of transport means as the way to secure market share in the future","classification_short":"Societal Challenges | Transport | Global leadership for the European transport industry | Developing the next generation of transport means as the way to secure market share in the future"}
{"code":"H2020-EU.3.2.4.","title":"Sustainable and competitive bio-based industries and supporting the development of a European bioeconomy","shortTitle":"Bio-based industries and supporting bio-economy","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Sustainable and competitive bio-based industries and supporting the development of a European bioeconomy","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Bio-based industries and supporting bio-economy"}
{"code":"H2020-EC","title":"Horizon 2020 Framework Programme","shortTitle":"EC Treaty","language":"en","classification":"Horizon 2020 Framework Programme","classification_short":"EC Treaty"}
{"code":"H2020-EU.3.6.2.4.","title":"Promote coherent and effective cooperation with third countries","shortTitle":"","language":"en","classification":"Societal challenges | Europe In A Changing World - Inclusive, Innovative And Reflective Societies | Innovative societies | Promote coherent and effective cooperation with third countries","classification_short":"Societal Challenges | Inclusive, innovative and reflective societies | Innovative societies | Promote coherent and effective cooperation with third countries"}
{"code":"H2020-EU.3.1.7.5.","title":"Neurodegenerative diseases","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Innovative Medicines Initiative 2 (IMI2) | Neurodegenerative diseases","classification_short":"Societal Challenges | Health | Innovative Medicines Initiative 2 (IMI2) | Neurodegenerative diseases"}
{"code":"H2020-EU.2.1.6.4.","title":"Enabling European research in support of international space partnerships","shortTitle":"Research in support of international space partnerships","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Space | Enabling European research in support of international space partnerships","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Space | Research in support of international space partnerships"}
{"code":"H2020-EU.2.1.5.1.","title":"Technologies for Factories of the Future","shortTitle":"Technologies for Factories of the Future","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Advanced manufacturing and processing | Technologies for Factories of the Future","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Advanced manufacturing and processing | Technologies for Factories of the Future"}
{"code":"H2020-EU.2.3.2.","title":"Specific support","shortTitle":"","language":"en","classification":"Industrial leadership | Innovation In SMEs | Specific support","classification_short":"Industrial Leadership | Innovation in SMEs | Specific support"}
{"code":"H2020-EU.1.4.2.","title":"Fostering the innovation potential of research infrastructures and their human resources","shortTitle":"Research infrastructures and their human resources","language":"en","classification":"Excellent science | Research Infrastructures | Fostering the innovation potential of research infrastructures and their human resources","classification_short":"Excellent Science | Research Infrastructures | Research infrastructures and their human resources"}
{"code":"H2020-EU.3.3.1.2.","title":"Unlock the potential of efficient and renewable heating-cooling systems","shortTitle":"","language":"en","classification":"Societal challenges | Secure, clean and efficient energy | Reducing energy consumption and carbon foorpint by smart and sustainable use | Unlock the potential of efficient and renewable heating-cooling systems","classification_short":"Societal Challenges | Energy | Reducing energy consumption and carbon footprint | Unlock the potential of efficient and renewable heating-cooling systems"}
{"code":"H2020-EU.3.2.3.2.","title":"Developing competitive and environmentally-friendly European aquaculture","shortTitle":"","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Unlocking the potential of aquatic living resources | Developing competitive and environmentally-friendly European aquaculture","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Potential of aquatic living resources | Developing competitive and environmentally-friendly European aquaculture"}
{"code":"H2020-EU.3.2.1.3.","title":"Empowerment of rural areas, support to policies and rural innovation","shortTitle":"","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Sustainable agriculture and forestry | Empowerment of rural areas, support to policies and rural innovation","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Sustainable agriculture and forestry | Empowerment of rural areas, support to policies and rural innovation"}
{"code":"H2020-EU.3.2.5.3.","title":"Cross-cutting concepts and technologies enabling maritime growth","shortTitle":"","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Cross-cutting marine and maritime research | Cross-cutting concepts and technologies enabling maritime growth","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Cross-cutting marine and maritime research | Cross-cutting concepts and technologies enabling maritime growth"}
{"code":"H2020-EU.2.1.3.1.","title":"Cross-cutting and enabling materials technologies","shortTitle":"Cross-cutting and enabling materials technologies","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Advanced materials | Cross-cutting and enabling materials technologies","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Advanced materials | Cross-cutting and enabling materials technologies"}
{"code":"H2020-EU.3.1.1.2.","title":"Understanding disease","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Understanding health, wellbeing and disease | Understanding disease","classification_short":"Societal Challenges | Health | Understanding health, wellbeing and disease | Understanding disease"}
{"code":"H2020-Euratom-1.6.","title":"Lay the foundations for future fusion power plants by developing materials, technologies and conceptual design","shortTitle":"","language":"en","classification":"Euratom | Indirect actions | Lay the foundations for future fusion power plants by developing materials, technologies and conceptual design","classification_short":"Euratom | Indirect actions | Lay the foundations for future fusion power plants by developing materials, technologies and conceptual design"}
{"code":"H2020-EU.3.5.7.1.","title":"Reduce the use of the EU defined \"Critical raw materials\", for instance through low platinum or platinum free resources and through recycling or reducing or avoiding the use of rare earth elements","shortTitle":"","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | FCH2 (raw materials objective) | Reduce the use of the EU defined \"Critical raw materials\", for instance through low platinum or platinum free resources and through recycling or reducing or avoiding the use of rare earth elements","classification_short":"Societal Challenges | Climate and environment | FCH2 (raw materials objective) | Reduce the use of the EU defined \"Critical raw materials\", for instance through low platinum or platinum free resources and through recycling or reducing or avoiding the use of rare earth elements"}
{"code":"H2020-EU.2.2.","title":"INDUSTRIAL LEADERSHIP - Access to risk finance","shortTitle":"Access to risk finance","language":"en","classification":"Industrial leadership | Access to risk finance","classification_short":"Industrial Leadership | Access to risk finance"}
{"code":"H2020-EU.3.4.6.","title":"FCH2 (transport objectives)","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | FCH2 (transport objectives)","classification_short":"Societal Challenges | Transport | FCH2 (transport objectives)"}
{"code":"H2020-EU.4.d.","title":"A Policy Support Facility","shortTitle":"Policy Support Facility (PSF)","language":"en","classification":"SPREADING EXCELLENCE AND WIDENING PARTICIPATION | A Policy Support Facility","classification_short":"Spreading excellence and widening participation | Policy Support Facility (PSF)"}
{"code":"H2020-EU.2.1.1.7.","title":"ECSEL","shortTitle":"","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Information and Communication Technologies (ICT) | ECSEL","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Information and Communication Technologies | ECSEL"}
{"code":"H2020-EU.3.1.5.","title":"Methods and data","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Methods and data","classification_short":"Societal Challenges | Health | Methods and data"}
{"code":"H2020-EU.3.7.7.","title":"Enhance stadardisation and interoperability of systems, including for emergency purposes","shortTitle":"","language":"en","classification":"Societal challenges | Secure societies - Protecting freedom and security of Europe and its citizens | Enhance stadardisation and interoperability of systems, including for emergency purposes","classification_short":"Societal Challenges | Secure societies | Enhance stadardisation and interoperability of systems, including for emergency purposes"}
{"code":"H2020-Euratom-1.7.","title":"Promote innovation and industry competitiveness","shortTitle":"","language":"en","classification":"Euratom | Indirect actions | Promote innovation and industry competitiveness","classification_short":"Euratom | Indirect actions | Promote innovation and industry competitiveness"}
{"code":"H2020-EU.2.1.5.3.","title":"Sustainable, resource-efficient and low-carbon technologies in energy-intensive process industries","shortTitle":"Sustainable, resource-efficient and low-carbon technologies in energy-intensive process industries","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Advanced manufacturing and processing | Sustainable, resource-efficient and low-carbon technologies in energy-intensive process industries","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Advanced manufacturing and processing | Sustainable, resource-efficient and low-carbon technologies in energy-intensive process industries"}
{"code":"H2020-EU.2.1.4.3.","title":"Innovative and competitive platform technologies","shortTitle":"Innovative and competitive platform technologies","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Biotechnology | Innovative and competitive platform technologies","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Biotechnology | Innovative and competitive platform technologies"}
{"code":"H2020-EU.1.2.3.","title":"FET Flagships","shortTitle":"FET Flagships","language":"en","classification":"Excellent science | Future and Emerging Technologies (FET) | FET Flagships","classification_short":"Excellent Science | Future and Emerging Technologies (FET) | FET Flagships"}
{"code":"H2020-EU.3.6.3.","title":"Reflective societies - cultural heritage and European identity","shortTitle":"Reflective societies","language":"en","classification":"Societal challenges | Europe In A Changing World - Inclusive, Innovative And Reflective Societies | Reflective societies - cultural heritage and European identity","classification_short":"Societal Challenges | Inclusive, innovative and reflective societies | Reflective societies"}
{"code":"H2020-EU.3.6.3.3.","title":"Research on Europe's role in the world, on the mutual influence and ties between the world regions, and a view from outside on European cultures","shortTitle":"","language":"en","classification":"Societal challenges | Europe In A Changing World - Inclusive, Innovative And Reflective Societies | Reflective societies - cultural heritage and European identity | Research on Europe's role in the world, on the mutual influence and ties between the world regions, and a view from outside on European cultures","classification_short":"Societal Challenges | Inclusive, innovative and reflective societies | Reflective societies | Research on Europe's role in the world, on the mutual influence and ties between the world regions, and a view from outside on European cultures"}
{"code":"H2020-EU.3.2.4.2.","title":"Developing integrated biorefineries","shortTitle":"","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Sustainable and competitive bio-based industries and supporting the development of a European bioeconomy | Developing integrated biorefineries","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Bio-based industries and supporting bio-economy | Developing integrated biorefineries"}
{"code":"H2020-EU.2.1.6.1.1.","title":"Safeguard and further develop a competitive, sustainable and entrepreneurial space industry and research community and strengthen European non-dependence in space systems","shortTitle":"","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Space | Enabling European competitiveness, non-dependence and innovation of the European space sector | Safeguard and further develop a competitive, sustainable and entrepreneurial space industry and research community and strengthen European non-dependence in space systems","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Space | Competitiveness, non-dependence and innovation | Safeguard and further develop a competitive, sustainable and entrepreneurial space industry and research community and strengthen European non-dependence in space systems"}
{"code":"H2020-EU.3.1.3.2.","title":"Transferring knowledge to clinical practice and scalable innovation actions","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Treating and managing disease | Transferring knowledge to clinical practice and scalable innovation actions","classification_short":"Societal Challenges | Health | Treating and managing disease | Transferring knowledge to clinical practice and scalable innovation actions"}
{"code":"H2020-EU.2.","title":"Industrial leadership","shortTitle":"Industrial Leadership","language":"en","classification":"Industrial leadership","classification_short":"Industrial Leadership"}
{"code":"H2020-EU.3.4.1.3.","title":"Improving transport and mobility in urban areas","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | Resource efficient transport that respects the environment | Improving transport and mobility in urban areas","classification_short":"Societal Challenges | Transport | Resource efficient transport that respects the environment | Improving transport and mobility in urban areas"}
{"code":"H2020-EU.4.e.","title":"Supporting access to international networks for excellent researchers and innovators who lack sufficient involvement in European and international networks","shortTitle":"","language":"en","classification":"SPREADING EXCELLENCE AND WIDENING PARTICIPATION | Supporting access to international networks for excellent researchers and innovators who lack sufficient involvement in European and international networks","classification_short":"Spreading excellence and widening participation | Supporting access to international networks for excellent researchers and innovators who lack sufficient involvement in European and international networks"}
{"code":"H2020-EU.3.2.1.","title":"Sustainable agriculture and forestry","shortTitle":"Sustainable agriculture and forestry","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Sustainable agriculture and forestry","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Sustainable agriculture and forestry"}
{"code":"H2020-EU.3.1.7.7.","title":"Respiratory diseases","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Innovative Medicines Initiative 2 (IMI2) | Respiratory diseases","classification_short":"Societal Challenges | Health | Innovative Medicines Initiative 2 (IMI2) | Respiratory diseases"}
{"code":"H2020-EU.3.4.8.6.","title":"Cross-cutting themes and activities (CCA)","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | Shift2Rail JU | Cross-cutting themes and activities (CCA)","classification_short":"Societal Challenges | Transport | Shift2Rail JU | Cross-cutting themes and activities (CCA)"}
{"code":"H2020-EU.3.4.8.4.","title":"Innovation Programme 4: IT Solutions for attractive railway services","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | Shift2Rail JU | Innovation Programme 4: IT Solutions for attractive railway services","classification_short":"Societal Challenges | Transport | Shift2Rail JU | Innovation Programme 4: IT Solutions for attractive railway services"}
{"code":"H2020-EU.3.2.2.","title":"Sustainable and competitive agri-food sector for a safe and healthy diet","shortTitle":"Sustainable and competitive agri-food sector for a safe and healthy diet","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Sustainable and competitive agri-food sector for a safe and healthy diet","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Sustainable and competitive agri-food sector for a safe and healthy diet"}
{"code":"H2020-EU.3.4.3.","title":"Global leadership for the European transport industry","shortTitle":"Global leadership for the European transport industry","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | Global leadership for the European transport industry","classification_short":"Societal Challenges | Transport | Global leadership for the European transport industry"}
{"code":"H2020-EU.1.4.2.1.","title":"Exploiting the innovation potential of research infrastructures","shortTitle":"","language":"en","classification":"Excellent science | Research Infrastructures | Fostering the innovation potential of research infrastructures and their human resources | Exploiting the innovation potential of research infrastructures","classification_short":"Excellent Science | Research Infrastructures | Research infrastructures and their human resources | Exploiting the innovation potential of research infrastructures"}
{"code":"H2020-EU.3.3.2.3.","title":"Develop competitive and environmentally safe technologies for CO2 capture, transport, storage and re-use","shortTitle":"","language":"en","classification":"Societal challenges | Secure, clean and efficient energy | Low-cost, low-carbon energy supply | Develop competitive and environmentally safe technologies for CO2 capture, transport, storage and re-use","classification_short":"Societal Challenges | Energy | Low-cost, low-carbon energy supply | Develop competitive and environmentally safe technologies for CO2 capture, transport, storage and re-use"}
{"code":"H2020-EU.3.6.3.1.","title":"Study European heritage, memory, identity, integration and cultural interaction and translation, including its representations in cultural and scientific collections, archives and museums, to better inform and understand the present by richer interpretations of the past","shortTitle":"","language":"en","classification":"Societal challenges | Europe In A Changing World - Inclusive, Innovative And Reflective Societies | Reflective societies - cultural heritage and European identity | Study European heritage, memory, identity, integration and cultural interaction and translation, including its representations in cultural and scientific collections, archives and museums, to better inform and understand the present by richer interpretations of the past","classification_short":"Societal Challenges | Inclusive, innovative and reflective societies | Reflective societies | Study European heritage, memory, identity, integration and cultural interaction and translation, including its representations in cultural and scientific collections, archives and museums, to better inform and understand the present by richer interpretations of the past"}
{"code":"H2020-EU.2.1.2.2.","title":"Ensuring the safe and sustainable development and application of nanotechnologies","shortTitle":"Safe and sustainable nanotechnologies","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Nanotechnologies | Ensuring the safe and sustainable development and application of nanotechnologies","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Nanotechnologies | Safe and sustainable nanotechnologies"}
{"code":"H2020-EU.3.1.6.","title":"Health care provision and integrated care","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Health care provision and integrated care","classification_short":"Societal Challenges | Health | Health care provision and integrated care"}
{"code":"H2020-EU.3.4.5.9.","title":"Technology Evaluator","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | CLEANSKY2 | Technology Evaluator","classification_short":"Societal Challenges | Transport | CLEANSKY2 | Technology Evaluator"}
{"code":"H2020-EU.3.6.","title":"SOCIETAL CHALLENGES - Europe In A Changing World - Inclusive, Innovative And Reflective Societies","shortTitle":"Inclusive, innovative and reflective societies","language":"en","classification":"Societal challenges | Europe In A Changing World - Inclusive, Innovative And Reflective Societies","classification_short":"Societal Challenges | Inclusive, innovative and reflective societies"}
{"code":"H2020-EU.3.4.8.","title":"Shift2Rail JU","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | Shift2Rail JU","classification_short":"Societal Challenges | Transport | Shift2Rail JU"}
{"code":"H2020-EU.3.2.6.3.","title":"Sustainable biorefineries","shortTitle":"","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Bio-based Industries Joint Technology Initiative (BBI-JTI) | Sustainable biorefineries","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Bio-based Industries Joint Technology Initiative (BBI-JTI) | Sustainable biorefineries"}
{"code":"H2020-EU.4.a.","title":"Teaming of excellent research institutions and low performing RDI regions","shortTitle":"Teaming of research institutions and low performing regions","language":"en","classification":"SPREADING EXCELLENCE AND WIDENING PARTICIPATION | Teaming of excellent research institutions and low performing RDI regions","classification_short":"Spreading excellence and widening participation | Teaming of research institutions and low performing regions"}
{"code":"H2020-EU.3.1.7.4.","title":"Diabetes","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Innovative Medicines Initiative 2 (IMI2) | Diabetes","classification_short":"Societal Challenges | Health | Innovative Medicines Initiative 2 (IMI2) | Diabetes"}
{"code":"H2020-EU.3.7.2.","title":"Protect and improve the resilience of critical infrastructures, supply chains and tranport modes","shortTitle":"","language":"en","classification":"Societal challenges | Secure societies - Protecting freedom and security of Europe and its citizens | Protect and improve the resilience of critical infrastructures, supply chains and tranport modes","classification_short":"Societal Challenges | Secure societies | Protect and improve the resilience of critical infrastructures, supply chains and tranport modes"}
{"code":"H2020-EU.3.1.2.","title":"Preventing disease","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Preventing disease","classification_short":"Societal Challenges | Health | Preventing disease"}
{"code":"H2020-EU.3.5.3.4.","title":"Improve societal awareness and skills on raw materials","shortTitle":"","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | Ensuring the sustainable supply of non-energy and non-agricultural raw materials | Improve societal awareness and skills on raw materials","classification_short":"Societal Challenges | Climate and environment | Supply of non-energy and non-agricultural raw materials | Improve societal awareness and skills on raw materials"}
{"code":"H2020-EU.3.3.7.","title":"Market uptake of energy innovation - building on Intelligent Energy Europe","shortTitle":"Market uptake of energy innovation","language":"en","classification":"Societal challenges | Secure, clean and efficient energy | Market uptake of energy innovation - building on Intelligent Energy Europe","classification_short":"Societal Challenges | Energy | Market uptake of energy innovation"}
{"code":"H2020-EU.2.3.","title":"INDUSTRIAL LEADERSHIP - Innovation In SMEs","shortTitle":"Innovation in SMEs","language":"en","classification":"Industrial leadership | Innovation In SMEs","classification_short":"Industrial Leadership | Innovation in SMEs"}
{"code":"H2020-EU.2.1.1.3.","title":"Future Internet: Software, hardware, Infrastructures, technologies and services","shortTitle":"","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Information and Communication Technologies (ICT) | Future Internet: Software, hardware, Infrastructures, technologies and services","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Information and Communication Technologies | Future Internet: Software, hardware, Infrastructures, technologies and services"}
{"code":"H2020-EU.3.1.5.3.","title":"Using in-silico medicine for improving disease management and prediction","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Methods and data | Using in-silico medicine for improving disease management and prediction","classification_short":"Societal Challenges | Health | Methods and data | Using in-silico medicine for improving disease management and prediction"}
{"code":"H2020-EU.3.6.1.1.","title":"The mechanisms to promote smart, sustainable and inclusive growth","shortTitle":"","language":"en","classification":"Societal challenges | Europe In A Changing World - Inclusive, Innovative And Reflective Societies | Inclusive societies | The mechanisms to promote smart, sustainable and inclusive growth","classification_short":"Societal Challenges | Inclusive, innovative and reflective societies | Inclusive societies | The mechanisms to promote smart, sustainable and inclusive growth"}
{"code":"H2020-EU.1.3.1.","title":"Fostering new skills by means of excellent initial training of researchers","shortTitle":"MCSA Initial training","language":"en","classification":"Excellent science | Marie Skłodowska-Curie Actions | Fostering new skills by means of excellent initial training of researchers","classification_short":"Excellent Science | Marie-Sklodowska-Curie Actions | MCSA Initial training"}
{"code":"H2020-EU.3.6.2.3.","title":"Make use of the innovative, creative and productive potential of all generations","shortTitle":"","language":"en","classification":"Societal challenges | Europe In A Changing World - Inclusive, Innovative And Reflective Societies | Innovative societies | Make use of the innovative, creative and productive potential of all generations","classification_short":"Societal Challenges | Inclusive, innovative and reflective societies | Innovative societies | Make use of the innovative, creative and productive potential of all generations"}
{"code":"H2020-EU.3.5.1.3.","title":"Support mitigation policies, including studies that focus on impact from other sectoral policies","shortTitle":"","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | Fighting and adapting to climate change | Support mitigation policies, including studies that focus on impact from other sectoral policies","classification_short":"Societal Challenges | Climate and environment | Fighting and adapting to climate change | Support mitigation policies, including studies that focus on impact from other sectoral policies"}
{"code":"H2020-EU.3.3.1.3.","title":"Foster European Smart cities and Communities","shortTitle":"","language":"en","classification":"Societal challenges | Secure, clean and efficient energy | Reducing energy consumption and carbon foorpint by smart and sustainable use | Foster European Smart cities and Communities","classification_short":"Societal Challenges | Energy | Reducing energy consumption and carbon footprint | Foster European Smart cities and Communities"}
{"code":"H2020-EU.3.1.1.","title":"Understanding health, wellbeing and disease","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Understanding health, wellbeing and disease","classification_short":"Societal Challenges | Health | Understanding health, wellbeing and disease"}
{"code":"H2020-Euratom-1.","title":"Indirect actions","shortTitle":"","language":"en","classification":"Euratom | Indirect actions","classification_short":"Euratom | Indirect actions"}
{"code":"H2020-EU.3.5.7.","title":"FCH2 (raw materials objective)","shortTitle":"","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | FCH2 (raw materials objective)","classification_short":"Societal Challenges | Climate and environment | FCH2 (raw materials objective)"}
{"code":"H2020-EU.3.7.3.","title":"Strengthen security through border management","shortTitle":"","language":"en","classification":"Societal challenges | Secure societies - Protecting freedom and security of Europe and its citizens | Strengthen security through border management","classification_short":"Societal Challenges | Secure societies | Strengthen security through border management"}
{"code":"H2020-EU.2.1.1.2.","title":"Next generation computing: Advanced and secure computing systems and technologies, including cloud computing","shortTitle":"","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Information and Communication Technologies (ICT) | Next generation computing: Advanced and secure computing systems and technologies, including cloud computing","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Information and Communication Technologies | Next generation computing: Advanced and secure computing systems and technologies, including cloud computing"}
{"code":"H2020-EU.3.5.5.","title":"Developing comprehensive and sustained global environmental observation and information systems","shortTitle":"Environmental observation and information systems","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | Developing comprehensive and sustained global environmental observation and information systems","classification_short":"Societal Challenges | Climate and environment | Environmental observation and information systems"}
{"code":"H2020-EU.3.1.7.10.","title":"Cancer","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Innovative Medicines Initiative 2 (IMI2) | Cancer","classification_short":"Societal Challenges | Health | Innovative Medicines Initiative 2 (IMI2) | Cancer"}
{"code":"H2020-EU.3.4.8.2.","title":"Innovation Programme 2: Advanced traffic management and control systems","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | Shift2Rail JU | Innovation Programme 2: Advanced traffic management and control systems","classification_short":"Societal Challenges | Transport | Shift2Rail JU | Innovation Programme 2: Advanced traffic management and control systems"}
{"code":"H2020-EU.5.e.","title":"Develop the accessibility and the use of the results of publicly-funded research","shortTitle":"","language":"en","classification":"SCIENCE WITH AND FOR SOCIETY | Develop the accessibility and the use of the results of publicly-funded research","classification_short":"Science with and for Society | Develop the accessibility and the use of the results of publicly-funded research"}
{"code":"H2020-EU.3.4.4.","title":"Socio-economic and behavioural research and forward looking activities for policy making","shortTitle":"Socio-economic and behavioural research","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | Socio-economic and behavioural research and forward looking activities for policy making","classification_short":"Societal Challenges | Transport | Socio-economic and behavioural research"}
{"code":"H2020-EU.3.3.2.","title":"Low-cost, low-carbon energy supply","shortTitle":"Low-cost, low-carbon energy supply","language":"en","classification":"Societal challenges | Secure, clean and efficient energy | Low-cost, low-carbon energy supply","classification_short":"Societal Challenges | Energy | Low-cost, low-carbon energy supply"}
{"code":"H2020-EU.3.4.2.2.","title":"Substantial improvements in the mobility of people and freight","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | Better mobility, less congestion, more safety and security | Substantial improvements in the mobility of people and freight","classification_short":"Societal Challenges | Transport | Mobility, safety and security | Substantial improvements in the mobility of people and freight"}
{"code":"H2020-EU.3.5.6.","title":"Cultural heritage","shortTitle":"Cultural heritage","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | Cultural heritage","classification_short":"Societal Challenges | Climate and environment | Cultural heritage"}
{"code":"H2020-EU.3.5.3.","title":"Ensuring the sustainable supply of non-energy and non-agricultural raw materials","shortTitle":"Supply of non-energy and non-agricultural raw materials","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | Ensuring the sustainable supply of non-energy and non-agricultural raw materials","classification_short":"Societal Challenges | Climate and environment | Supply of non-energy and non-agricultural raw materials"}
{"code":"H2020-EU.2.1.5.2.","title":"Technologies enabling energy-efficient systems and energy-efficient buildings with a low environmental impact","shortTitle":"Technologies enabling energy-efficient systems and buildings","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Advanced manufacturing and processing | Technologies enabling energy-efficient systems and energy-efficient buildings with a low environmental impact","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Advanced manufacturing and processing | Technologies enabling energy-efficient systems and buildings"}
{"code":"H2020-EU.1.4.1.2.","title":"Integrating and opening existing national and regional research infrastructures of European interest","shortTitle":"","language":"en","classification":"Excellent science | Research Infrastructures | Developing the European research infrastructures for 2020 and beyond | Integrating and opening existing national and regional research infrastructures of European interest","classification_short":"Excellent Science | Research Infrastructures | Research infrastructures for 2020 and beyond | Integrating and opening existing national and regional research infrastructures of European interest"}
{"code":"H2020-EU.3.7.8.","title":"Support the Union's external security policies including through conflict prevention and peace-building","shortTitle":"","language":"en","classification":"Societal challenges | Secure societies - Protecting freedom and security of Europe and its citizens | Support the Union's external security policies including through conflict prevention and peace-building","classification_short":"Societal Challenges | Secure societies | Support the Union's external security policies including through conflict prevention and peace-building"}
{"code":"H2020-EU.2.1.1.1.","title":"A new generation of components and systems: Engineering of advanced embedded and energy and resource efficient components and systems","shortTitle":"","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Information and Communication Technologies (ICT) | A new generation of components and systems: Engineering of advanced embedded and energy and resource efficient components and systems","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Information and Communication Technologies | A new generation of components and systems: Engineering of advanced embedded and energy and resource efficient components and systems"}
{"code":"H2020-EU.1.1.","title":"EXCELLENT SCIENCE - European Research Council (ERC)","shortTitle":"European Research Council (ERC)","language":"en","classification":"Excellent science | European Research Council (ERC)","classification_short":"Excellent Science | European Research Council (ERC)"}
{"code":"H2020-EU.3.4.5.6.","title":"ITD Systems","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | CLEANSKY2 | ITD Systems","classification_short":"Societal Challenges | Transport | CLEANSKY2 | ITD Systems"}
{"code":"H2020-EU.6.","title":"NON-NUCLEAR DIRECT ACTIONS OF THE JOINT RESEARCH CENTRE (JRC)","shortTitle":"Joint Research Centre (JRC) non-nuclear direct actions","language":"en","classification":"NON-NUCLEAR DIRECT ACTIONS OF THE JOINT RESEARCH CENTRE (JRC)","classification_short":"Joint Research Centre (JRC) non-nuclear direct actions"}
{"code":"H2020-EU.3.2.5.1.","title":"Climate change impact on marine ecosystems and maritime economy","shortTitle":"","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Cross-cutting marine and maritime research | Climate change impact on marine ecosystems and maritime economy","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Cross-cutting marine and maritime research | Climate change impact on marine ecosystems and maritime economy"}
{"code":"H2020-Euratom-1.2.","title":"Contribute to the development of solutions for the management of ultimate nuclear waste","shortTitle":"","language":"en","classification":"Euratom | Indirect actions | Contribute to the development of solutions for the management of ultimate nuclear waste","classification_short":"Euratom | Indirect actions | Contribute to the development of solutions for the management of ultimate nuclear waste"}
{"code":"H2020-EU.3.1.7.11.","title":"Rare/Orphan Diseases","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Innovative Medicines Initiative 2 (IMI2) | Rare/Orphan Diseases","classification_short":"Societal Challenges | Health | Innovative Medicines Initiative 2 (IMI2) | Rare/Orphan Diseases"}
{"code":"H2020-EU.3.1.4.1.","title":"Active ageing, independent and assisted living","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Active ageing and self-management of health | Active ageing, independent and assisted living","classification_short":"Societal Challenges | Health | Active ageing and self-management of health | Active ageing, independent and assisted living"}
{"code":"H2020-Euratom-1.4.","title":"Foster radiation protection","shortTitle":"","language":"en","classification":"Euratom | Indirect actions | Foster radiation protection","classification_short":"Euratom | Indirect actions | Foster radiation protection"}
{"code":"H2020-EU.2.2.2.","title":"The Equity facility providing equity finance for R&I: 'Union equity instruments for research and innovation'","shortTitle":"Equity facility","language":"en","classification":"Industrial leadership | Access to risk finance | The Equity facility providing equity finance for R&I: 'Union equity instruments for research and innovation'","classification_short":"Industrial Leadership | Access to risk finance | Equity facility"}
{"code":"H2020-EU.3.3.8.","title":"FCH2 (energy objectives)","shortTitle":"","language":"en","classification":"Societal challenges | Secure, clean and efficient energy | FCH2 (energy objectives)","classification_short":"Societal Challenges | Energy | FCH2 (energy objectives)"}
{"code":"H2020-EU.3.2.3.","title":"Unlocking the potential of aquatic living resources","shortTitle":"Potential of aquatic living resources","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Unlocking the potential of aquatic living resources","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Potential of aquatic living resources"}
{"code":"H2020-EU.3.5.2.3.","title":"Provide knowledge and tools for effective decision making and public engagement","shortTitle":"","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | Protection of the environment, sustainable management of natural resources, water, biodiversity and ecosystems | Provide knowledge and tools for effective decision making and public engagement","classification_short":"Societal Challenges | Climate and environment | Protection of the environment | Provide knowledge and tools for effective decision making and public engagement"}
{"code":"H2020-EU.3.3.6.","title":"Robust decision making and public engagement","shortTitle":"Robust decision making and public engagement","language":"en","classification":"Societal challenges | Secure, clean and efficient energy | Robust decision making and public engagement","classification_short":"Societal Challenges | Energy | Robust decision making and public engagement"}
{"code":"H2020-EU.3.1.7.2.","title":"Osteoarthritis","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Innovative Medicines Initiative 2 (IMI2) | Osteoarthritis","classification_short":"Societal Challenges | Health | Innovative Medicines Initiative 2 (IMI2) | Osteoarthritis"}
{"code":"H2020-EU.2.1.1.","title":"INDUSTRIAL LEADERSHIP - Leadership in enabling and industrial technologies - Information and Communication Technologies (ICT)","shortTitle":"Information and Communication Technologies","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Information and Communication Technologies (ICT)","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Information and Communication Technologies"}
{"code":"H2020-EU.2.1.6.2.","title":"Enabling advances in space technology","shortTitle":"Enabling advances in space technology","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Space | Enabling advances in space technology","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Space | Enabling advances in space technology"}
{"code":"H2020-EU.2.1.1.4.","title":"Content technologies and information management: ICT for digital content, cultural and creative industries","shortTitle":"","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Information and Communication Technologies (ICT) | Content technologies and information management: ICT for digital content, cultural and creative industries","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Information and Communication Technologies | Content technologies and information management: ICT for digital content, cultural and creative industries"}
{"code":"H2020-EU.2.1.5.4.","title":"New sustainable business models","shortTitle":"New sustainable business models","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Advanced manufacturing and processing | New sustainable business models","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Advanced manufacturing and processing | New sustainable business models"}
{"code":"H2020-EU.2.1.4.","title":"INDUSTRIAL LEADERSHIP - Leadership in enabling and industrial technologies Biotechnology","shortTitle":"Biotechnology","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Biotechnology","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Biotechnology"}

View File

@ -1,17 +0,0 @@
{"id":"894593","programme":"H2020-EU.3.4.7.","topics":"SESAR-ER4-31-2019"}
{"id":"897004","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019"}
{"id":"896300","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019"}
{"id":"892890","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019"}
{"id":"886828","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019"}
{"id":"886776","programme":"H2020-EU.2.1.4.","topics":"BBI-2019-SO3-D4"}
{"id":"886776","programme":"H2020-EU.3.2.6.","topics":"BBI-2019-SO3-D4"}
{"id":"895426","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019"}
{"id":"898218","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019"}
{"id":"893787","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019"}
{"id":"896189","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019"}
{"id":"891624","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019"}
{"id":"887259","programme":"H2020-EU.2.1.4.","topics":"BBI-2019-SO3-D3"}
{"id":"887259","programme":"H2020-EU.3.2.6.","topics":"BBI-2019-SO3-D3"}
{"id":"892834","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019"}
{"id":"895716","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019"}
{"id":"883730","programme":"H2020-EU.1.1.","topics":"ERC-2019-ADG"}

View File

@ -1,25 +0,0 @@
rcn;code;title;shortTitle;language
664331;H2020-EU.3.3.2.;Un approvisionnement en électricité à faible coût et à faibles émissions de carbone;Low-cost, low-carbon energy supply;fr
664355;H2020-EU.3.3.7.;Absorción por el mercado de la innovación energética - explotación del Programa Energía Inteligente - Europa Europe;Market uptake of energy innovation;es
664323;H2020-EU.3.3.1.;Ridurre il consumo di energia e le emissioni di carbonio grazie all'uso intelligente e sostenibile;Reducing energy consumption and carbon footprint;it
664233;H2020-EU.2.3.2.3.;Wsparcie innowacji rynkowych;Supporting market-driven innovation;pl
664199;H2020-EU.2.1.5.1.;Tecnologías para las fábricas del futuro;Technologies for Factories of the Future;es
664235;H2020-EU.3.;PRIORITÉ «Défis de société»;Societal Challenges;fr
664355;H2020-EU.3.3.7.;"Assorbimento di mercato dell'innovazione energetica - iniziative fondate sul programma ""Energia intelligente - Europa""";Market uptake of energy innovation;it
664355;H2020-EU.3.3.7.;"Markteinführung von Energieinnovationen Aufbau auf ""Intelligente Energie Europa";Market uptake of energy innovation;de
664235;H2020-EU.3.;"PRIORIDAD ""Retos de la sociedad""";Societal Challenges;es
664231;H2020-EU.2.3.2.2.;Mejorar la capacidad de innovación de las PYME;Enhancing the innovation capacity of SMEs;es
664223;H2020-EU.2.3.;LIDERAZGO INDUSTRIAL - Innovación en la pequeña y mediana empresa;Innovation in SMEs;es
664323;H2020-EU.3.3.1.;Réduire la consommation d'énergie et l'empreinte carbone en utilisant l'énergie de manière intelligente et durable;Reducing energy consumption and carbon footprint;fr
664323;H2020-EU.3.3.1.;Reducir el consumo de energía y la huella de carbono mediante un uso inteligente y sostenible;Reducing energy consumption and carbon footprint;es
664215;H2020-EU.2.1.6.4.;Beitrag der europäischen Forschung zu internationalen Weltraumpartnerschaften;Research in support of international space partnerships;de
664213;H2020-EU.2.1.6.3.;Permettere lo sfruttamento dei dati spaziali;;it
664213;H2020-EU.2.1.6.3.;Permettre l'exploitation des données spatiales;Enabling exploitation of space data;fr
664231;H2020-EU.2.3.2.2.;Zwiększenie zdolności MŚP pod względem innowacji;Enhancing the innovation capacity of SMEs;pl
664231;H2020-EU.2.3.2.2.;Rafforzare la capacità di innovazione delle PMI;Enhancing the innovation capacity of SMEs;it
664213;H2020-EU.2.1.6.3.;Grundlagen für die Nutzung von Weltraumdaten;Enabling exploitation of space data;de
664211;H2020-EU.2.1.6.2.;Favorecer los avances en las tecnologías espaciales;Enabling advances in space technology;es
664209;H2020-EU.2.1.6.1.;Assurer la compétitivité et l'indépendance de l'Europe et promouvoir l'innovation dans le secteur spatial européen;Competitiveness, non-dependence and innovation;fr
664231;H2020-EU.2.3.2.2.;Renforcement de la capacité d'innovation des PME;Enhancing the innovation capacity of SMEs;fr
664203;H2020-EU.2.1.5.3.;Tecnologías sostenibles, eficientes en su utilización de recursos y de baja emisión de carbono en las industrias de transformación de gran consumo energético;Sustainable, resource-efficient and low-carbon technologies in energy-intensive process industries;es
664103;H2020-EU.1.2.1.;FET Open;FET Open;es
1 rcn code title shortTitle language
2 664331 H2020-EU.3.3.2. Un approvisionnement en électricité à faible coût et à faibles émissions de carbone Low-cost, low-carbon energy supply fr
3 664355 H2020-EU.3.3.7. Absorción por el mercado de la innovación energética - explotación del Programa Energía Inteligente - Europa Europe Market uptake of energy innovation es
4 664323 H2020-EU.3.3.1. Ridurre il consumo di energia e le emissioni di carbonio grazie all'uso intelligente e sostenibile Reducing energy consumption and carbon footprint it
5 664233 H2020-EU.2.3.2.3. Wsparcie innowacji rynkowych Supporting market-driven innovation pl
6 664199 H2020-EU.2.1.5.1. Tecnologías para las fábricas del futuro Technologies for Factories of the Future es
7 664235 H2020-EU.3. PRIORITÉ «Défis de société» Societal Challenges fr
8 664355 H2020-EU.3.3.7. Assorbimento di mercato dell'innovazione energetica - iniziative fondate sul programma "Energia intelligente - Europa" Market uptake of energy innovation it
9 664355 H2020-EU.3.3.7. Markteinführung von Energieinnovationen – Aufbau auf "Intelligente Energie – Europa Market uptake of energy innovation de
10 664235 H2020-EU.3. PRIORIDAD "Retos de la sociedad" Societal Challenges es
11 664231 H2020-EU.2.3.2.2. Mejorar la capacidad de innovación de las PYME Enhancing the innovation capacity of SMEs es
12 664223 H2020-EU.2.3. LIDERAZGO INDUSTRIAL - Innovación en la pequeña y mediana empresa Innovation in SMEs es
13 664323 H2020-EU.3.3.1. Réduire la consommation d'énergie et l'empreinte carbone en utilisant l'énergie de manière intelligente et durable Reducing energy consumption and carbon footprint fr
14 664323 H2020-EU.3.3.1. Reducir el consumo de energía y la huella de carbono mediante un uso inteligente y sostenible Reducing energy consumption and carbon footprint es
15 664215 H2020-EU.2.1.6.4. Beitrag der europäischen Forschung zu internationalen Weltraumpartnerschaften Research in support of international space partnerships de
16 664213 H2020-EU.2.1.6.3. Permettere lo sfruttamento dei dati spaziali it
17 664213 H2020-EU.2.1.6.3. Permettre l'exploitation des données spatiales Enabling exploitation of space data fr
18 664231 H2020-EU.2.3.2.2. Zwiększenie zdolności MŚP pod względem innowacji Enhancing the innovation capacity of SMEs pl
19 664231 H2020-EU.2.3.2.2. Rafforzare la capacità di innovazione delle PMI Enhancing the innovation capacity of SMEs it
20 664213 H2020-EU.2.1.6.3. Grundlagen für die Nutzung von Weltraumdaten Enabling exploitation of space data de
21 664211 H2020-EU.2.1.6.2. Favorecer los avances en las tecnologías espaciales Enabling advances in space technology es
22 664209 H2020-EU.2.1.6.1. Assurer la compétitivité et l'indépendance de l'Europe et promouvoir l'innovation dans le secteur spatial européen Competitiveness, non-dependence and innovation fr
23 664231 H2020-EU.2.3.2.2. Renforcement de la capacité d'innovation des PME Enhancing the innovation capacity of SMEs fr
24 664203 H2020-EU.2.1.5.3. Tecnologías sostenibles, eficientes en su utilización de recursos y de baja emisión de carbono en las industrias de transformación de gran consumo energético Sustainable, resource-efficient and low-carbon technologies in energy-intensive process industries es
25 664103 H2020-EU.1.2.1. FET Open FET Open es

View File

@ -0,0 +1,399 @@
[{"acronym": "GiSTDS",
"contentUpdateDate": "2022-10-08 18:28:27",
"ecMaxContribution": 203149.44,
"ecSignatureDate": "2020-03-16",
"endDate": "2022-11-30",
"frameworkProgramme": "H2020",
"fundingScheme": "MSCA-IF-EF-SE",
"grantDoi": "10.3030/886988",
"id": 894593,
"legalBasis": "H2020-EU.1.3.",
"masterCall": "H2020-MSCA-IF-2019",
"nature": "",
"objective": "Coordination of different players in active distribution systems by increasing the penetration of distributed energy resources and rapid advances on the aggregators, microgrids and prosumers with private territory individuals establishes new challenges in control and management systems from the owners point of views. Undertaking digitalization of future distribution systems, GiSTDS introduces an edge computing framework based on GridEye, the core production of DEPsys, which provides real time visibility and monitoring. Relevant drawbacks in the distribution system management platforms in handling the scalability of players, look ahead preventive management systems regarding contingency condition and lack of physical boundaries for third party entities (aggregators) will be addressed by GiSTDS. The main novelties of this project in comparison to the GridEye are: 1) Developed P2P trading module provides automated double auction negotiation in real time fashion which enables all private entities with and without specific physical boundaries to participate in local and flexible electricity markets. 2) Modification of GridEyes modules to address the scalability and resilient operation in both the normal and contingency conditions. 3) To present a look ahead energy managements schemes for the operators, GiSTDS will be equipped to the forecasting module based on auto-regressive with exogenous variables (ARX) and machine learning techniques such as long short term memory (LSTM) and recursive neural network (RNN). Therefore, GiSTDS based on modified and developed modules explores comprehensive distributed framework for control, monitoring and operation of energy systems with multiple dispersed players in different scales. The edge computing solutions in GiSTDS eectively digitalis energy systems and creates major opportunities in terms of avoiding big data concerns and getting a bottom-up monitoring approach for the network supervision.",
"rcn": 227870,
"startDate": "2020-12-01",
"status": "TERMINATED",
"subCall": "H2020-MSCA-IF-2019",
"title": "GridEye Scalable Transactive Distribution Systems",
"topics": "MSCA-IF-2019",
"totalCost": 203149.44
},{
"acronym": "REAL",
"contentUpdateDate": "2022-04-27 21:10:20",
"ecMaxContribution": 1498830,
"ecSignatureDate": "2020-09-29",
"endDate": "2026-03-31",
"frameworkProgramme": "H2020",
"fundingScheme": "ERC-STG",
"grantDoi": "10.3030/947908",
"id": 897004,
"legalBasis": "H2020-EU.1.1.",
"masterCall": "ERC-2020-STG",
"nature": "",
"objective": "In the last decade, machine learning (ML) has become a fundamental tool with a growing impact in many disciplines, from science to industry. However, nowadays, the scenario is changing: data are exponentially growing compared to the computational resources (post Moore's law era), and ML algorithms are becoming crucial building blocks in complex systems for decision making, engineering, science. Current machine learning is not suitable for the new scenario, both from a theoretical and a practical viewpoint: (a) the lack of cost-effectiveness of the algorithms impacts directly the economic/energetic costs of large scale ML, making it barely affordable by universities or research institutes; (b) the lack of reliability of the predictions affects critically the safety of the systems where ML is employed. To deal with the challenges posed by the new scenario, REAL will lay the foundations of a solid theoretical and algorithmic framework for reliable and cost-effective large scale machine learning on modern computational architectures. In particular, REAL will extend the classical ML framework to provide algorithms with two additional guarantees: (a) the predictions will be reliable, i.e., endowed with explicit bounds on their uncertainty guaranteed by the theory; (b) the algorithms will be cost-effective, i.e., they will be naturally adaptive to the new architectures and will provably achieve the desired reliability and accuracy level, by using minimum possible computational resources. The algorithms resulting from REAL will be released as open-source libraries for distributed and multi-GPU settings, and their effectiveness will be extensively tested on key benchmarks from computer vision, natural language processing, audio processing, and bioinformatics. The methods and the techniques developed in this project will help machine learning to take the next step and become a safe, effective, and fundamental tool in science and engineering for large scale data problems.",
"rcn": 231448,
"startDate": "2021-04-01",
"status": "SIGNED",
"subCall": "ERC-2020-STG",
"title": "Reliable and cost-effective large scale machine learning",
"topics": "ERC-2020-STG",
"totalCost": 1498830
},{
"acronym": "CARL-PdM",
"contentUpdateDate": "2022-08-09 09:09:33",
"ecMaxContribution": 50000,
"ecSignatureDate": "2017-07-13",
"endDate": "2018-01-31",
"frameworkProgramme": "H2020",
"fundingScheme": "SME-1",
"grantDoi": "10.3030/781123",
"id": 896300,
"legalBasis": "H2020-EU.2.1.1.",
"masterCall": "H2020-SMEInst-2016-2017",
"nature": "",
"objective": "\"\"\"Industry 4.0 preaches a complete revolution of industrial process and promises huge efficiency gains by a complete virtualization of the factory, numerical design tools, automation of the logistics and the routing of the parts, smart machines, 3D printing, cyber-physical systems, predictive maintenance and control of the whole factory by an intelligent system. \nIn the next 10 years, industry 4.0 is expected to change the way we operate our factories and to create 1250 Billion € of additional value added in Europe.\nAlso , according to ARC Advisory Group, the predictive maintenance market is estimated to grow from 1,404.3M€ in 2016 to 4,904.0M€ by 2021.\nCARL-PdM is a innovative IIoT data powered predictive maintenance platform encompass the core of \"\"Industry 4.0\"\" with a new maintenance paradigm : maintenance is a production function whose aim should be to optimize production output and quality.\nWe will leverage the IoT revolution to achieve these goal.\nThis software solution, CARL-PdM, provides many core capabilities in industrial scenarios, including edge analytics who provide a way to pre-process the data so that only the pertinent information is sent to the predictive layer (Auto Classification and Machine learning).\nThe predictive layer will categorize data into abstract class which represent technical assets behavior. It is a reliable and reproducible approach.\nCompetitive advantages: \n- Reduce failure by 50%, maintenance cost by 30%, production stops by 70%, energetic consumption by 20%, Time To Repair by 30%\n- Increase production flexibility\n- System agnostic to machines\n- Machine-learning algorithm that compares the fault prediction and sensor data with historical data, predicting best maintenance activity regarding to production and quality objectives \n\nThe solution will be implemented at a global scale, starting in European markets: France, Italy, Belgium for early market uptake and testing; and then the biggest EU markets (Germany, UK, Poland and Spain).\n\"",
"rcn": 211479,
"startDate": "2017-08-01",
"status": "CLOSED",
"subCall": "H2020-SMEINST-1-2016-2017",
"title": "Next Generation Holistic Predictive Maintenance Software",
"topics": "SMEInst-01-2016-2017",
"totalCost": 71429
},{
"acronym": "OPTIMAL",
"contentUpdateDate": "2022-11-02 12:00:16",
"ecMaxContribution": 772800,
"ecSignatureDate": "2020-12-01",
"endDate": "2025-12-31",
"frameworkProgramme": "H2020",
"fundingScheme": "MSCA-RISE",
"grantDoi": "10.3030/101007963",
"id": 892890,
"legalBasis": "H2020-EU.1.3.",
"masterCall": "H2020-MSCA-RISE-2020",
"nature": "",
"objective": "The proposed project is to develop and maintain long term collaborations between Europe and China towards CO2 neutral Olefin production. We will realize this objective by carrying out joint research in big data and artificial intelligence (AI) for ethylene plants integrated with carbon capture and CO2 utilisation. Specifically this requires a universal set of skills such as pilot scale experimental study, process modelling and analysis, optimisation, catalysis and reaction kinetics that will be strengthened by the individual mobility of researchers between Europe and China. There are 12 partners involved in OPTIMAL with 3 industrial partners. These partners are world leading in their respective research areas. OPTIMAL is planned to start from Aug. 2021 and will continue for 48 months. There will be 28 experienced and 35 early stage researchers participating in OPTIMAL with exchange visits of 262 person months. The funding of €772,800 will be requested from European Commission to support these planned secondments. The European beneficiaries are experts at catalysis, CO2 utilisation, intensified carbon capture, reaction mechanism and kinetics & CFD studies, hybrid modelling, molecular simulation and dynamic optimisation, whilst the Chinese partners are experts at exergy analysis, process control and optimisation, solvent-based carbon capture & data-driven model development, deep reinforced learning based model free control, intelligent predictive control, physics-based reduced order model development, soft exergy sensor development and optimisation under uncertainty. Transfer of knowledge will take place through these exchange visits. We will generate at least 25 Journal publications and 25 Conference papers. 2 Special Issues will be established in leading journals such as Applied Energy. 2 Workshops and 2 Special Sessions in major international conferences will also be organised to disseminate project results.",
"rcn": 232682,
"startDate": "2021-08-01",
"status": "SIGNED",
"subCall": "H2020-MSCA-RISE-2020",
"title": "Smart and CO2 neutral Olefin Production by arTificial Intelligence and MAchine Learning",
"topics": "MSCA-RISE-2020",
"totalCost": 1205200
},{
"acronym": "e-DNA BotStop",
"contentUpdateDate": "2022-08-15 14:18:25",
"ecMaxContribution": 50000,
"ecSignatureDate": "2019-04-11",
"endDate": "2019-10-31",
"frameworkProgramme": "H2020",
"fundingScheme": "SME-1",
"grantDoi": "10.3030/854460",
"id": 886828,
"legalBasis": "H2020-EU.2.3.",
"masterCall": "H2020-EIC-SMEInst-2018-2020",
"nature": "",
"objective": "In the last decade there has been an explosion in Online Travel Agents (OTAs) worldwide. OTAs undertake the mammoth task of undercutting the flight prices of major airlines through the use of Bots (an internet Bot, also known as web robot, WWW robot or simply bot, is a software application that runs automated tasks (scripts) over the Internet.). Bots are used to scrape airlines for valuable data to benchmark aggregate flight costs, which drives down prices for the consumer.\n\nWhilst beneficial to consumers, scraping harms travel companies because:\n•\tBots can engage with a websites server hardware and cause website traffic to run slower, in some cases causing server downtime and Direct Denial of Service (DDoS)\n•\tLong term Search Engine Optimization (SEO) damage; distorting analytical marketing metrics.\n•\tDiverting customers to purchase products via third party resellers, limiting chances for up-sell and cross sell opportunities. \n\nThis problem is tackled by anti-scrape approaches. However, current anti-scrape/booking bot solutions are only capable of distinguishing between human traffic and bot traffic through supervised algorithms that do not work to the degree of efficacy required. \n\n\nOur proposed solution is BotStop an algorithmic approach to identifying Bots and scrapers and to policing malicious application traffic. eDNA will provide a solution which reintroduces transparency into the process of purchasing flights and will streamline customer website experience to ensure a more stress-free experience",
"rcn": 223866,
"startDate": "2019-05-01",
"status": "CLOSED",
"subCall": "H2020-SMEInst-2018-2020-1",
"title": "e-DNA BotStop",
"topics": "EIC-SMEInst-2018-2020",
"totalCost": 71429
},{
"acronym": "NAUTIC",
"contentUpdateDate": "2022-08-25 21:32:49",
"ecMaxContribution": 184707.84,
"ecSignatureDate": "2021-04-27",
"endDate": "2023-09-30",
"frameworkProgramme": "H2020",
"fundingScheme": "MSCA-IF-EF-ST",
"grantDoi": "10.3030/101033666",
"id": 8867767,
"legalBasis": "H2020-EU.1.3.",
"masterCall": "H2020-MSCA-IF-2020",
"nature": "",
"objective": "Bringing a new drug to the European market takes at least 10 years and 2.5 BEUR of R&D effort. Computational methods significantly shorten this journey but they require knowledge of the structure and interactions of the involved biomolecules - most often proteins. In recent years, a tremendous progress has been made in the field of a single protein 3D structure prediction. However, predicting protein assemblies -the most crucial step - still remains very challenging. The aim of this IF project is to revolutionise protein complexes prediction methods. This will be achieved first by developing novel, effective and fast approaches for the calculation of the vibrational entropy, key to protein-protein docking mechanisms. Then, in an innovative and multi-disciplinary approach, the Experienced Researcher (ER) aims to combine advanced physics-based models with machine learning methods using data from structural and sequence databases. Finally, this project will link all the pieces together and release them in the form of a web-server in order to allow the community to benefit from the results of this research.\nThe ER will carry out the fellowship in the Centre National de la Recherche Scientifique - CNRS in Grenoble, France. CNRS carries out research in all scientific fields of knowledge and the Supervisor is a renowned expert in data science, computing, and software engineering. Through a well-thought two-way knowledge transfer and training plan, this project will benefit both the host institution and the ER in terms of scientific knowledge, network and open the path for new applications to potentially exploit at the European or global level. The project will also place the ER as a highly visible researcher in the field and ideally set her as a valuable resource for European industrial actors.",
"rcn": 235804,
"startDate": "2021-07-01",
"status": "TERMINATED",
"subCall": "H2020-MSCA-IF-2020",
"title": "Novel computational avenues in protein-protein docking",
"topics": "MSCA-IF-2020",
"totalCost": 184707.84
},{
"acronym": "EnzVolNet",
"contentUpdateDate": "2022-08-15 12:50:20",
"ecMaxContribution": 158121.6,
"ecSignatureDate": "2017-02-14",
"endDate": "2019-04-30",
"frameworkProgramme": "H2020",
"fundingScheme": "MSCA-IF-EF-ST",
"grantDoi": "10.3030/753045",
"id": 101003374,
"legalBasis": "H2020-EU.1.3.",
"masterCall": "H2020-MSCA-IF-2016",
"nature": "",
"objective": "Natural enzymes have evolved to perform their functions under complex selective pressures, being capable of accelerating reactions by several orders of magnitude. In particular, heteromeric enzyme complexes catalyze an enormous array of useful reactions that are often allosterically regulated by different protein partners. Unfortunately, the underlying physical principles of this regulation are still under debate, which makes the alteration of enzyme structure towards useful isolated subunits a tremendous challenge for modern chemical biology. Exploitation of isolated enzyme subunits, however, is advantageous for biosynthetic applications as it reduces the metabolic stress on the host cell and greatly simplifies efforts to engineer specific properties of the enzyme. Current approaches to alter natural enzyme complexes are based on the evaluation of thousands of variants, which make them economically unviable and the resulting catalytic efficiencies lag far behind their natural counterparts. The revolutionary nature of EnzVolNet relies on the application of conformational network models (e.g Markov State Models) to extract the essential functional protein dynamics and key conformational states, reducing the complexity of the enzyme design paradigm and completely reformulating previous computational design approaches. Initial mutations are extracted from costly random mutagenesis experiments and chemoinformatic tools are used to identify beneficial mutations leading to more proficient enzymes. This new strategy will be applied to develop stand-alone enzymes from heteromeric protein complexes, with advantageous biosynthetic properties and improve activity and substrate scope. Experimental evaluation of our computational predictions will finally elucidate the potential of the present approach for mimicking Natures rules of evolution.",
"rcn": 208408,
"startDate": "2017-05-01",
"status": "CLOSED",
"subCall": "H2020-MSCA-IF-2016",
"title": "COMPUTATIONAL EVOLUTION OF ENZYME VARIANTS THROUGH CONFORMATIONAL NETWORKS",
"topics": "MSCA-IF-2016",
"totalCost": 158121.6
},{
"acronym": "FASTPARSE",
"contentUpdateDate": "2022-08-18 09:56:14",
"ecMaxContribution": 1481747,
"ecSignatureDate": "2016-12-08",
"endDate": "2022-07-31",
"frameworkProgramme": "H2020",
"fundingScheme": "ERC-STG",
"grantDoi": "10.3030/714150",
"id": 886776,
"legalBasis": "H2020-EU.1.1.",
"masterCall": "ERC-2016-STG",
"nature": "",
"objective": "The popularization of information technology and the Internet has resulted in an unprecedented growth in the scale at which individuals and institutions generate, communicate and access information. In this context, the effective leveraging of the vast amounts of available data to discover and address people's needs is a fundamental problem of modern societies.\n\nSince most of this circulating information is in the form of written or spoken human language, natural language processing (NLP) technologies are a key asset for this crucial goal. NLP can be used to break language barriers (machine translation), find required information (search engines, question answering), monitor public opinion (opinion mining), or digest large amounts of unstructured text into more convenient forms (information extraction, summarization), among other applications.\n\nThese and other NLP technologies rely on accurate syntactic parsing to extract or analyze the meaning of sentences. Unfortunately, current state-of-the-art parsing algorithms have high computational costs, processing less than a hundred sentences per second on standard hardware. While this is acceptable for working on small sets of documents, it is clearly prohibitive for large-scale processing, and thus constitutes a major roadblock for the widespread application of NLP.\n\nThe goal of this project is to eliminate this bottleneck by developing fast parsers that are suitable for web-scale processing. To do so, FASTPARSE will improve the speed of parsers on several fronts: by avoiding redundant calculations through the reuse of intermediate results from previous sentences; by applying a cognitively-inspired model to compress and recode linguistic information; and by exploiting regularities in human language to find patterns that the parsers can take for granted, avoiding their explicit calculation. The joint application of these techniques will result in much faster parsers that can power all kinds of web-scale NLP applications.",
"rcn": 206936,
"startDate": "2017-02-01",
"status": "SIGNED",
"subCall": "ERC-2016-STG",
"title": "Fast Natural Language Parsing for Large-Scale NLP",
"topics": "ERC-2016-STG",
"totalCost": 1481747
},{
"acronym": "StarLink",
"contentUpdateDate": "2022-08-10 09:42:53",
"ecMaxContribution": 50000,
"ecSignatureDate": "2018-05-04",
"endDate": "2018-08-31",
"frameworkProgramme": "H2020",
"fundingScheme": "SME-1",
"grantDoi": "10.3030/815698",
"id": 815698,
"legalBasis": "H2020-EU.2.3.",
"masterCall": "H2020-EIC-SMEInst-2018-2020",
"nature": "",
"objective": "Vacuum pumps are used in thousands of industrial applications, playing a vital role in food processing, semiconductors, chemicals, pharmaceuticals and many other manufacturing and assembly processes. However, todays pumps are currently unable to provide any type of insights that could help users anticipate a pump malfunction, plan maintenance procedures or setting the adjustments. Pump malfunctions or breakdowns, due to unplanned maintenance or improper settings, cost millions of euros in lost revenues every year as production and logistic lines lie idle waiting for pumps to be fixed, and when they are not optimized their productivity decrease or their energy consumption go up. \n\nBut now, DVP, a vacuum pump manufacturer, has developed the solution to these challenges through StarLink, the worlds first intelligent vacuum pump system. StarLink is a patent-pending system that uses data analytics and machine learning to identify pump malfunctions before they happen, propose actions to be taken, and automatically adjust the operation parameters if the problem relates to the setting. This will reduce pump downtime-related costs by 30%, increase their productivity by 50% and make easier the operation manager tasks. \n\nThe combination of our deep knowledge of vacuum pumps needs with the machine learning expertise of the university of Ferrara will create the most intelligent device to improve the competitiveness of European companies. Additionally, StarLink will contribute to DVPs growth in terms of employees and product portfolio since we will be able to offer a wider range of products and services related to vacuum pumps, which will allow us to enter new markets and sell more units. By 2023, it will generate €3M in yearly revenue with net profits of €2M to our company.",
"rcn": 217721,
"startDate": "2018-05-01",
"status": "CLOSED",
"subCall": "H2020-SMEInst-2018-2020-1",
"title": "StarLink: The World's First Intelligent Vacuum Pump System",
"topics": "EIC-SMEInst-2018-2020",
"totalCost": 71429
},{
"acronym": "ARMOUR",
"contentUpdateDate": "2022-08-18 16:42:12",
"ecMaxContribution": 191149.44,
"ecSignatureDate": "2020-03-16",
"endDate": "2022-10-14",
"frameworkProgramme": "H2020",
"fundingScheme": "MSCA-IF-EF-SE",
"grantDoi": "10.3030/890844",
"id": 890844,
"legalBasis": "H2020-EU.1.3.",
"masterCall": "H2020-MSCA-IF-2019",
"nature": "",
"objective": "General awareness about the smart grid technologies has improved in the last decade due to various energy liberalization actions taken by the European Union. However, the lack of well-developed technologies, has been main cause of slow acceptance of smart grids. This calls for the identification of unexplored research areas in smart grids. Positive outcomes of the research can help in laying down new and well-defined standards for the smart grids and associated intelligent technologies. A convenient and easily integrable product can also help in encouraging various distribution system operators to accept the new technologies. Massive amount of data is already being collected from the distribution networks using smart meters. Rapid advancements in machine learning research have opened up new avenues for data utilization in smart grid. \nForerunners like DEPsys (a smart grid technology company based in Switzerland), have now simplified the distribution system data for further analysis and research. A critical concern raised by DEPsys customers, is their inability to trace the source of power quality issues in the distribution network, which in-turn leads to both energy and economic losses over time. This project builds up on existing infrastructure of DEPsys and aims to be an AMROUR (by improving robustness) for distribution networks against power quality events. The main objectives are: (i) leveraging machine learning for condition monitoring and tracing power quality events, and (ii) to develop a smart grid technology which assists the distribution system operators in prevention and diagnosis of power quality events.",
"rcn": 227886,
"startDate": "2020-10-15",
"status": "SIGNED",
"subCall": "H2020-MSCA-IF-2019",
"title": "smARt Monitoring Of distribUtion netwoRks for robust power quality",
"topics": "MSCA-IF-2019",
"totalCost": 191149.44
},{
"acronym": "Target5LO",
"contentUpdateDate": "2022-08-16 11:09:20",
"ecMaxContribution": 195454.8,
"ecSignatureDate": "2018-03-19",
"endDate": "2020-02-29",
"frameworkProgramme": "H2020",
"fundingScheme": "MSCA-IF-EF-CAR",
"grantDoi": "10.3030/792495",
"id": 792495,
"legalBasis": "H2020-EU.1.3.",
"masterCall": "H2020-MSCA-IF-2017",
"nature": "",
"objective": "Drug efficacy is cornerstone for successful drug discovery programs. Considering that, on average, FDA-approved drugs modulate dozens of off-targets it remains imperative to find strategies to overcome adverse drug reactions correlated with pernicious polypharmacology. In fact, several chemical entities displaying promising anticancer are discontinued from drug development pipelines due to narrow therapeutic windows in pre-clinical models. Here, we propose the development of antibody-drug conjugates exploring the unique bioactivity profile of the naphthoquinone natural product-lapachone (Lp) against acute myeloid leukemia (AML), an unmet medical need. Using a machine learning method, we disclosed Lp as an allosteric modulator of 5-lipoxygenase (5-LO), correlated its anticancer activity with 5-LO expression in blood cancers and showed its efficacy in a disseminated mouse model of AML.\n\nIn this project, a comprehensive investigation of novel means for the targeted delivery of Lp to leukaemia cells is sought after, considering both the promising bioactivity profile but also the significant toxicity in untargeted dosage forms. We apply state-of-the-art synthetic medicinal chemistry to design and access cleavable linkers, and site-specifically conjugate Lp to an anti-IL7R antibody, a validated biomarker in AML and other leukaemias. We aim at employing biophysical and chemical biology approaches to validate quantitative and fast release of Lp with accurate spatiotemporal control in in vitro disease models. Finally, we will validate the deployment of the constructs through preclinical in vivo models of AML. We foresee broad applicability of the developed technology, which may have profound implications in drug discovery. Upon successful completion of this research program, we hope to yield a new targeted drug to treat AML patients with improved efficacy and reduced side-effects.",
"rcn": 215065,
"startDate": "2018-03-01",
"status": "CLOSED",
"subCall": "H2020-MSCA-IF-2017",
"title": "Targeting 5-lipoxygenase in the context of Acute Myeloid Leukemia",
"topics": "MSCA-IF-2017",
"totalCost": 195454.8
},{
"acronym": "Smart Library",
"contentUpdateDate": "2022-08-11 19:59:53",
"ecMaxContribution": 1200000,
"ecSignatureDate": "2017-02-26",
"endDate": "2018-12-31",
"frameworkProgramme": "H2020",
"fundingScheme": "SME-2",
"grantDoi": "10.3030/756826",
"id": 756826,
"legalBasis": "H2020-EU.3.6.",
"masterCall": "H2020-SMEInst-2016-2017",
"nature": "",
"objective": "Children today are natives of technology, having frequent access to digital devices both at home and at school. Digital devices are today even more used than TV. Worryingly, the offering of high quality educational apps is very limited and expensive. Parents and educators are concerned about this and are actively searching for better alternatives.\n\nTo help resolve these issues, Smile and Learn places technology at the service of education with the mission of helping children 2 to 12 years old learn while having fun using digital devices. Like the north American educational philosopher John Dewey, we believe that “if we teach todays students as we taught yesterdays, we rob them of tomorrow.” Our vision is to become the global leader in Edutainment (Entertainment plus Education). To do so we have developed the Smart Digital Library, a single platform of interactive games and stories that, as of today, provides access to up to 30 individual proprietary apps (100 apps by end 2018). The “Library” can be used at home, on the go or at school and provides “smart” recommendations to children, their parents and educators.\n\nIn August 2016, Smile and Learn successfully completed phase I of SME Instrument, finalizing our first release of the Smart Library rolled out in real production environments both at pilot schools (today more than 100 schools use the Library, including 10 special education schools) and with families (+7,000 active users) in different markets, including the US, Spain, the UK, France, Mexico and Colombia, with very positive feedback. We already have more than 30,000 users worldwide with no marketing expenditure.\n\nWe are now moving forward to make the Smart Library a global state-of-the-art product in the edutainment industry by scaling it up and rolling out a powerful dissemination plan, that we expect to conduct with the support of Phase 2 H2020",
"rcn": 208757,
"startDate": "2017-03-01",
"status": "CLOSED",
"subCall": "H2020-SMEINST-2-2016-2017",
"title": "Smart Library of Edutainment: technology and gamification at the service of Education",
"topics": "SMEInst-12-2016-2017",
"totalCost": 1827500
},{
"acronym": "PALGLAC",
"contentUpdateDate": "2022-08-25 10:28:12",
"ecMaxContribution": 2425298.75,
"ecSignatureDate": "2018-05-14",
"endDate": "2024-09-30",
"frameworkProgramme": "H2020",
"fundingScheme": "ERC-ADG",
"grantDoi": "10.3030/787263",
"id": 787263,
"legalBasis": "H2020-EU.1.1.",
"masterCall": "ERC-2017-ADG",
"nature": "",
"objective": "Ice sheets regulate Earths climate by reflecting sunlight away, enabling suitable temperatures for human habitation. Warming is reducing these ice masses and raising sea level. Glaciologists predict ice loss using computational ice sheet models which interact with climate and oceans, but with caveats that highlight processes are inadequately encapsulated. Weather forecasting made a leap in skill by comparing modelled forecasts with actual outcomes to improve physical realism of their models. This project sets out an ambitious programme to adopt this data-modelling approach in ice sheet modelling. Given their longer timescales (100-1000s years) we will use geological and geomorphological records of former ice sheets to provide the evidence; the rapidly growing field of palaeoglaciology.\n\nFocussing on the most numerous and spatially-extensive records of palaeo ice sheet activity - glacial landforms - the project aims to revolutionise understanding of past, present and future ice sheets. Our mapping campaign (Work-Package 1), including by machine learning techniques (WP2), should vastly increase the evidence-base. Resolution of how subglacial landforms are generated and how hydrological networks develop (WP3) would be major breakthroughs leading to possible inversions to information on ice thickness or velocity, and with key implications for ice flow models and hydrological effects on ice dynamics. By pioneering techniques and coding for combining ice sheet models with landform data (WP4) we will improve knowledge of the role of palaeo-ice sheets in Earth system change. Trialling of numerical models in these data-rich environments will highlight deficiencies in process-formulations, leading to better models. Applying our coding to combine landforms and geochronology to optimise modelling (WP4) of the retreat of the Greenland and Antarctic ice sheets since the last glacial will provide spin up glaciological conditions for models that forecast sea level rise.",
"rcn": 216167,
"startDate": "2018-10-01",
"status": "SIGNED",
"subCall": "ERC-2017-ADG",
"title": "Palaeoglaciological advances to understand Earths ice sheets by landform analysis",
"topics": "ERC-2017-ADG",
"totalCost": 2425298.75
},{
"acronym": "Konetik eLCV",
"contentUpdateDate": "2022-08-10 09:21:56",
"ecMaxContribution": 50000,
"ecSignatureDate": "2018-11-29",
"endDate": "2019-01-31",
"frameworkProgramme": "H2020",
"fundingScheme": "SME-1",
"grantDoi": "10.3030/837614",
"id": 837614,
"legalBasis": "H2020-EU.2.3.",
"masterCall": "H2020-EIC-SMEInst-2018-2020",
"nature": "",
"objective": "Light Commercial vehicle fleets are important for the EV adoption A LCV is a business tool, so the utilisation rate and ensuring business continuity are key. Integrating and managing electric LCV is challenging due to the limited driving range and charging infrastructure.\n\nIn this project, our aim is to make a feasibility study of developing the first AI based charging assistant for Light Commercial Vehicle fleets. As part of the project aim is to research into the technical feasibility of analyzing vehicle charging data from the electric LCVs and combine that with consumption data from public, home and office chargers to ensure business continuity of eLCV fleets and save money on charging and reducing idle time.\n\nAccording to the IEA, EV/HEVs stock is projected to reach 200 Million units by 2030. The total EV/HEV market is expected to grow up 233EUR bn by 2021 growing at a 40.65%\n\nThe project will allow us to facilitate the market spread of eLCVs with the first machine learning based smart charging assistant tool based on our unique algorithm that combines advanced energy management and telematics. This will imply to disrupt into the European and international market by saving significant money on eLCV charging and reducing downtimes for our client while generating 5,1 M€ profit until 2022 and a generation of 42 new direct jobs on the company level for Konetik.\n\nKonetik is a telematics company focusing on products helping the widespread of electric vehicles. Konetik serves 300+ companies 3 energy utilities already engaged (NKM, ENGIE, EnBW) regarding a pilot program. Selected as one of the top 100 Berlin based startups",
"rcn": 219747,
"startDate": "2018-11-01",
"status": "CLOSED",
"subCall": "H2020-SMEInst-2018-2020-1",
"title": "Artificial Intelligence based Smart Charging Assistant for Electric Light Commercial Vehicle Fleets",
"topics": "EIC-SMEInst-2018-2020",
"totalCost": 71429
},{
"acronym": "INSENSION",
"contentUpdateDate": "2022-09-04 01:10:17",
"ecMaxContribution": 2255875,
"ecSignatureDate": "2017-11-07",
"endDate": "2021-10-31",
"frameworkProgramme": "H2020",
"fundingScheme": "RIA",
"grantDoi": "10.3030/780819",
"id": 780819,
"legalBasis": "H2020-EU.2.1.1.",
"masterCall": "H2020-ICT-2016-2017",
"nature": "",
"objective": "The INSENSION project will create an ICT platform that enables persons with profound and multiple learning disabilities (PMLD) to use digital applications and services that can enhance the quality of their lives, increase their ability to self-determination and enrich their lives. The target end users of the proposed solution are capable of using only nonconventional, nonsymbolic means of interaction with their environment. Therefore, the platform aims to provide technological means for seamless, and adaptable recognition of a range of highly individual nonsymbolic behavioral signals of people with PMLD to detect behavioral patterns happening in the context of specific situations. These patterns are translated into the affective intents of the end user (their approval or disapproval to the given situation) and allow to communicate them to assistive services. This way an individual with PMLD gains a possibility to seamlessly influence their living environment, through new means of communication with other people, changing conditions of their environment or use new types of assistive digital applications. The project employs recent advances in a range of ICT disciplines equipping the proposed assistive ICT platform with natural behavior recognition mechanisms based on gesture, facial expression and vocalization recognition technologies. This is complemented by novel techniques of artificial intelligence and state-of-the-art Internet of Things models. The research and development of the project is conducted within the inclusive design paradigm, with individual with PMLD and their caregivers directly participating in the R+D process throughout the whole duration of the project. This process links a highly interdisciplinary team of experts of ICT specialists and researchers and practitioners of disability studies and care, with due participation of an assistive technology industry representatives.",
"rcn": 213171,
"startDate": "2018-01-01",
"status": "SIGNED",
"subCall": "H2020-ICT-2017-1",
"title": "Personalized intelligent platform enabling interaction with digital services to individuals with profound and multiple learning disabilities",
"topics": "ICT-23-2017",
"totalCost": 2255875
},{
"acronym": "MANET",
"contentUpdateDate": "2022-06-13 17:36:10",
"ecMaxContribution": 171473.28,
"ecSignatureDate": "2021-04-30",
"endDate": "2024-06-30",
"frameworkProgramme": "H2020",
"fundingScheme": "MSCA-IF-EF-ST",
"grantDoi": "10.3030/101033173",
"id": 101033173,
"legalBasis": "H2020-EU.1.3.",
"masterCall": "H2020-MSCA-IF-2020",
"nature": "",
"objective": "Curbing greenhouse gas emissions is a challenge of the utmost importance for our society future and requires urgent decisions on the implementation of clear-cut climate economic policies. Integrated Assessment Models (IAMs) allow to explore alternative energy scenarios in the next 30-70 years. They are key to support the design of climate policies as they highlight the nexus between climate modelling, social science, and energy systems. However, the use of IAMs to inform climate policies does not come free of controversial aspects. Primarily, the inherent uncertainty of IAMs long-term outputs has created several difficulties for the integration of the modelling insights in the policy design. Modelling outputs diverge across IAMs models quite dramatically when they are asked for example to quantify the uptake of key technologies for the decarbonisation, such as renewables and carbon capture and storage. Uncertainty in IAMs descends from lack of knowledge of the future and from IAMs incomplete representations of the future. Uncertainty cannot be removed, but reduced, understood, and conveyed appropriately to policy makers to avoid that different projections cause delayed actions. \nThis project aims to fill this gap providing a methodology which defines the sources of uncertainty, either due to IAMs inputs or IAMs structure, and quantify their relative importance. The methodology will be embodied in an emulator of IAMs, MANET (the eMulAtor of iNtegratAd assEssmenT models) formulated using machine learning techniques to reproduce IAMs outputs. The project will provide a proof of concept of MANET focusing on the uptake of key decarbonisation technologies. The emulator will provide a simplified version of the IAM outputs as a response surface of the model to any variation of the inputs. MANET will be a flexible tool for policy makers and scientists for a direct comparison of IAMs with no limitation of the solution domain.",
"rcn": 235834,
"startDate": "2022-07-01",
"status": "SIGNED",
"subCall": "H2020-MSCA-IF-2020",
"title": "Climate economic policies: assessing values and costs of uncertainty in energy scenarios",
"topics": "MSCA-IF-2020",
"totalCost": 171473.28
},{
"acronym": "PRINTOUT",
"contentUpdateDate": "2022-11-12 14:18:08",
"ecMaxContribution": 183473.28,
"ecSignatureDate": "2020-04-21",
"endDate": "2022-06-14",
"frameworkProgramme": "H2020",
"fundingScheme": "MSCA-IF-EF-ST",
"grantDoi": "10.3030/892757",
"id": 892757,
"legalBasis": "H2020-EU.1.3.",
"masterCall": "H2020-MSCA-IF-2019",
"nature": "",
"objective": "With the extensive range of document generation devices nowadays, the establishment of computational techniques to find manipulation, detect illegal copies and link documents to their source are useful because (i) finding manipulation can help to detect fake news and manipulated documents; (ii) exposing illegal copies can avoid frauds and copyright violation; and (iii) indicating the owner of an illegal document can provide strong arguments to the prosecution of a suspect. Different machine learning techniques have been proposed in the scientific literature to act in these problems, but many of them are limited as: (i) there is a lack of methodology, which may require different experts to solve different problems; (ii) the limited range of known elements being considered for multi-class classification problems such as source attribution, which do not consider unknown classes in a real-world testing; and (iii) they dont consider adversarial attacks from an experienced forger. In this research project, we propose to address these problems on two fronts: resilient characterization and classification. In the characterization front, we intend to use multi-analysis approaches. Proposed by the candidate in his Ph.D. research, it is a methodology to fuse/ensemble machine learning approaches by considering several investigative scenarios, creating robust classifiers that minimize the risk of attacks. Additionally, we aim at proposing the use of open-set classifiers, which are trained to avoid misclassification of classes not included in the classifier training. We envision solutions to several printed document forensics applications with this setup: source attribution, forgery of documents and illegal copies detection. All the approaches we aim at creating in this project will be done in partnership with a document authentication company, which will provide real-world datasets and new applications.",
"rcn": 229161,
"startDate": "2020-06-15",
"status": "CLOSED",
"subCall": "H2020-MSCA-IF-2019",
"title": "Printed Documents Authentication",
"topics": "MSCA-IF-2019",
"totalCost": 183473.28
},{
"acronym": "SKIDLESS",
"contentUpdateDate": "2022-08-16 00:57:32",
"ecMaxContribution": 50000,
"ecSignatureDate": "2019-01-21",
"endDate": "2019-07-31",
"frameworkProgramme": "H2020",
"fundingScheme": "SME-1",
"grantDoi": "10.3030/855496",
"id": 855496,
"legalBasis": "H2020-EU.2.3.",
"masterCall": "H2020-EIC-SMEInst-2018-2020",
"nature": "",
"objective": "When we drive, our safety is protected by a set of technologies that silently watch over the cars behaviour, intervening to\nminimise the risk of accidents. The Electronic Stability Control (ESC) is by far the most impactful safety technology in cars,\nhaving reduced by around 40% the number of fatal accidents caused by the vehicles loss of control. Although effective, any\nESC on the market suffer from one significant flaw: it cannot directly measure the sideslip angle, which is the key indicator of\nskidding, namely the situation when the car deviates from the drivers intended direction. The result is that present ESC can\ndetect up to 80% of skidding events, thus still leaving room for improvements that can save lives. To address this issue and\ncatch a huge market opportunity, Modelway has developed a machine learning technology able to accurately estimate the\nvehicles sideslip angle in real time. And without adding any new sensor to the car. The key to obtain this result is the\nproprietary and patented Direct Virtual Sensor technology, which can be embedded in standard ESC units to further improve\nthe vehicles capacity to detect a skidding event. The DVS technology has been prototyped and extensive tests have been\ncarried out with car manufacturers and their Tier-1 suppliers, showing that the performances are already in line with the\nexpectations of a highly regulated industry as automotive. Now the development roadmap focuses on understanding the\nfeasibility of the integration of the DVS technology in commercial ESC units (Phase 1), to enable a co-development effort\nwith global ESC manufacturers (e.g. Bosch, Magneti Marelli) leading to a pre-commercial validation test-bed (Phase 2). In\nterms of business potential, with around 100 million cars sold each year globally and around 50 in Europe and the US where\nthe use of ESC is mandatory since 2014, we target more than 4 million DSV installed in cars by 2025, leading to more than\n28 M€ of revenues.",
"rcn": 220470,
"startDate": "2019-02-01",
"status": "CLOSED",
"subCall": "H2020-SMEInst-2018-2020-1",
"title": "Enhancing car safety through accurate and real time side-slip angle assessment",
"topics": "EIC-SMEInst-2018-2020",
"totalCost": 71429
},{
"acronym": "Z-Fact0r",
"contentUpdateDate": "2022-08-18 09:44:24",
"ecMaxContribution": 4206252.88,
"ecSignatureDate": "2016-08-09",
"endDate": "2020-03-31",
"frameworkProgramme": "H2020",
"fundingScheme": "IA",
"grantDoi": "10.3030/723906",
"id": 723906,
"legalBasis": "H2020-EU.2.1.5.",
"masterCall": "H2020-IND-CE-2016-17",
"nature": "",
"objective": "Manufacturing represents approximately 21 % of the EUs GDP and 20 % of its employment, providing more than 30 million jobs in 230 000 enterprises, mostly SMEs. Moreover, each job in industry is considered to be linked to two more in related services. European manufacturing is also a dominant element in international trade, leading the world in areas such as automotive, machinery and agricultural engineering. Already threatened by both the lower-wage economies and other high-tech rivals, the situation of EU companies was even made more difficult by the downturn.\nThe Z-Fact0r consortium has conducted an extensive state-of-the-art research (see section 1.4) and realised that although a number of activities (see section 1.3) have been trying to address the need for zero-defect manufacturing, still there is a vast business opportunity for innovative, high-ROI (Return on Investment) solutions to ensure, better quality and higher productivity in the European manufacturing industries.\nThe Z-Fact0r solution comprises the introduction of five (5) multi-stage production-based strategies targeting (i) the early detection of the defect (Z-DETECT), (ii) the prediction of the defect generation (Z-PREDICT), (iii) the prevention of defect generation by recalibrating the production line (multi-stage), as well as defect propagation in later stages of the production (Z-PREVENT), (iv) the reworking/remanufacturing of the product, if this is possible, using additive and subtractive manufacturing techniques (Z-REPAIR) and (v) the management of the aforementioned strategies through event modelling, KPI (key performance indicators) monitoring and real-time decision support (Z-MANAGE).\nTo do that we have brought together a total of thirteen (13) EU-based partners, representing both industry and academia, having ample experience in cutting-edge technologies and active presence in the EU manufacturing.",
"rcn": 205465,
"startDate": "2016-10-01",
"status": "CLOSED",
"subCall": "H2020-FOF-2016",
"title": "Zero-defect manufacturing strategies towards on-line production management for European factories",
"topics": "FOF-03-2016",
"totalCost": 6063018.75
}]

View File

@ -1,16 +0,0 @@
{"id":"894593","programme":"H2020-EU.3.4.7.","topics":"SESAR-ER4-31-2019"}
{"id":"897004","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019"}
{"id":"896300","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019"}
{"id":"892890","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019"}
{"id":"886828","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019"}
{"id":"886776","programme":"H2020-EU.2.1.4.;H2020-EU.3.2.6.","topics":"BBI-2019-SO3-D4"}
{"id":"895426","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019"}
{"id":"898218","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019"}
{"id":"893787","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019"}
{"id":"896189","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019"}
{"id":"891624","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019"}
{"id":"887259","programme":"H2020-EU.2.1.4.;H2020-EU.3.2.6.","topics":"BBI-2019-SO3-D3"}
{"id":"892834","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019"}
{"id":"895716","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019"}
{"id":"954782","programme":"H2020-EU.3.;H2020-EU.2.3.;H2020-EU.2.1.","topics":"EIC-SMEInst-2018-2020"}
{"id":"101003374","programme":"H2020-EU.4.","topics":"WF-02-2019"}

View File

@ -1,12 +0,0 @@
{"result_id":"dedup_wf_001::53575dc69e9ace947e02d47ecd54a7a6","downloads":0,"views":4}
{"result_id":"dedup_wf_001::53575dc69e9ace947e02d47ecd54a7a6","downloads":0,"views":1}
{"result_id":"doi_________::17eda2ff77407538fbe5d3d719b9d1c0","downloads":0,"views":1}
{"result_id":"doi_________::1d4dc08605fd0a2be1105d30c63bfea1","downloads":1,"views":3}
{"result_id":"doi_________::2e3527822854ca9816f6dfea5bff61a8","downloads":1,"views":1}
{"result_id":"doi_________::3085e4c6e051378ca6157fe7f0430c1f","downloads":2,"views":3}
{"result_id":"doi_________::3085e4c6e051378ca6157fe7f0430c1f","downloads":0,"views":3}
{"result_id":"doi_________::33f710e6dd30cc5e67e35b371ddc33cf","downloads":0,"views":1}
{"result_id":"doi_________::39738ebf10654732dd3a7af9f24655f8","downloads":1,"views":3}
{"result_id":"doi_________::3c3b65f07c1a06c7894397eda1d11bbf","downloads":1,"views":8}
{"result_id":"doi_________::3c3b65f07c1a06c7894397eda1d11bbf","downloads":0,"views":2}
{"result_id":"doi_________::4938a71a884dd481d329657aa543b850","downloads":0,"views":3}

View File

@ -54,6 +54,9 @@ class DataciteToOAFTest extends AbstractVocabularyTest {
val path = getClass.getResource("/eu/dnetlib/dhp/actionmanager/datacite/dataset").getPath val path = getClass.getResource("/eu/dnetlib/dhp/actionmanager/datacite/dataset").getPath
val conf = new SparkConf() val conf = new SparkConf()
conf.set("spark.driver.host", "localhost")
conf.set("spark.ui.enabled", "false")
val spark: SparkSession = SparkSession val spark: SparkSession = SparkSession
.builder() .builder()
.config(conf) .config(conf)

View File

@ -29,20 +29,7 @@ import eu.dnetlib.broker.objects.OaBrokerRelatedPublication;
import eu.dnetlib.broker.objects.OaBrokerRelatedSoftware; import eu.dnetlib.broker.objects.OaBrokerRelatedSoftware;
import eu.dnetlib.broker.objects.OaBrokerTypedValue; import eu.dnetlib.broker.objects.OaBrokerTypedValue;
import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.Author; import eu.dnetlib.dhp.schema.oaf.*;
import eu.dnetlib.dhp.schema.oaf.Dataset;
import eu.dnetlib.dhp.schema.oaf.Datasource;
import eu.dnetlib.dhp.schema.oaf.ExternalReference;
import eu.dnetlib.dhp.schema.oaf.Field;
import eu.dnetlib.dhp.schema.oaf.Instance;
import eu.dnetlib.dhp.schema.oaf.Journal;
import eu.dnetlib.dhp.schema.oaf.KeyValue;
import eu.dnetlib.dhp.schema.oaf.Project;
import eu.dnetlib.dhp.schema.oaf.Publication;
import eu.dnetlib.dhp.schema.oaf.Qualifier;
import eu.dnetlib.dhp.schema.oaf.Result;
import eu.dnetlib.dhp.schema.oaf.Software;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
public class ConversionUtils { public class ConversionUtils {
@ -74,6 +61,10 @@ public class ConversionUtils {
return sp != null ? new OaBrokerTypedValue(classId(sp.getQualifier()), sp.getValue()) : null; return sp != null ? new OaBrokerTypedValue(classId(sp.getQualifier()), sp.getValue()) : null;
} }
public static OaBrokerTypedValue oafSubjectToBrokerTypedValue(final Subject sp) {
return sp != null ? new OaBrokerTypedValue(classId(sp.getQualifier()), sp.getValue()) : null;
}
public static OaBrokerRelatedDataset oafDatasetToBrokerDataset(final Dataset d) { public static OaBrokerRelatedDataset oafDatasetToBrokerDataset(final Dataset d) {
if (d == null) { if (d == null) {
return null; return null;
@ -118,7 +109,7 @@ public class ConversionUtils {
res.setTitles(structPropList(result.getTitle())); res.setTitles(structPropList(result.getTitle()));
res.setAbstracts(fieldList(result.getDescription())); res.setAbstracts(fieldList(result.getDescription()));
res.setLanguage(classId(result.getLanguage())); res.setLanguage(classId(result.getLanguage()));
res.setSubjects(structPropTypedList(result.getSubject())); res.setSubjects(subjectList(result.getSubject()));
res.setCreators(mappedList(result.getAuthor(), ConversionUtils::oafAuthorToBrokerAuthor)); res.setCreators(mappedList(result.getAuthor(), ConversionUtils::oafAuthorToBrokerAuthor));
res.setPublicationdate(fieldValue(result.getDateofacceptance())); res.setPublicationdate(fieldValue(result.getDateofacceptance()));
res.setPublisher(fieldValue(result.getPublisher())); res.setPublisher(fieldValue(result.getPublisher()));
@ -315,6 +306,18 @@ public class ConversionUtils {
: new ArrayList<>(); : new ArrayList<>();
} }
private static List<OaBrokerTypedValue> subjectList(final List<Subject> list) {
if (list == null) {
return new ArrayList<>();
}
return list
.stream()
.map(ConversionUtils::oafSubjectToBrokerTypedValue)
.filter(Objects::nonNull)
.collect(Collectors.toList());
}
private static List<OaBrokerTypedValue> structPropTypedList(final List<StructuredProperty> list) { private static List<OaBrokerTypedValue> structPropTypedList(final List<StructuredProperty> list) {
if (list == null) { if (list == null) {
return new ArrayList<>(); return new ArrayList<>();

View File

@ -143,7 +143,7 @@ public class SparkOpenorgsDedupTest implements Serializable {
.load(DedupUtility.createSimRelPath(testOutputBasePath, testActionSetId, "organization")) .load(DedupUtility.createSimRelPath(testOutputBasePath, testActionSetId, "organization"))
.count(); .count();
assertEquals(288, orgs_simrel); assertEquals(290, orgs_simrel);
} }
@Test @Test
@ -172,7 +172,7 @@ public class SparkOpenorgsDedupTest implements Serializable {
.load(DedupUtility.createSimRelPath(testOutputBasePath, testActionSetId, "organization")) .load(DedupUtility.createSimRelPath(testOutputBasePath, testActionSetId, "organization"))
.count(); .count();
assertEquals(324, orgs_simrel); assertEquals(326, orgs_simrel);
} }
@Test @Test

View File

@ -168,11 +168,11 @@ public class SparkStatsTest implements Serializable {
.textFile(testOutputBasePath + "/" + testActionSetId + "/otherresearchproduct_blockstats") .textFile(testOutputBasePath + "/" + testActionSetId + "/otherresearchproduct_blockstats")
.count(); .count();
assertEquals(477, orgs_blocks); assertEquals(480, orgs_blocks);
assertEquals(295, pubs_blocks); assertEquals(295, pubs_blocks);
assertEquals(122, sw_blocks); assertEquals(122, sw_blocks);
assertEquals(191, ds_blocks); assertEquals(191, ds_blocks);
assertEquals(171, orp_blocks); assertEquals(178, orp_blocks);
} }
@AfterAll @AfterAll

View File

@ -4,12 +4,12 @@ package eu.dnetlib.dhp.bulktag;
import static eu.dnetlib.dhp.PropagationConstant.removeOutputDir; import static eu.dnetlib.dhp.PropagationConstant.removeOutputDir;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import java.util.ArrayList; import java.util.*;
import java.util.Objects;
import java.util.Optional;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf; import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.FilterFunction;
import org.apache.spark.api.java.function.ForeachFunction;
import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders; import org.apache.spark.sql.Encoders;
@ -23,10 +23,17 @@ import com.google.gson.Gson;
import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.bulktag.community.*; import eu.dnetlib.dhp.bulktag.community.*;
import eu.dnetlib.dhp.schema.oaf.Datasource;
import eu.dnetlib.dhp.schema.oaf.Result; import eu.dnetlib.dhp.schema.oaf.Result;
public class SparkBulkTagJob { public class SparkBulkTagJob {
private static String OPENAIRE_3 = "openaire3.0";
private static String OPENAIRE_4 = "openaire-pub_4.0";
private static String OPENAIRE_CRIS = "openaire-cris_1.1";
private static String OPENAIRE_DATA = "openaire2.0_data";
private static String EOSC = "10|openaire____::2e06c1122c7df43765fdcf91080824fa";
private static final Logger log = LoggerFactory.getLogger(SparkBulkTagJob.class); private static final Logger log = LoggerFactory.getLogger(SparkBulkTagJob.class);
public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
@ -88,10 +95,47 @@ public class SparkBulkTagJob {
isSparkSessionManaged, isSparkSessionManaged,
spark -> { spark -> {
removeOutputDir(spark, outputPath); removeOutputDir(spark, outputPath);
extendCommunityConfigurationForEOSC(spark, inputPath, cc);
execBulkTag(spark, inputPath, outputPath, protoMappingParams, resultClazz, cc); execBulkTag(spark, inputPath, outputPath, protoMappingParams, resultClazz, cc);
}); });
} }
private static void extendCommunityConfigurationForEOSC(SparkSession spark, String inputPath,
CommunityConfiguration cc) {
Dataset<String> datasources = readPath(
spark, inputPath
.substring(
0,
inputPath.lastIndexOf("/"))
+ "/datasource",
Datasource.class)
.filter((FilterFunction<Datasource>) ds -> isOKDatasource(ds))
.map((MapFunction<Datasource, String>) ds -> ds.getId(), Encoders.STRING());
Map<String, List<Pair<String, SelectionConstraints>>> dsm = cc.getEoscDatasourceMap();
for (String ds : datasources.collectAsList()) {
final String dsId = ds.substring(3);
if (!dsm.containsKey(dsId)) {
ArrayList<Pair<String, SelectionConstraints>> eoscList = new ArrayList<>();
dsm.put(dsId, eoscList);
}
}
}
private static boolean isOKDatasource(Datasource ds) {
final String compatibility = ds.getOpenairecompatibility().getClassid();
boolean isOk = (compatibility.equalsIgnoreCase(OPENAIRE_3) ||
compatibility.equalsIgnoreCase(OPENAIRE_4) ||
compatibility.equalsIgnoreCase(OPENAIRE_CRIS) ||
compatibility.equalsIgnoreCase(OPENAIRE_DATA)) &&
ds.getCollectedfrom().stream().anyMatch(cf -> cf.getKey().equals(EOSC));
return isOk;
}
private static <R extends Result> void execBulkTag( private static <R extends Result> void execBulkTag(
SparkSession spark, SparkSession spark,
String inputPath, String inputPath,

View File

@ -15,6 +15,7 @@ public class Community implements Serializable {
private List<Provider> providers = new ArrayList<>(); private List<Provider> providers = new ArrayList<>();
private List<ZenodoCommunity> zenodoCommunities = new ArrayList<>(); private List<ZenodoCommunity> zenodoCommunities = new ArrayList<>();
private SelectionConstraints constraints = new SelectionConstraints(); private SelectionConstraints constraints = new SelectionConstraints();
private SelectionConstraints removeConstraints = new SelectionConstraints();
public String toJson() { public String toJson() {
final Gson g = new Gson(); final Gson g = new Gson();
@ -67,4 +68,12 @@ public class Community implements Serializable {
public void setConstraints(SelectionConstraints constraints) { public void setConstraints(SelectionConstraints constraints) {
this.constraints = constraints; this.constraints = constraints;
} }
public SelectionConstraints getRemoveConstraints() {
return removeConstraints;
}
public void setRemoveConstraints(SelectionConstraints removeConstraints) {
this.removeConstraints = removeConstraints;
}
} }

View File

@ -26,6 +26,18 @@ public class CommunityConfiguration implements Serializable {
private Map<String, List<Pair<String, SelectionConstraints>>> zenodocommunityMap = new HashMap<>(); private Map<String, List<Pair<String, SelectionConstraints>>> zenodocommunityMap = new HashMap<>();
// map communityid -> selectionconstraints // map communityid -> selectionconstraints
private Map<String, SelectionConstraints> selectionConstraintsMap = new HashMap<>(); private Map<String, SelectionConstraints> selectionConstraintsMap = new HashMap<>();
// map eosc datasource -> communityid
private Map<String, List<Pair<String, SelectionConstraints>>> eoscDatasourceMap = new HashMap<>();
// map communityid -> remove constraints
private Map<String, SelectionConstraints> removeConstraintsMap = new HashMap<>();
public Map<String, List<Pair<String, SelectionConstraints>>> getEoscDatasourceMap() {
return eoscDatasourceMap;
}
public void setEoscDatasourceMap(Map<String, List<Pair<String, SelectionConstraints>>> eoscDatasourceMap) {
this.eoscDatasourceMap = eoscDatasourceMap;
}
public Map<String, List<Pair<String, SelectionConstraints>>> getSubjectMap() { public Map<String, List<Pair<String, SelectionConstraints>>> getSubjectMap() {
return subjectMap; return subjectMap;
@ -61,6 +73,14 @@ public class CommunityConfiguration implements Serializable {
this.selectionConstraintsMap = selectionConstraintsMap; this.selectionConstraintsMap = selectionConstraintsMap;
} }
public Map<String, SelectionConstraints> getRemoveConstraintsMap() {
return removeConstraintsMap;
}
public void setRemoveConstraintsMap(Map<String, SelectionConstraints> removeConstraintsMap) {
this.removeConstraintsMap = removeConstraintsMap;
}
CommunityConfiguration(final Map<String, Community> communities) { CommunityConfiguration(final Map<String, Community> communities) {
this.communities = communities; this.communities = communities;
init(); init();
@ -80,6 +100,9 @@ public class CommunityConfiguration implements Serializable {
if (selectionConstraintsMap == null) { if (selectionConstraintsMap == null) {
selectionConstraintsMap = Maps.newHashMap(); selectionConstraintsMap = Maps.newHashMap();
} }
if (removeConstraintsMap == null) {
removeConstraintsMap = Maps.newHashMap();
}
for (Community c : getCommunities().values()) { for (Community c : getCommunities().values()) {
// get subjects // get subjects
@ -101,6 +124,8 @@ public class CommunityConfiguration implements Serializable {
zenodocommunityMap); zenodocommunityMap);
} }
selectionConstraintsMap.put(id, c.getConstraints()); selectionConstraintsMap.put(id, c.getConstraints());
removeConstraintsMap.put(id, c.getRemoveConstraints());
} }
} }
@ -146,6 +171,10 @@ public class CommunityConfiguration implements Serializable {
.collect(Collectors.toList()); .collect(Collectors.toList());
} }
public boolean isEoscDatasource(final String dts) {
return eoscDatasourceMap.containsKey(dts);
}
public List<Pair<String, SelectionConstraints>> getCommunityForZenodoCommunity(String zc) { public List<Pair<String, SelectionConstraints>> getCommunityForZenodoCommunity(String zc) {
return zenodocommunityMap.get(zc); return zenodocommunityMap.get(zc);
} }

View File

@ -37,7 +37,7 @@ public class CommunityConfigurationFactory {
public static CommunityConfiguration newInstance(final String xml) throws DocumentException, SAXException { public static CommunityConfiguration newInstance(final String xml) throws DocumentException, SAXException {
log.debug(String.format("parsing community configuration from:\n%s", xml)); log.info(String.format("parsing community configuration from:\n%s", xml));
final SAXReader reader = new SAXReader(); final SAXReader reader = new SAXReader();
reader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); reader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
@ -86,6 +86,7 @@ public class CommunityConfigurationFactory {
c.setProviders(parseDatasources(node)); c.setProviders(parseDatasources(node));
c.setZenodoCommunities(parseZenodoCommunities(node)); c.setZenodoCommunities(parseZenodoCommunities(node));
c.setConstraints(parseConstrains(node)); c.setConstraints(parseConstrains(node));
c.setRemoveConstraints(parseRemoveConstrains(node));
return c; return c;
} }
@ -102,6 +103,19 @@ public class CommunityConfigurationFactory {
return selectionConstraints; return selectionConstraints;
} }
private static SelectionConstraints parseRemoveConstrains(Node node) {
Node constsNode = node.selectSingleNode("./removeConstraints");
if (constsNode == null || StringUtils.isBlank(StringUtils.trim(constsNode.getText()))) {
return new SelectionConstraints();
}
SelectionConstraints selectionConstraints = new Gson()
.fromJson(constsNode.getText(), SelectionConstraints.class);
selectionConstraints.setSelection(resolver);
log.info("number of selection constraints set " + selectionConstraints.getCriteria().size());
return selectionConstraints;
}
private static List<String> parseSubjects(final Node node) { private static List<String> parseSubjects(final Node node) {
final List<String> subjects = Lists.newArrayList(); final List<String> subjects = Lists.newArrayList();

View File

@ -11,6 +11,7 @@ public class Constraint implements Serializable {
private String verb; private String verb;
private String field; private String field;
private String value; private String value;
// private String element;
private Selection selection; private Selection selection;
public String getVerb() { public String getVerb() {
@ -50,4 +51,12 @@ public class Constraint implements Serializable {
public boolean verifyCriteria(String metadata) { public boolean verifyCriteria(String metadata) {
return selection.apply(metadata); return selection.apply(metadata);
} }
// public String getElement() {
// return element;
// }
//
// public void setElement(String element) {
// this.element = element;
// }
} }

View File

@ -1,8 +1,10 @@
package eu.dnetlib.dhp.bulktag.community; package eu.dnetlib.dhp.bulktag.community;
import java.io.IOException;
import java.util.List; import java.util.List;
import org.apache.commons.io.IOUtils;
import org.dom4j.DocumentException; import org.dom4j.DocumentException;
import org.xml.sax.SAXException; import org.xml.sax.SAXException;
@ -13,60 +15,17 @@ import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
public class QueryInformationSystem { public class QueryInformationSystem {
private static final String XQUERY = "for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType') "
+ " let $subj := $x//CONFIGURATION/context/param[./@name='subject']/text() "
+ " let $datasources := $x//CONFIGURATION/context/category[./@id=concat($x//CONFIGURATION/context/@id,'::contentproviders')]/concept "
+ " let $organizations := $x//CONFIGURATION/context/category[./@id=concat($x//CONFIGURATION/context/@id,'::resultorganizations')]/concept "
+ " let $communities := $x//CONFIGURATION/context/category[./@id=concat($x//CONFIGURATION/context/@id,'::zenodocommunities')]/concept "
+
"let $zenodo := $x//param[./@name='zenodoCommunity']/text() "
+ " where $x//CONFIGURATION/context[./@type='community' or ./@type='ri'] and $x//context/param[./@name = 'status']/text() != 'hidden' "
+ " return "
+ " <community> "
+ " { $x//CONFIGURATION/context/@id} "
+ " <subjects> "
+ " {for $y in tokenize($subj,',') "
+ " return "
+ " <subject>{$y}</subject>} "
+ " </subjects> "
+ " <datasources> "
+ " {for $d in $datasources "
+ " where $d/param[./@name='enabled']/text()='true' "
+ " return "
+ " <datasource> "
+ " <openaireId> "
+ " {$d//param[./@name='openaireId']/text()} "
+ " </openaireId> "
+ " <selcriteria> "
+ " {$d/param[./@name='selcriteria']/text()} "
+ " </selcriteria> "
+ " </datasource> } "
+ " </datasources> " +
" <zenodocommunities> " +
"{for $zc in $zenodo " +
"return " +
"<zenodocommunity> " +
"<zenodoid> " +
"{$zc} " +
"</zenodoid> " +
"</zenodocommunity>}"
+ " {for $zc in $communities "
+ " return "
+ " <zenodocommunity> "
+ " <zenodoid> "
+ " {$zc/param[./@name='zenodoid']/text()} "
+ " </zenodoid> "
+ " <selcriteria> "
+ " {$zc/param[./@name='selcriteria']/text()} "
+ " </selcriteria> "
+ " </zenodocommunity>} "
+ " </zenodocommunities> "
+ " </community>";
public static CommunityConfiguration getCommunityConfiguration(final String isLookupUrl) public static CommunityConfiguration getCommunityConfiguration(final String isLookupUrl)
throws ISLookUpException, DocumentException, SAXException { throws ISLookUpException, DocumentException, SAXException, IOException {
ISLookUpService isLookUp = ISLookupClientFactory.getLookUpService(isLookupUrl); ISLookUpService isLookUp = ISLookupClientFactory.getLookUpService(isLookupUrl);
final List<String> res = isLookUp.quickSearchProfile(XQUERY); final List<String> res = isLookUp
.quickSearchProfile(
IOUtils
.toString(
QueryInformationSystem.class
.getResourceAsStream(
"/eu/dnetlib/dhp/bulktag/query.xq")));
final String xmlConf = "<communities>" + Joiner.on(" ").join(res) + "</communities>"; final String xmlConf = "<communities>" + Joiner.on(" ").join(res) + "</communities>";

View File

@ -7,22 +7,22 @@ import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
import java.io.Serializable; import java.io.Serializable;
import java.util.*; import java.util.*;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.gson.Gson; import com.google.gson.Gson;
import com.jayway.jsonpath.DocumentContext; import com.jayway.jsonpath.DocumentContext;
import com.jayway.jsonpath.JsonPath; import com.jayway.jsonpath.JsonPath;
import eu.dnetlib.dhp.bulktag.SparkBulkTagJob; import eu.dnetlib.dhp.bulktag.eosc.EoscIFTag;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.dhp.schema.oaf.*;
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
/** Created by miriam on 02/08/2018. */ /** Created by miriam on 02/08/2018. */
public class ResultTagger implements Serializable { public class ResultTagger implements Serializable {
private static final Logger log = LoggerFactory.getLogger(ResultTagger.class);
private boolean clearContext(Result result) { private boolean clearContext(Result result) {
int tmp = result.getContext().size(); int tmp = result.getContext().size();
@ -64,6 +64,38 @@ public class ResultTagger implements Serializable {
return result; return result;
} }
// Execute the EOSCTag for the services
switch (result.getResulttype().getClassid()) {
case PUBLICATION_RESULTTYPE_CLASSID:
break;
case SOFTWARE_RESULTTYPE_CLASSID:
EoscIFTag.tagForSoftware(result);
break;
case DATASET_RESULTTYPE_CLASSID:
EoscIFTag.tagForDataset(result);
break;
case ORP_RESULTTYPE_CLASSID:
EoscIFTag.tagForOther(result);
break;
}
// communities contains all the communities to be not added to the context
final Set<String> removeCommunities = new HashSet<>();
conf
.getRemoveConstraintsMap()
.keySet()
.forEach(communityId -> {
if (conf.getRemoveConstraintsMap().get(communityId).getCriteria() != null &&
conf
.getRemoveConstraintsMap()
.get(communityId)
.getCriteria()
.stream()
.anyMatch(crit -> crit.verifyCriteria(param)))
removeCommunities.add(communityId);
});
// communities contains all the communities to be added as context for the result // communities contains all the communities to be added as context for the result
final Set<String> communities = new HashSet<>(); final Set<String> communities = new HashSet<>();
@ -86,24 +118,34 @@ public class ResultTagger implements Serializable {
// Tagging for datasource // Tagging for datasource
final Set<String> datasources = new HashSet<>(); final Set<String> datasources = new HashSet<>();
final Set<String> tmp = new HashSet<>(); final Set<String> collfrom = new HashSet<>();
final Set<String> hostdby = new HashSet<>();
if (Objects.nonNull(result.getInstance())) { if (Objects.nonNull(result.getInstance())) {
for (Instance i : result.getInstance()) { for (Instance i : result.getInstance()) {
if (Objects.nonNull(i.getCollectedfrom()) && Objects.nonNull(i.getCollectedfrom().getKey())) { if (Objects.nonNull(i.getCollectedfrom()) && Objects.nonNull(i.getCollectedfrom().getKey())) {
tmp.add(StringUtils.substringAfter(i.getCollectedfrom().getKey(), "|")); collfrom.add(StringUtils.substringAfter(i.getCollectedfrom().getKey(), "|"));
} }
if (Objects.nonNull(i.getHostedby()) && Objects.nonNull(i.getHostedby().getKey())) { if (Objects.nonNull(i.getHostedby()) && Objects.nonNull(i.getHostedby().getKey())) {
tmp.add(StringUtils.substringAfter(i.getHostedby().getKey(), "|")); hostdby.add(StringUtils.substringAfter(i.getHostedby().getKey(), "|"));
} }
} }
tmp collfrom
.forEach( .forEach(
dsId -> datasources dsId -> datasources
.addAll( .addAll(
conf.getCommunityForDatasource(dsId, param))); conf.getCommunityForDatasource(dsId, param)));
hostdby.forEach(dsId -> {
datasources
.addAll(
conf.getCommunityForDatasource(dsId, param));
if (conf.isEoscDatasource(dsId)) {
datasources.add("eosc");
}
});
} }
communities.addAll(datasources); communities.addAll(datasources);
@ -139,7 +181,8 @@ public class ResultTagger implements Serializable {
.getSelectionConstraintsMap() .getSelectionConstraintsMap()
.keySet() .keySet()
.forEach(communityId -> { .forEach(communityId -> {
if (conf.getSelectionConstraintsMap().get(communityId).getCriteria() != null && if (!removeCommunities.contains(communityId) &&
conf.getSelectionConstraintsMap().get(communityId).getCriteria() != null &&
conf conf
.getSelectionConstraintsMap() .getSelectionConstraintsMap()
.get(communityId) .get(communityId)
@ -151,6 +194,11 @@ public class ResultTagger implements Serializable {
communities.addAll(aconstraints); communities.addAll(aconstraints);
communities.removeAll(removeCommunities);
if (aconstraints.size() > 0)
log.info("Found {} for advancedConstraints ", aconstraints.size());
clearContext(result); clearContext(result);
/* Verify if there is something to bulktag */ /* Verify if there is something to bulktag */

View File

@ -3,13 +3,9 @@ package eu.dnetlib.dhp.bulktag.criteria;
import java.io.Serializable; import java.io.Serializable;
/**
* @author miriam.baglioni
* @Date 06/04/23
*/
@VerbClass("starts_with") @VerbClass("starts_with")
public class StartsWithVerb implements Selection, Serializable { public class StartsWithVerb implements Selection, Serializable {
private String param; private String param;
public StartsWithVerb() { public StartsWithVerb() {

View File

@ -3,19 +3,15 @@ package eu.dnetlib.dhp.bulktag.criteria;
import java.io.Serializable; import java.io.Serializable;
/**
* @author miriam.baglioni
* @Date 06/04/23
*/
@VerbClass("starts_with_caseinsensitive") @VerbClass("starts_with_caseinsensitive")
public class StartsWithIgnoreCaseVerb implements Selection, Serializable { public class StartsWithVerbIgnoreCase implements Selection, Serializable {
private String param; private String param;
public StartsWithIgnoreCaseVerb() { public StartsWithVerbIgnoreCase() {
} }
public StartsWithIgnoreCaseVerb(final String param) { public StartsWithVerbIgnoreCase(final String param) {
this.param = param; this.param = param;
} }

View File

@ -1,29 +0,0 @@
package eu.dnetlib.dhp.bulktag.eosc;
import java.io.Serializable;
/**
* @author miriam.baglioni
* @Date 21/07/22
*/
public class DatasourceMaster implements Serializable {
private String datasource;
private String master;
public String getDatasource() {
return datasource;
}
public void setDatasource(String datasource) {
this.datasource = datasource;
}
public String getMaster() {
return master;
}
public void setMaster(String master) {
this.master = master;
}
}

View File

@ -0,0 +1,145 @@
package eu.dnetlib.dhp.bulktag.eosc;
import java.util.*;
import eu.dnetlib.dhp.schema.oaf.*;
public class EoscIFTag {
public static final String EOSC_GALAXY_WORKFLOW = "EOSC::Galaxy Workflow";
public static final String EOSC_TWITTER_DATA = "EOSC::Twitter Data";
public static final String EOSC_JUPYTER_NOTEBOOK = "EOSC::Jupyter Notebook";
public static final String COMPLIES_WITH = "compliesWith";
public static EoscIfGuidelines newInstance(String code, String label, String url, String semantics) {
EoscIfGuidelines eig = new EoscIfGuidelines();
eig.setCode(code);
eig.setLabel(label);
eig.setUrl(url);
eig.setSemanticRelation(semantics);
return eig;
}
public static <R extends Result> void tagForSoftware(R s) {
if (!Optional.ofNullable(s.getEoscifguidelines()).isPresent())
s.setEoscifguidelines(new ArrayList<>());
if (containsCriteriaNotebook(s)) {
addEIG(
s.getEoscifguidelines(), EOSC_JUPYTER_NOTEBOOK, EOSC_JUPYTER_NOTEBOOK, "",
COMPLIES_WITH);
}
if (containsCriteriaGalaxy(s)) {
addEIG(
s.getEoscifguidelines(), EOSC_GALAXY_WORKFLOW, EOSC_GALAXY_WORKFLOW, "", COMPLIES_WITH);
}
}
public static <R extends Result> void tagForOther(R orp) {
if (!Optional.ofNullable(orp.getEoscifguidelines()).isPresent())
orp.setEoscifguidelines(new ArrayList<>());
if (containsCriteriaGalaxy(orp)) {
addEIG(
orp.getEoscifguidelines(), EOSC_GALAXY_WORKFLOW, EOSC_GALAXY_WORKFLOW, "",
COMPLIES_WITH);
}
if (containscriteriaTwitter(orp)) {
addEIG(orp.getEoscifguidelines(), EOSC_TWITTER_DATA, EOSC_TWITTER_DATA, "", COMPLIES_WITH);
}
}
public static <R extends Result> void tagForDataset(R d) {
if (!Optional.ofNullable(d.getEoscifguidelines()).isPresent())
d.setEoscifguidelines(new ArrayList<>());
if (containscriteriaTwitter(d)) {
addEIG(d.getEoscifguidelines(), EOSC_TWITTER_DATA, EOSC_TWITTER_DATA, "", COMPLIES_WITH);
}
}
private static void addEIG(List<EoscIfGuidelines> eoscifguidelines, String code, String label, String url,
String sem) {
if (!eoscifguidelines.stream().anyMatch(eig -> eig.getCode().equals(code)))
eoscifguidelines.add(newInstance(code, label, url, sem));
}
private static boolean containscriteriaTwitter(Result r) {
Set<String> words = getWordsSP(r.getTitle());
words.addAll(getWordsF(r.getDescription()));
if (words.contains("twitter") &&
(words.contains("data") || words.contains("dataset")))
return true;
return Optional
.ofNullable(r.getSubject())
.map(
s -> s.stream().anyMatch(sbj -> sbj.getValue().toLowerCase().contains("twitter")) &&
s.stream().anyMatch(sbj -> sbj.getValue().toLowerCase().contains("data")))
.orElse(false);
}
private static boolean containsCriteriaGalaxy(Result r) {
Set<String> words = getWordsSP(r.getTitle());
words.addAll(getWordsF(r.getDescription()));
if (words.contains("galaxy") &&
words.contains("workflow"))
return true;
return Optional
.ofNullable(r.getSubject())
.map(
s -> s.stream().anyMatch(sbj -> sbj.getValue().toLowerCase().contains("galaxy")) &&
s.stream().anyMatch(sbj -> sbj.getValue().toLowerCase().contains("workflow")))
.orElse(false);
}
private static <R extends Result> boolean containsCriteriaNotebook(R s) {
if (!Optional.ofNullable(s.getSubject()).isPresent())
return false;
if (s.getSubject().stream().anyMatch(sbj -> sbj.getValue().toLowerCase().contains("jupyter")))
return true;
if (s
.getSubject()
.stream()
.anyMatch(
sbj -> sbj.getValue().toLowerCase().contains("python") &&
sbj.getValue().toLowerCase().contains("notebook")))
return true;
if (s.getSubject().stream().anyMatch(sbj -> sbj.getValue().toLowerCase().contains("python")) &&
s.getSubject().stream().anyMatch(sbj -> sbj.getValue().toLowerCase().contains("notebook")))
return true;
return false;
}
private static Set<String> getWordsSP(List<StructuredProperty> elem) {
Set<String> words = new HashSet<>();
Optional
.ofNullable(elem)
.ifPresent(
e -> e
.forEach(
t -> words
.addAll(
Arrays.asList(t.getValue().toLowerCase().replaceAll("[^a-zA-Z ]", "").split(" ")))));
return words;
}
private static Set<String> getWordsF(List<Field<String>> elem) {
Set<String> words = new HashSet<>();
Optional
.ofNullable(elem)
.ifPresent(
e -> e
.forEach(
t -> words
.addAll(
Arrays.asList(t.getValue().toLowerCase().replaceAll("[^a-zA-Z ]", "").split(" ")))));
return words;
}
}

View File

@ -1,136 +0,0 @@
package eu.dnetlib.dhp.bulktag.eosc;
import java.io.BufferedWriter;
import java.io.Closeable;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.nio.charset.StandardCharsets;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.Arrays;
import java.util.List;
import java.util.function.Consumer;
import java.util.function.Function;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
/**
* @author miriam.baglioni
* @Date 21/07/22
*/
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.DbClient;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.common.RelationInverse;
import eu.dnetlib.dhp.schema.oaf.Relation;
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
public class ReadMasterDatasourceFromDB implements Closeable {
private final DbClient dbClient;
private static final Log log = LogFactory.getLog(ReadMasterDatasourceFromDB.class);
private final BufferedWriter writer;
private final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
private static final String QUERY = "SELECT dso.id datasource, d.id master FROM " +
"(SELECT id FROM dsm_services WHERE id like 'eosc%') dso " +
"FULL JOIN " +
"(SELECT id, duplicate FROM dsm_dedup_services WHERE duplicate like 'eosc%')d " +
"ON dso.id = d.duplicate";
public static void main(final String[] args) throws Exception {
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
IOUtils
.toString(
ReadMasterDatasourceFromDB.class
.getResourceAsStream(
"/eu/dnetlib/dhp/bulktag/datasourcemaster_parameters.json")));
parser.parseArgument(args);
final String dbUrl = parser.get("postgresUrl");
final String dbUser = parser.get("postgresUser");
final String dbPassword = parser.get("postgresPassword");
final String hdfsPath = parser.get("hdfsPath");
final String hdfsNameNode = parser.get("hdfsNameNode");
try (
final ReadMasterDatasourceFromDB rmd = new ReadMasterDatasourceFromDB(hdfsPath, hdfsNameNode, dbUrl, dbUser,
dbPassword)) {
log.info("Processing datasources...");
rmd.execute(QUERY, rmd::datasourceMasterMap);
}
}
public void execute(final String sql, final Function<ResultSet, DatasourceMaster> producer) {
dbClient.processResults(sql, rs -> writeMap(producer.apply(rs)));
}
public DatasourceMaster datasourceMasterMap(ResultSet rs) {
try {
DatasourceMaster dm = new DatasourceMaster();
String datasource = rs.getString("datasource");
dm.setDatasource(datasource);
String master = rs.getString("master");
if (StringUtils.isNotBlank(master))
dm.setMaster(OafMapperUtils.createOpenaireId(10, master, true));
else
dm.setMaster(OafMapperUtils.createOpenaireId(10, datasource, true));
return dm;
} catch (final SQLException e) {
throw new RuntimeException(e);
}
}
@Override
public void close() throws IOException {
dbClient.close();
writer.close();
}
public ReadMasterDatasourceFromDB(
final String hdfsPath, String hdfsNameNode, final String dbUrl, final String dbUser, final String dbPassword)
throws IOException {
this.dbClient = new DbClient(dbUrl, dbUser, dbPassword);
Configuration conf = new Configuration();
conf.set("fs.defaultFS", hdfsNameNode);
FileSystem fileSystem = FileSystem.get(conf);
Path hdfsWritePath = new Path(hdfsPath);
FSDataOutputStream fsDataOutputStream = null;
if (fileSystem.exists(hdfsWritePath)) {
fsDataOutputStream = fileSystem.append(hdfsWritePath);
} else {
fsDataOutputStream = fileSystem.create(hdfsWritePath);
}
this.writer = new BufferedWriter(new OutputStreamWriter(fsDataOutputStream, StandardCharsets.UTF_8));
}
protected void writeMap(final DatasourceMaster dm) {
try {
writer.write(OBJECT_MAPPER.writeValueAsString(dm));
writer.newLine();
} catch (final IOException e) {
throw new RuntimeException(e);
}
}
}

View File

@ -1,209 +0,0 @@
package eu.dnetlib.dhp.bulktag.eosc;
import static eu.dnetlib.dhp.PropagationConstant.readPath;
import static eu.dnetlib.dhp.PropagationConstant.removeOutputDir;
import static eu.dnetlib.dhp.bulktag.community.TaggingConstants.*;
import static eu.dnetlib.dhp.bulktag.community.TaggingConstants.TAGGING_TRUST;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_PROVENANCE_ACTIONS;
import java.io.Serializable;
import java.util.*;
import java.util.stream.Collectors;
import javax.print.attribute.DocAttributeSet;
import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.FilterFunction;
import org.apache.spark.api.java.function.ForeachFunction;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.gson.Gson;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.bulktag.SparkBulkTagJob;
import eu.dnetlib.dhp.bulktag.community.*;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.*;
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
import scala.Tuple2;
/**
* @author miriam.baglioni
* @Date 21/07/22
*/
public class SparkEoscBulkTag implements Serializable {
private static final Logger log = LoggerFactory.getLogger(SparkEoscBulkTag.class);
public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
private static String OPENAIRE_3 = "openaire3.0";
private static String OPENAIRE_4 = "openaire-pub_4.0";
private static String OPENAIRE_CRIS = "openaire-cris_1.1";
private static String OPENAIRE_DATA = "openaire2.0_data";
public static void main(String[] args) throws Exception {
String jsonConfiguration = IOUtils
.toString(
SparkEoscBulkTag.class
.getResourceAsStream(
"/eu/dnetlib/dhp/bulktag/input_eosc_bulkTag_parameters.json"));
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
parser.parseArgument(args);
Boolean isSparkSessionManaged = Optional
.ofNullable(parser.get("isSparkSessionManaged"))
.map(Boolean::valueOf)
.orElse(Boolean.TRUE);
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
final String inputPath = parser.get("sourcePath");
log.info("inputPath: {}", inputPath);
final String workingPath = parser.get("workingPath");
log.info("workingPath: {}", workingPath);
String datasourceMapPath = parser.get("datasourceMapPath");
log.info("datasourceMapPath: {}", datasourceMapPath);
final String resultClassName = parser.get("resultTableName");
log.info("resultTableName: {}", resultClassName);
final String resultType = parser.get("resultType");
log.info("resultType: {}", resultType);
Class<? extends Result> resultClazz = (Class<? extends Result>) Class.forName(resultClassName);
SparkConf conf = new SparkConf();
CommunityConfiguration cc;
runWithSparkSession(
conf,
isSparkSessionManaged,
spark -> {
removeOutputDir(spark, workingPath);
selectCompliantDatasources(spark, inputPath, workingPath, datasourceMapPath);
execBulkTag(spark, inputPath, workingPath, resultType, resultClazz);
});
}
private static void selectCompliantDatasources(SparkSession spark, String inputPath, String workingPath,
String datasourceMapPath) {
Dataset<Datasource> datasources = readPath(spark, inputPath + "datasource", Datasource.class)
.filter((FilterFunction<Datasource>) ds -> {
final String compatibility = ds.getOpenairecompatibility().getClassid();
return compatibility.equalsIgnoreCase(OPENAIRE_3) ||
compatibility.equalsIgnoreCase(OPENAIRE_4) ||
compatibility.equalsIgnoreCase(OPENAIRE_CRIS) ||
compatibility.equalsIgnoreCase(OPENAIRE_DATA);
});
Dataset<DatasourceMaster> datasourceMaster = readPath(spark, datasourceMapPath, DatasourceMaster.class);
datasources
.joinWith(datasourceMaster, datasources.col("id").equalTo(datasourceMaster.col("master")), "left")
.map(
(MapFunction<Tuple2<Datasource, DatasourceMaster>, DatasourceMaster>) t2 -> t2._2(),
Encoders.bean(DatasourceMaster.class))
.filter(Objects::nonNull)
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(workingPath + "datasource");
}
private static <R extends Result> void execBulkTag(
SparkSession spark,
String inputPath,
String workingPath,
String resultType,
Class<R> resultClazz) {
List<String> hostedByList = readPath(spark, workingPath + "datasource", DatasourceMaster.class)
.map((MapFunction<DatasourceMaster, String>) dm -> dm.getMaster(), Encoders.STRING())
.collectAsList();
readPath(spark, inputPath + resultType, resultClazz)
.map(
(MapFunction<R, R>) value -> enrich(value, hostedByList),
Encoders.bean(resultClazz))
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(workingPath + resultType);
readPath(spark, workingPath + resultType, resultClazz)
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(inputPath + resultType);
}
private static <R extends Result> R enrich(R value, List<String> hostedByList) {
if (value.getDataInfo().getDeletedbyinference() == null) {
value.getDataInfo().setDeletedbyinference(false);
}
if (value.getContext() == null) {
value.setContext(new ArrayList<>());
}
if (value
.getInstance()
.stream()
.anyMatch(
i -> (hostedByList.contains(i.getHostedby().getKey())))
&&
!value.getContext().stream().anyMatch(c -> c.getId().equals("eosc"))) {
Context context = new Context();
context.setId("eosc");
context
.setDataInfo(
Arrays
.asList(
OafMapperUtils
.dataInfo(
false, BULKTAG_DATA_INFO_TYPE, true, false,
OafMapperUtils
.qualifier(
CLASS_ID_DATASOURCE, CLASS_NAME_BULKTAG_DATASOURCE,
DNET_PROVENANCE_ACTIONS, DNET_PROVENANCE_ACTIONS),
TAGGING_TRUST)));
value.getContext().add(context);
}
return value;
}
public static <R> Dataset<R> readPath(
SparkSession spark, String inputPath, Class<R> clazz) {
return spark
.read()
.textFile(inputPath)
.map((MapFunction<String, R>) value -> OBJECT_MAPPER.readValue(value, clazz), Encoders.bean(clazz));
}
// TODO remove this hack as soon as the values fixed by this method will be provided as NON null
private static <R extends Result> MapFunction<R, R> patchResult() {
return r -> {
if (r.getDataInfo().getDeletedbyinference() == null) {
r.getDataInfo().setDeletedbyinference(false);
}
if (r.getContext() == null) {
r.setContext(new ArrayList<>());
}
return r;
};
}
}

View File

@ -1,242 +0,0 @@
package eu.dnetlib.dhp.bulktag.eosc;
import static eu.dnetlib.dhp.PropagationConstant.readPath;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import java.util.*;
import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.schema.oaf.*;
public class SparkEoscTag {
private static final Logger log = LoggerFactory.getLogger(SparkEoscTag.class);
public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
public static final String EOSC_GALAXY_WORKFLOW = "EOSC::Galaxy Workflow";
public static final String EOSC_TWITTER_DATA = "EOSC::Twitter Data";
public static final String EOSC_JUPYTER_NOTEBOOK = "EOSC::Jupyter Notebook";
public static final String COMPLIES_WITH = "compliesWith";
public static void main(String[] args) throws Exception {
String jsonConfiguration = IOUtils
.toString(
SparkEoscTag.class
.getResourceAsStream(
"/eu/dnetlib/dhp/bulktag/input_eoscTag_parameters.json"));
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
parser.parseArgument(args);
Boolean isSparkSessionManaged = Optional
.ofNullable(parser.get("isSparkSessionManaged"))
.map(Boolean::valueOf)
.orElse(Boolean.TRUE);
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
final String inputPath = parser.get("sourcePath");
log.info("inputPath: {}", inputPath);
final String workingPath = parser.get("workingPath");
log.info("workingPath: {}", workingPath);
SparkConf conf = new SparkConf();
runWithSparkSession(
conf,
isSparkSessionManaged,
spark -> {
execEoscTag(spark, inputPath, workingPath);
});
}
public static EoscIfGuidelines newInstance(String code, String label, String url, String semantics) {
EoscIfGuidelines eig = new EoscIfGuidelines();
eig.setCode(code);
eig.setLabel(label);
eig.setUrl(url);
eig.setSemanticRelation(semantics);
return eig;
}
private static void execEoscTag(SparkSession spark, String inputPath, String workingPath) {
readPath(spark, inputPath + "/software", Software.class)
.map((MapFunction<Software, Software>) s -> {
if (containsCriteriaNotebook(s)) {
if (!Optional.ofNullable(s.getEoscifguidelines()).isPresent())
s.setEoscifguidelines(new ArrayList<>());
addEIG(
s.getEoscifguidelines(), EOSC_JUPYTER_NOTEBOOK, EOSC_JUPYTER_NOTEBOOK, "",
COMPLIES_WITH);
}
if (containsCriteriaGalaxy(s)) {
if (!Optional.ofNullable(s.getEoscifguidelines()).isPresent())
s.setEoscifguidelines(new ArrayList<>());
addEIG(
s.getEoscifguidelines(), EOSC_GALAXY_WORKFLOW, EOSC_GALAXY_WORKFLOW, "", COMPLIES_WITH);
}
return s;
}, Encoders.bean(Software.class))
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(workingPath + "/software");
readPath(spark, workingPath + "/software", Software.class)
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(inputPath + "/software");
readPath(spark, inputPath + "/otherresearchproduct", OtherResearchProduct.class)
.map((MapFunction<OtherResearchProduct, OtherResearchProduct>) orp -> {
if (!Optional.ofNullable(orp.getEoscifguidelines()).isPresent())
orp.setEoscifguidelines(new ArrayList<>());
if (containsCriteriaGalaxy(orp)) {
addEIG(
orp.getEoscifguidelines(), EOSC_GALAXY_WORKFLOW, EOSC_GALAXY_WORKFLOW, "",
COMPLIES_WITH);
}
if (containscriteriaTwitter(orp)) {
addEIG(orp.getEoscifguidelines(), EOSC_TWITTER_DATA, EOSC_TWITTER_DATA, "", COMPLIES_WITH);
}
if (containsCriteriaNotebook(orp)) {
addEIG(
orp.getEoscifguidelines(), EOSC_JUPYTER_NOTEBOOK, EOSC_JUPYTER_NOTEBOOK, "",
COMPLIES_WITH);
}
return orp;
}, Encoders.bean(OtherResearchProduct.class))
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(workingPath + "/otherresearchproduct");
readPath(spark, workingPath + "/otherresearchproduct", OtherResearchProduct.class)
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(inputPath + "/otherresearchproduct");
readPath(spark, inputPath + "/dataset", Dataset.class)
.map((MapFunction<Dataset, Dataset>) d -> {
if (!Optional.ofNullable(d.getEoscifguidelines()).isPresent())
d.setEoscifguidelines(new ArrayList<>());
if (containscriteriaTwitter(d)) {
addEIG(d.getEoscifguidelines(), EOSC_TWITTER_DATA, EOSC_TWITTER_DATA, "", COMPLIES_WITH);
}
return d;
}, Encoders.bean(Dataset.class))
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(workingPath + "/dataset");
readPath(spark, workingPath + "/dataset", Dataset.class)
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(inputPath + "/dataset");
}
private static void addEIG(List<EoscIfGuidelines> eoscifguidelines, String code, String label, String url,
String sem) {
if (!eoscifguidelines.stream().anyMatch(eig -> eig.getCode().equals(code)))
eoscifguidelines.add(newInstance(code, label, url, sem));
}
private static boolean containscriteriaTwitter(Result r) {
Set<String> words = getWordsSP(r.getTitle());
words.addAll(getWordsF(r.getDescription()));
if (words.contains("twitter") &&
(words.contains("data") || words.contains("dataset")))
return true;
return Optional
.ofNullable(r.getSubject())
.map(
s -> s.stream().anyMatch(sbj -> sbj.getValue().toLowerCase().contains("twitter")) &&
s.stream().anyMatch(sbj -> sbj.getValue().toLowerCase().contains("data")))
.orElse(false);
}
private static boolean containsCriteriaGalaxy(Result r) {
Set<String> words = getWordsSP(r.getTitle());
words.addAll(getWordsF(r.getDescription()));
if (words.contains("galaxy") &&
words.contains("workflow"))
return true;
return Optional
.ofNullable(r.getSubject())
.map(
s -> s.stream().anyMatch(sbj -> sbj.getValue().toLowerCase().contains("galaxy")) &&
s.stream().anyMatch(sbj -> sbj.getValue().toLowerCase().contains("workflow")))
.orElse(false);
}
private static boolean containsCriteriaNotebook(Result s) {
if (!Optional.ofNullable(s.getSubject()).isPresent())
return false;
if (s.getSubject().stream().anyMatch(sbj -> sbj.getValue().toLowerCase().contains("jupyter")))
return true;
if (s
.getSubject()
.stream()
.anyMatch(
sbj -> sbj.getValue().toLowerCase().contains("python") &&
sbj.getValue().toLowerCase().contains("notebook")))
return true;
if (s.getSubject().stream().anyMatch(sbj -> sbj.getValue().toLowerCase().contains("python")) &&
s.getSubject().stream().anyMatch(sbj -> sbj.getValue().toLowerCase().contains("notebook")))
return true;
return false;
}
private static Set<String> getWordsSP(List<StructuredProperty> elem) {
Set<String> words = new HashSet<>();
Optional
.ofNullable(elem)
.ifPresent(
e -> e
.forEach(
t -> words
.addAll(
Arrays.asList(t.getValue().toLowerCase().replaceAll("[^a-zA-Z ]", "").split(" ")))));
return words;
}
private static Set<String> getWordsF(List<Field<String>> elem) {
Set<String> words = new HashSet<>();
Optional
.ofNullable(elem)
.ifPresent(
e -> e
.forEach(
t -> words
.addAll(
Arrays.asList(t.getValue().toLowerCase().replaceAll("[^a-zA-Z ]", "").split(" ")))));
return words;
}
}

View File

@ -219,158 +219,9 @@
<error to="Kill"/> <error to="Kill"/>
</action> </action>
<join name="wait" to="eosc_tag"/> <join name="wait" to="End"/>
<action name="eosc_tag">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn-cluster</master>
<mode>cluster</mode>
<name>EOSC_tagging</name>
<class>eu.dnetlib.dhp.bulktag.eosc.SparkEoscTag</class>
<jar>dhp-enrichment-${projectVersion}.jar</jar>
<spark-opts>
--num-executors=${sparkExecutorNumber}
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${outputPath}</arg>
<arg>--workingPath</arg><arg>${workingDir}/eoscTag</arg>
</spark>
<ok to="eosc_get_datasource_master"/>
<error to="Kill"/>
</action>
<action name="eosc_get_datasource_master">
<java>
<main-class>eu.dnetlib.dhp.bulktag.eosc.ReadMasterDatasourceFromDB</main-class>
<arg>--hdfsPath</arg><arg>${workingDir}/datasourcemaster</arg>
<arg>--hdfsNameNode</arg><arg>${nameNode}</arg>
<arg>--postgresUrl</arg><arg>${postgresURL}</arg>
<arg>--postgresUser</arg><arg>${postgresUser}</arg>
<arg>--postgresPassword</arg><arg>${postgresPassword}</arg>
</java>
<ok to="fork_eosc_context_tag"/>
<error to="Kill"/>
</action>
<fork name="fork_eosc_context_tag">
<path start="eosc_context_tag_publication"/>
<path start="eosc_context_tag_dataset"/>
<path start="eosc_context_tag_otherresearchproduct"/>
<path start="eosc_context_tag_software"/>
</fork>
<action name="eosc_context_tag_publication">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn-cluster</master>
<mode>cluster</mode>
<name>EOSC tagging publication</name>
<class>eu.dnetlib.dhp.bulktag.eosc.SparkEoscBulkTag</class>
<jar>dhp-enrichment-${projectVersion}.jar</jar>
<spark-opts>
--num-executors=${sparkExecutorNumber}
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${outputPath}/</arg>
<arg>--resultType</arg><arg>publication</arg>
<arg>--workingPath</arg><arg>${workingDir}/eoscContextTag/</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
<arg>--datasourceMapPath</arg><arg>${workingDir}/datasourcemaster</arg>
</spark>
<ok to="wait_eosc_context_tag"/>
<error to="Kill"/>
</action>
<action name="eosc_context_tag_dataset">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn-cluster</master>
<mode>cluster</mode>
<name>EOSC tagging dataset</name>
<class>eu.dnetlib.dhp.bulktag.eosc.SparkEoscBulkTag</class>
<jar>dhp-enrichment-${projectVersion}.jar</jar>
<spark-opts>
--num-executors=${sparkExecutorNumber}
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${outputPath}/</arg>
<arg>--resultType</arg><arg>dataset</arg>
<arg>--workingPath</arg><arg>${workingDir}/eoscContextTag/</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
<arg>--datasourceMapPath</arg><arg>${workingDir}/datasourcemaster</arg>
</spark>
<ok to="wait_eosc_context_tag"/>
<error to="Kill"/>
</action>
<action name="eosc_context_tag_software">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn-cluster</master>
<mode>cluster</mode>
<name>EOSC tagging software</name>
<class>eu.dnetlib.dhp.bulktag.eosc.SparkEoscBulkTag</class>
<jar>dhp-enrichment-${projectVersion}.jar</jar>
<spark-opts>
--num-executors=${sparkExecutorNumber}
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${outputPath}/</arg>
<arg>--resultType</arg><arg>software</arg>
<arg>--workingPath</arg><arg>${workingDir}/eoscContextTag/</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
<arg>--datasourceMapPath</arg><arg>${workingDir}/datasourcemaster</arg>
</spark>
<ok to="wait_eosc_context_tag"/>
<error to="Kill"/>
</action>
<action name="eosc_context_tag_otherresearchproduct">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn-cluster</master>
<mode>cluster</mode>
<name>EOSC tagging ORP</name>
<class>eu.dnetlib.dhp.bulktag.eosc.SparkEoscBulkTag</class>
<jar>dhp-enrichment-${projectVersion}.jar</jar>
<spark-opts>
--num-executors=${sparkExecutorNumber}
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${outputPath}/</arg>
<arg>--resultType</arg><arg>otherresearchproduct</arg>
<arg>--workingPath</arg><arg>${workingDir}/eoscContextTag/</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
<arg>--datasourceMapPath</arg><arg>${workingDir}/datasourcemaster</arg>
</spark>
<ok to="wait_eosc_context_tag"/>
<error to="Kill"/>
</action>
<join name="wait_eosc_context_tag" to="End"/>
<end name="End"/> <end name="End"/>
</workflow-app> </workflow-app>

View File

@ -0,0 +1,62 @@
for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType')
let $subj := $x//CONFIGURATION/context/param[./@name='subject']/text()
let $datasources := $x//CONFIGURATION/context/category[./@id=concat($x//CONFIGURATION/context/@id,'::contentproviders')]/concept
let $organizations := $x//CONFIGURATION/context/category[./@id=concat($x//CONFIGURATION/context/@id,'::resultorganizations')]/concept
let $communities := $x//CONFIGURATION/context/category[./@id=concat($x//CONFIGURATION/context/@id,'::zenodocommunities')]/concept
let $fos := $x//CONFIGURATION/context/param[./@name='fos']/text()
let $sdg := $x//CONFIGURATION/context/param[./@name='sdg']/text()
let $zenodo := $x//param[./@name='zenodoCommunity']/text()
where $x//CONFIGURATION/context[./@type='community' or ./@type='ri'] and $x//context/param[./@name = 'status']/text() != 'hidden'
return
<community>
{ $x//CONFIGURATION/context/@id}
<removeConstraints>
{$x//CONFIGURATION/context/param[./@name='removeConstraints']/text() }
</removeConstraints>
<advancedConstraints>
{$x//CONFIGURATION/context/param[./@name='advancedConstraints']/text() }
</advancedConstraints>
<subjects>
{for $y in tokenize($subj,',')
return
<subject>{$y}</subject>}
{for $y in tokenize($fos,',')
return
<subject>{$y}</subject>}
{for $y in tokenize($sdg,',')
return
<subject>{$y}</subject>}
</subjects>
<datasources>
{for $d in $datasources
where $d/param[./@name='enabled']/text()='true'
return
<datasource>
<openaireId>
{$d//param[./@name='openaireId']/text()}
</openaireId>
<selcriteria>
{$d/param[./@name='selcriteria']/text()}
</selcriteria>
</datasource> }
</datasources>
<zenodocommunities>
{for $zc in $zenodo
return
<zenodocommunity>
<zenodoid>
{$zc}
</zenodoid>
</zenodocommunity>}
{for $zc in $communities
return
<zenodocommunity>
<zenodoid>
{$zc/param[./@name='zenodoid']/text()}
</zenodoid>
<selcriteria>
{$zc/param[./@name='selcriteria']/text()}
</selcriteria>
</zenodocommunity>}
</zenodocommunities>
</community>

View File

@ -6,6 +6,7 @@ import static eu.dnetlib.dhp.bulktag.community.TaggingConstants.ZENODO_COMMUNITY
import java.io.IOException; import java.io.IOException;
import java.nio.file.Files; import java.nio.file.Files;
import java.nio.file.Path; import java.nio.file.Path;
import java.util.List;
import org.apache.commons.io.FileUtils; import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
@ -24,10 +25,7 @@ import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.schema.oaf.Dataset; import eu.dnetlib.dhp.schema.oaf.*;
import eu.dnetlib.dhp.schema.oaf.OtherResearchProduct;
import eu.dnetlib.dhp.schema.oaf.Publication;
import eu.dnetlib.dhp.schema.oaf.Software;
public class BulkTagJobTest { public class BulkTagJobTest {
@ -41,8 +39,10 @@ public class BulkTagJobTest {
+ " \"contributor\" : \"$['contributor'][*]['value']\"," + " \"contributor\" : \"$['contributor'][*]['value']\","
+ " \"description\" : \"$['description'][*]['value']\", " + " \"description\" : \"$['description'][*]['value']\", "
+ " \"subject\" :\"$['subject'][*]['value']\" , " + + " \"subject\" :\"$['subject'][*]['value']\" , " +
"\"fos\" : \"$['subject'][?(@['qualifier']['classid']=='FOS')].value\"" + "\"fos\" : \"$['subject'][?(@['qualifier']['classid']=='FOS')].value\"," +
"} "; "\"sdg\" : \"$['subject'][?(@['qualifier']['classid']=='SDG')].value\"," +
"\"hostedby\" : \"$['instance'][*]['hostedby']['key']\" , " +
"\"collectedfrom\" : \"$['instance'][*]['collectedfrom']['key']\"} ";
private static SparkSession spark; private static SparkSession spark;
@ -58,7 +58,7 @@ public class BulkTagJobTest {
.toString( .toString(
BulkTagJobTest.class BulkTagJobTest.class
.getResourceAsStream( .getResourceAsStream(
"/eu/dnetlib/dhp/bulktag/communityconfiguration/tagging_conf.xml")); "/eu/dnetlib/dhp/bulktag/communityconfiguration/tagging_conf_remove.xml"));
} catch (IOException e) { } catch (IOException e) {
e.printStackTrace(); e.printStackTrace();
} }
@ -638,7 +638,8 @@ public class BulkTagJobTest {
new String[] { new String[] {
"-isTest", Boolean.TRUE.toString(), "-isTest", Boolean.TRUE.toString(),
"-isSparkSessionManaged", Boolean.FALSE.toString(), "-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", getClass().getResource("/eu/dnetlib/dhp/bulktag/sample/software/").getPath(), "-sourcePath",
getClass().getResource("/eu/dnetlib/dhp/bulktag/sample/software/software_10.json.gz").getPath(),
"-taggingConf", taggingConf, "-taggingConf", taggingConf,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Software", "-resultTableName", "eu.dnetlib.dhp.schema.oaf.Software",
"-outputPath", workingDir.toString() + "/software", "-outputPath", workingDir.toString() + "/software",
@ -681,7 +682,7 @@ public class BulkTagJobTest {
Assertions.assertEquals(3, idExplodeCommunity.filter("community = 'covid-19'").count()); Assertions.assertEquals(3, idExplodeCommunity.filter("community = 'covid-19'").count());
Assertions.assertEquals(1, idExplodeCommunity.filter("community = 'dh-ch'").count()); Assertions.assertEquals(1, idExplodeCommunity.filter("community = 'dh-ch'").count());
Assertions.assertEquals(4, idExplodeCommunity.filter("community = 'aginfra'").count()); Assertions.assertEquals(4, idExplodeCommunity.filter("community = 'aginfra'").count());
Assertions.assertEquals(1, idExplodeCommunity.filter("community = 'dariah'").count()); Assertions.assertEquals(1, idExplodeCommunity.filter("community = 'dth'").count());
Assertions.assertEquals(1, idExplodeCommunity.filter("community = 'fam'").count()); Assertions.assertEquals(1, idExplodeCommunity.filter("community = 'fam'").count());
Assertions Assertions
@ -691,7 +692,7 @@ public class BulkTagJobTest {
.filter( .filter(
"provenance = 'community:zenodocommunity' and " "provenance = 'community:zenodocommunity' and "
+ "id = '50|od______1582::4132f5ec9496f0d6adc7b00a50a56ff4' and (" + "id = '50|od______1582::4132f5ec9496f0d6adc7b00a50a56ff4' and ("
+ "community = 'dh-ch' or community = 'dariah')") + "community = 'dh-ch' or community = 'dth')")
.count()); .count());
query = "select id, MyT.id community, size(MyT.datainfo) datainfosize " query = "select id, MyT.id community, size(MyT.datainfo) datainfosize "
@ -752,7 +753,7 @@ public class BulkTagJobTest {
.textFile(workingDir.toString() + "/dataset") .textFile(workingDir.toString() + "/dataset")
.map(item -> OBJECT_MAPPER.readValue(item, Dataset.class)); .map(item -> OBJECT_MAPPER.readValue(item, Dataset.class));
Assertions.assertEquals(10, tmp.count()); Assertions.assertEquals(12, tmp.count());
org.apache.spark.sql.Dataset<Dataset> verificationDataset = spark org.apache.spark.sql.Dataset<Dataset> verificationDataset = spark
.createDataset(tmp.rdd(), Encoders.bean(Dataset.class)); .createDataset(tmp.rdd(), Encoders.bean(Dataset.class));
@ -766,19 +767,69 @@ public class BulkTagJobTest {
org.apache.spark.sql.Dataset<Row> idExplodeCommunity = spark.sql(query); org.apache.spark.sql.Dataset<Row> idExplodeCommunity = spark.sql(query);
idExplodeCommunity.show(false); idExplodeCommunity.show(false);
Assertions.assertEquals(4, idExplodeCommunity.count());
Assertions
.assertEquals(
3, idExplodeCommunity.filter("provenance = 'community:datasource'").count());
} }
@Test @Test
void bulktagPublicationwithConstraintsTest() throws Exception { void bulkTagOtherJupyter() throws Exception {
final String sourcePath = getClass() final String sourcePath = getClass()
.getResource( .getResource(
"/eu/dnetlib/dhp/bulktag/sample/publication/orcidbulktagfordatasource") "/eu/dnetlib/dhp/eosctag/jupyter/otherresearchproduct")
.getPath();
SparkBulkTagJob
.main(
new String[] {
"-isTest", Boolean.TRUE.toString(),
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath,
"-taggingConf", taggingConf,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.OtherResearchProduct",
"-outputPath", workingDir.toString() + "/otherresearchproduct",
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
"-pathMap", pathMap
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
Assertions
.assertEquals(
10, sc
.textFile(workingDir.toString() + "/otherresearchproduct")
.map(item -> OBJECT_MAPPER.readValue(item, OtherResearchProduct.class))
.count());
Assertions
.assertEquals(
0, sc
.textFile(workingDir.toString() + "/otherresearchproduct")
.map(item -> OBJECT_MAPPER.readValue(item, OtherResearchProduct.class))
.filter(
orp -> orp
.getSubject()
.stream()
.anyMatch(sbj -> sbj.getValue().equals("EOSC::Jupyter Notebook")))
.count());
Assertions
.assertEquals(
0, sc
.textFile(workingDir.toString() + "/otherresearchproduct")
.map(item -> OBJECT_MAPPER.readValue(item, OtherResearchProduct.class))
.filter(
orp -> orp
.getSubject()
.stream()
.anyMatch(eig -> eig.getValue().equals("EOSC::Jupyter Notebook")))
.count());
}
@Test
public void bulkTagDatasetJupyter() throws Exception {
final String sourcePath = getClass()
.getResource(
"/eu/dnetlib/dhp/eosctag/jupyter/dataset")
.getPath(); .getPath();
SparkBulkTagJob SparkBulkTagJob
.main( .main(
@ -786,26 +837,723 @@ public class BulkTagJobTest {
"-isTest", Boolean.TRUE.toString(), "-isTest", Boolean.TRUE.toString(),
"-isSparkSessionManaged", Boolean.FALSE.toString(), "-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath, "-sourcePath", sourcePath,
"-taggingConf", IOUtils "-taggingConf", taggingConf,
.toString( "-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset",
BulkTagJobTest.class "-outputPath", workingDir.toString() + "/dataset",
.getResourceAsStream( "-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
"/eu/dnetlib/dhp/bulktag/communityconfiguration/tagging_conf_neanias.xml")), "-pathMap", pathMap
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Publication", });
"-outputPath", workingDir.toString() + "/publication",
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
Assertions
.assertEquals(
10, sc
.textFile(workingDir.toString() + "/dataset")
.map(item -> OBJECT_MAPPER.readValue(item, Dataset.class))
.count());
Assertions
.assertEquals(
0, sc
.textFile(workingDir.toString() + "/dataset")
.map(item -> OBJECT_MAPPER.readValue(item, Dataset.class))
.filter(
ds -> ds.getSubject().stream().anyMatch(sbj -> sbj.getValue().equals("EOSC::Jupyter Notebook")))
.count());
Assertions
.assertEquals(
0, sc
.textFile(workingDir.toString() + "/dataset")
.map(item -> OBJECT_MAPPER.readValue(item, Dataset.class))
.filter(
ds -> ds
.getEoscifguidelines()
.stream()
.anyMatch(eig -> eig.getCode().equals("EOSC::Jupyter Notebook")))
.count());
}
@Test
public void bulkTagSoftwareJupyter() throws Exception {
final String sourcePath = getClass()
.getResource(
"/eu/dnetlib/dhp/eosctag/jupyter/software")
.getPath();
SparkBulkTagJob
.main(
new String[] {
"-isTest", Boolean.TRUE.toString(),
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath,
"-taggingConf", taggingConf,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Software",
"-outputPath", workingDir.toString() + "/software",
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL, "-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
"-pathMap", pathMap "-pathMap", pathMap
}); });
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<Publication> tmp = sc JavaRDD<Software> tmp = sc
.textFile(workingDir.toString() + "/publication") .textFile(workingDir.toString() + "/software")
.map(item -> OBJECT_MAPPER.readValue(item, Publication.class)); .map(item -> OBJECT_MAPPER.readValue(item, Software.class));
Assertions.assertEquals(2, tmp.count()); Assertions.assertEquals(10, tmp.count());
org.apache.spark.sql.Dataset<Publication> verificationDataset = spark
.createDataset(tmp.rdd(), Encoders.bean(Publication.class)); Assertions
.assertEquals(
4,
tmp
.filter(s -> s.getEoscifguidelines() != null)
.filter(
s -> s
.getEoscifguidelines()
.stream()
.anyMatch(eig -> eig.getCode().equals("EOSC::Jupyter Notebook")))
.count());
Assertions
.assertEquals(
1, tmp
.filter(sw -> sw.getId().equals("50|od______1582::4132f5ec9496f0d6adc7b00a50a56ff4"))
.collect()
.get(0)
.getEoscifguidelines()
.size());
Assertions
.assertEquals(
1, tmp
.filter(sw -> sw.getId().equals("50|od______1582::4132f5ec9496f0d6adc7b00a50a56ff4"))
.collect()
.get(0)
.getSubject()
.size());
Assertions
.assertTrue(
tmp
.filter(sw -> sw.getId().equals("50|od______1582::4132f5ec9496f0d6adc7b00a50a56ff4"))
.collect()
.get(0)
.getEoscifguidelines()
.stream()
.anyMatch(s -> s.getCode().equals("EOSC::Jupyter Notebook")));
Assertions
.assertFalse(
tmp
.filter(sw -> sw.getId().equals("50|od______1582::4132f5ec9496f0d6adc7b00a50a56ff4"))
.collect()
.get(0)
.getSubject()
.stream()
.anyMatch(s -> s.getValue().equals("EOSC::Jupyter Notebook")));
Assertions
.assertEquals(
5, tmp
.filter(sw -> sw.getId().equals("50|od______1582::501b25d420f808c8eddcd9b16e917f11"))
.collect()
.get(0)
.getSubject()
.size());
Assertions
.assertFalse(
tmp
.filter(sw -> sw.getId().equals("50|od______1582::501b25d420f808c8eddcd9b16e917f11"))
.collect()
.get(0)
.getSubject()
.stream()
.anyMatch(s -> s.getValue().equals("EOSC::Jupyter Notebook")));
Assertions
.assertEquals(
0,
tmp
.filter(sw -> sw.getId().equals("50|od______1582::501b25d420f808c8eddcd9b16e917f11"))
.collect()
.get(0)
.getEoscifguidelines()
.size());
Assertions
.assertEquals(
8, tmp
.filter(sw -> sw.getId().equals("50|od______1582::581621232a561b7e8b4952b18b8b0e56"))
.collect()
.get(0)
.getSubject()
.size());
Assertions
.assertFalse(
tmp
.filter(sw -> sw.getId().equals("50|od______1582::581621232a561b7e8b4952b18b8b0e56"))
.collect()
.get(0)
.getSubject()
.stream()
.anyMatch(s -> s.getValue().equals("EOSC::Jupyter Notebook")));
Assertions
.assertEquals(
1, tmp
.filter(sw -> sw.getId().equals("50|od______1582::581621232a561b7e8b4952b18b8b0e56"))
.collect()
.get(0)
.getEoscifguidelines()
.size());
Assertions
.assertTrue(
tmp
.filter(sw -> sw.getId().equals("50|od______1582::581621232a561b7e8b4952b18b8b0e56"))
.collect()
.get(0)
.getEoscifguidelines()
.stream()
.anyMatch(s -> s.getCode().equals("EOSC::Jupyter Notebook")));
Assertions
.assertEquals(
5, tmp
.filter(sw -> sw.getId().equals("50|od______1582::5aec1186054301b66c0c5dc35972a589"))
.collect()
.get(0)
.getSubject()
.size());
Assertions
.assertFalse(
tmp
.filter(sw -> sw.getId().equals("50|od______1582::5aec1186054301b66c0c5dc35972a589"))
.collect()
.get(0)
.getSubject()
.stream()
.anyMatch(s -> s.getValue().equals("EOSC::Jupyter Notebook")));
Assertions
.assertEquals(
0,
tmp
.filter(sw -> sw.getId().equals("50|od______1582::5aec1186054301b66c0c5dc35972a589"))
.collect()
.get(0)
.getEoscifguidelines()
.size());
Assertions
.assertEquals(
8, tmp
.filter(sw -> sw.getId().equals("50|od______1582::639909adfad9d708308f2aedb733e4a0"))
.collect()
.get(0)
.getSubject()
.size());
Assertions
.assertFalse(
tmp
.filter(sw -> sw.getId().equals("50|od______1582::639909adfad9d708308f2aedb733e4a0"))
.collect()
.get(0)
.getSubject()
.stream()
.anyMatch(s -> s.getValue().equals("EOSC::Jupyter Notebook")));
Assertions
.assertEquals(
1,
tmp
.filter(sw -> sw.getId().equals("50|od______1582::639909adfad9d708308f2aedb733e4a0"))
.collect()
.get(0)
.getEoscifguidelines()
.size());
Assertions
.assertTrue(
tmp
.filter(sw -> sw.getId().equals("50|od______1582::639909adfad9d708308f2aedb733e4a0"))
.collect()
.get(0)
.getEoscifguidelines()
.stream()
.anyMatch(s -> s.getCode().equals("EOSC::Jupyter Notebook")));
List<Subject> subjects = tmp
.filter(sw -> sw.getId().equals("50|od______1582::6e7a9b21a2feef45673890432af34244"))
.collect()
.get(0)
.getSubject();
Assertions.assertEquals(7, subjects.size());
Assertions.assertTrue(subjects.stream().anyMatch(s -> s.getValue().equals("jupyter")));
Assertions.assertTrue(subjects.stream().anyMatch(s -> s.getValue().equals("Modeling and Simulation")));
Assertions.assertTrue(subjects.stream().anyMatch(s -> s.getValue().equals("structure granulaire")));
Assertions.assertTrue(subjects.stream().anyMatch(s -> s.getValue().equals("algorithme")));
Assertions.assertTrue(subjects.stream().anyMatch(s -> s.getValue().equals("simulation numérique")));
Assertions.assertTrue(subjects.stream().anyMatch(s -> s.getValue().equals("flux de gaz")));
Assertions.assertTrue(subjects.stream().anyMatch(s -> s.getValue().equals("flux de liquide")));
}
@Test
void galaxyOtherTest() throws Exception {
final String sourcePath = getClass()
.getResource(
"/eu/dnetlib/dhp/eosctag/galaxy/otherresearchproduct")
.getPath();
SparkBulkTagJob
.main(
new String[] {
"-isTest", Boolean.TRUE.toString(),
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath,
"-taggingConf", taggingConf,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.OtherResearchProduct",
"-outputPath", workingDir.toString() + "/otherresearchproduct",
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
"-pathMap", pathMap
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<OtherResearchProduct> orp = sc
.textFile(workingDir.toString() + "/otherresearchproduct")
.map(item -> OBJECT_MAPPER.readValue(item, OtherResearchProduct.class));
Assertions.assertEquals(10, orp.count());
Assertions
.assertEquals(
0,
orp
.filter(
s -> s.getSubject().stream().anyMatch(sbj -> sbj.getValue().equals("EOSC::Galaxy Workflow")))
.count());
orp.foreach(o -> System.out.println(OBJECT_MAPPER.writeValueAsString(o)));
Assertions
.assertEquals(
1, orp
.filter(o -> o.getEoscifguidelines() != null)
.filter(
o -> o
.getEoscifguidelines()
.stream()
.anyMatch(eig -> eig.getCode().equals("EOSC::Galaxy Workflow")))
.count());
Assertions
.assertEquals(
2, orp
.filter(sw -> sw.getId().equals("50|od______2017::0750a4d0782265873d669520f5e33c07"))
.collect()
.get(0)
.getSubject()
.size());
Assertions
.assertFalse(
orp
.filter(sw -> sw.getId().equals("50|od______2017::0750a4d0782265873d669520f5e33c07"))
.collect()
.get(0)
.getSubject()
.stream()
.anyMatch(s -> s.getValue().equals("EOSC::Galaxy Workflow")));
Assertions
.assertEquals(
1, orp
.filter(sw -> sw.getId().equals("50|od______2017::0750a4d0782265873d669520f5e33c07"))
.collect()
.get(0)
.getEoscifguidelines()
.size());
Assertions
.assertTrue(
orp
.filter(sw -> sw.getId().equals("50|od______2017::0750a4d0782265873d669520f5e33c07"))
.collect()
.get(0)
.getEoscifguidelines()
.stream()
.anyMatch(s -> s.getCode().equals("EOSC::Galaxy Workflow")));
Assertions
.assertEquals(
2, orp
.filter(sw -> sw.getId().equals("50|od______2017::1bd97baef19dbd2db3203b112bb83bc5"))
.collect()
.get(0)
.getSubject()
.size());
Assertions
.assertFalse(
orp
.filter(sw -> sw.getId().equals("50|od______2017::1bd97baef19dbd2db3203b112bb83bc5"))
.collect()
.get(0)
.getSubject()
.stream()
.anyMatch(s -> s.getValue().equals("EOSC::Galaxy Workflow")));
Assertions
.assertEquals(
2, orp
.filter(sw -> sw.getId().equals("50|od______2017::1e400f1747487fd15998735c41a55c72"))
.collect()
.get(0)
.getSubject()
.size());
Assertions
.assertFalse(
orp
.filter(sw -> sw.getId().equals("50|od______2017::1e400f1747487fd15998735c41a55c72"))
.collect()
.get(0)
.getSubject()
.stream()
.anyMatch(s -> s.getValue().equals("EOSC::Galaxy Workflow")));
}
@Test
void galaxySoftwareTest() throws Exception {
final String sourcePath = getClass()
.getResource(
"/eu/dnetlib/dhp/eosctag/galaxy/software")
.getPath();
SparkBulkTagJob
.main(
new String[] {
"-isTest", Boolean.TRUE.toString(),
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath,
"-taggingConf", taggingConf,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Software",
"-outputPath", workingDir.toString() + "/software",
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
"-pathMap", pathMap
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<Software> tmp = sc
.textFile(workingDir.toString() + "/software")
.map(item -> OBJECT_MAPPER.readValue(item, Software.class));
Assertions.assertEquals(11, tmp.count());
Assertions
.assertEquals(
0,
tmp
.filter(
s -> s.getSubject().stream().anyMatch(sbj -> sbj.getValue().equals("EOSC::Galaxy Workflow")))
.count());
Assertions
.assertEquals(
1,
tmp
.filter(
s -> s.getEoscifguidelines().size() > 0)
.count());
Assertions
.assertEquals(
1,
tmp
.filter(
s -> s.getEoscifguidelines().size() > 0)
.filter(
s -> s
.getEoscifguidelines()
.stream()
.anyMatch(eig -> eig.getCode().equals("EOSC::Galaxy Workflow")))
.count());
Assertions
.assertEquals(
1, tmp
.filter(sw -> sw.getId().equals("50|od______1582::4132f5ec9496f0d6adc7b00a50a56ff4"))
.collect()
.get(0)
.getSubject()
.size());
Assertions
.assertFalse(
tmp
.filter(sw -> sw.getId().equals("50|od______1582::4132f5ec9496f0d6adc7b00a50a56ff4"))
.collect()
.get(0)
.getSubject()
.stream()
.anyMatch(s -> s.getValue().equals("EOSC::Galaxy Workflow")));
Assertions
.assertEquals(
1, tmp
.filter(sw -> sw.getId().equals("50|od______1582::4132f5ec9496f0d6adc7b00a50a56ff4"))
.collect()
.get(0)
.getEoscifguidelines()
.size());
Assertions
.assertTrue(
tmp
.filter(sw -> sw.getId().equals("50|od______1582::4132f5ec9496f0d6adc7b00a50a56ff4"))
.collect()
.get(0)
.getEoscifguidelines()
.stream()
.anyMatch(eig -> eig.getCode().equals("EOSC::Galaxy Workflow")));
Assertions
.assertEquals(
5, tmp
.filter(sw -> sw.getId().equals("50|od______1582::501b25d420f808c8eddcd9b16e917f11"))
.collect()
.get(0)
.getSubject()
.size());
Assertions
.assertEquals(
8, tmp
.filter(sw -> sw.getId().equals("50|od______1582::581621232a561b7e8b4952b18b8b0e56"))
.collect()
.get(0)
.getSubject()
.size());
Assertions
.assertFalse(
tmp
.filter(sw -> sw.getId().equals("50|od______1582::581621232a561b7e8b4952b18b8b0e56"))
.collect()
.get(0)
.getSubject()
.stream()
.anyMatch(s -> s.getValue().equals("EOSC::Galaxy Workflow")));
}
@Test
void twitterDatasetTest() throws Exception {
final String sourcePath = getClass()
.getResource(
"/eu/dnetlib/dhp/eosctag/twitter/dataset")
.getPath();
SparkBulkTagJob
.main(
new String[] {
"-isTest", Boolean.TRUE.toString(),
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath,
"-taggingConf", taggingConf,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset",
"-outputPath", workingDir.toString() + "/dataset",
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
"-pathMap", pathMap
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<Dataset> dats = sc
.textFile(workingDir.toString() + "/dataset")
.map(item -> OBJECT_MAPPER.readValue(item, Dataset.class));
Assertions.assertEquals(11, dats.count());
Assertions
.assertEquals(
3,
dats
.filter(
s -> s
.getEoscifguidelines()
.stream()
.anyMatch(eig -> eig.getCode().equals("EOSC::Twitter Data")))
.count());
}
@Test
void twitterOtherTest() throws Exception {
final String sourcePath = getClass()
.getResource(
"/eu/dnetlib/dhp/eosctag/twitter/otherresearchproduct")
.getPath();
SparkBulkTagJob
.main(
new String[] {
"-isTest", Boolean.TRUE.toString(),
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath,
"-taggingConf", taggingConf,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.OtherResearchProduct",
"-outputPath", workingDir.toString() + "/otherresearchproduct",
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
"-pathMap", pathMap
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<OtherResearchProduct> orp = sc
.textFile(workingDir.toString() + "/otherresearchproduct")
.map(item -> OBJECT_MAPPER.readValue(item, OtherResearchProduct.class));
Assertions.assertEquals(10, orp.count());
Assertions
.assertEquals(
0,
orp
.filter(s -> s.getSubject().stream().anyMatch(sbj -> sbj.getValue().equals("EOSC::Twitter Data")))
.count());
Assertions
.assertEquals(
3,
orp
.filter(
s -> s
.getEoscifguidelines()
.stream()
.anyMatch(eig -> eig.getCode().equals("EOSC::Twitter Data")))
.count());
}
@Test
void twitterSoftwareTest() throws Exception {
final String sourcePath = getClass()
.getResource(
"/eu/dnetlib/dhp/eosctag/twitter/software")
.getPath();
SparkBulkTagJob
.main(
new String[] {
"-isTest", Boolean.TRUE.toString(),
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath,
"-taggingConf", taggingConf,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Software",
"-outputPath", workingDir.toString() + "/software",
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
"-pathMap", pathMap
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<Software> tmp = sc
.textFile(workingDir.toString() + "/software")
.map(item -> OBJECT_MAPPER.readValue(item, Software.class));
Assertions.assertEquals(10, tmp.count());
Assertions
.assertEquals(
0,
tmp
.filter(s -> s.getSubject().stream().anyMatch(sbj -> sbj.getValue().equals("EOSC::Twitter Data")))
.count());
}
@Test
void EoscContextTagTest() throws Exception {
final String sourcePath = getClass()
.getResource(
"/eu/dnetlib/dhp/bulktag/eosc/dataset/dataset_10.json")
.getPath();
SparkBulkTagJob
.main(
new String[] {
"-isTest", Boolean.TRUE.toString(),
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath,
"-taggingConf", taggingConf,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset",
"-outputPath", workingDir.toString() + "/dataset",
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
"-pathMap", pathMap
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<Dataset> tmp = sc
.textFile(workingDir.toString() + "/dataset")
.map(item -> OBJECT_MAPPER.readValue(item, Dataset.class));
Assertions.assertEquals(8, tmp.count());
Assertions
.assertEquals(
4,
tmp
.filter(
s -> s.getContext().stream().anyMatch(c -> c.getId().equals("eosc")))
.count());
Assertions
.assertEquals(
1,
tmp
.filter(
d -> d.getId().equals("50|475c1990cbb2::0fecfb874d9395aa69d2f4d7cd1acbea")
&&
d.getContext().stream().anyMatch(c -> c.getId().equals("eosc")))
.count());
Assertions
.assertEquals(
1,
tmp
.filter(
d -> d.getId().equals("50|475c1990cbb2::3185cd5d8a2b0a06bb9b23ef11748eb1")
&&
d.getContext().stream().anyMatch(c -> c.getId().equals("eosc")))
.count());
Assertions
.assertEquals(
1,
tmp
.filter(
d -> d.getId().equals("50|475c1990cbb2::3894c94123e96df8a21249957cf160cb")
&&
d.getContext().stream().anyMatch(c -> c.getId().equals("eosc")))
.count());
Assertions
.assertEquals(
1,
tmp
.filter(
d -> d.getId().equals("50|475c1990cbb2::449f28eefccf9f70c04ad70d61e041c7")
&&
d.getContext().stream().anyMatch(c -> c.getId().equals("eosc")))
.count());
}
@Test
void removeTest() throws Exception {
final String pathMap = BulkTagJobTest.pathMap;
SparkBulkTagJob
.main(
new String[] {
"-isTest", Boolean.TRUE.toString(),
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath",
getClass()
.getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/update_datasourcewithconstraints")
.getPath(),
"-taggingConf", taggingConf,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset",
"-outputPath", workingDir.toString() + "/dataset",
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
"-pathMap", pathMap
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<Dataset> tmp = sc
.textFile(workingDir.toString() + "/dataset")
.map(item -> OBJECT_MAPPER.readValue(item, Dataset.class));
Assertions.assertEquals(12, tmp.count());
org.apache.spark.sql.Dataset<Dataset> verificationDataset = spark
.createDataset(tmp.rdd(), Encoders.bean(Dataset.class));
verificationDataset.createOrReplaceTempView("dataset"); verificationDataset.createOrReplaceTempView("dataset");
String query = "select id, MyT.id community, MyD.provenanceaction.classid provenance, MyD.provenanceaction.classname name " String query = "select id, MyT.id community, MyD.provenanceaction.classid provenance, MyD.provenanceaction.classname name "
@ -816,8 +1564,8 @@ public class BulkTagJobTest {
org.apache.spark.sql.Dataset<Row> idExplodeCommunity = spark.sql(query); org.apache.spark.sql.Dataset<Row> idExplodeCommunity = spark.sql(query);
idExplodeCommunity.show(false); Assertions.assertEquals(3, idExplodeCommunity.filter("community = 'dth'").count());
Assertions.assertEquals(0, idExplodeCommunity.count());
} }
} }

View File

@ -83,4 +83,36 @@ class CommunityConfigurationFactoryTest {
Assertions.assertEquals("dariah", comm.get(0)); Assertions.assertEquals("dariah", comm.get(0));
} }
@Test
void loadSelCriteriaTest2() throws DocumentException, IOException, SAXException {
String xml = IOUtils
.toString(
getClass()
.getResourceAsStream(
"/eu/dnetlib/dhp/bulktag/communityconfiguration/community_configuration_selcrit2.xml"));
final CommunityConfiguration cc = CommunityConfigurationFactory.newInstance(xml);
Map<String, List<String>> param = new HashMap<>();
param.put("author", new ArrayList<>(Collections.singletonList("Pippo Pippi")));
param
.put(
"description",
new ArrayList<>(
Collections
.singletonList(
"This work has been partially supported by DARIAH-EU infrastructure")));
param
.put(
"contributor",
new ArrayList<>(
Collections
.singletonList(
"Author X helped to write the paper. X works for DARIAH")));
List<String> comm = cc
.getCommunityForDatasource(
"openaire____::1cfdb2e14977f31a98e0118283401f32", param);
// TODO add more assertions
Assertions.assertEquals(0, comm.size());
}
} }

View File

@ -1,236 +0,0 @@
package eu.dnetlib.dhp.bulktag;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.List;
import org.apache.commons.io.FileUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* @author miriam.baglioni
* @Date 22/07/22
*/
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.bulktag.eosc.DatasourceMaster;
import eu.dnetlib.dhp.bulktag.eosc.SparkEoscBulkTag;
import eu.dnetlib.dhp.schema.oaf.*;
//"50|475c1990cbb2::0fecfb874d9395aa69d2f4d7cd1acbea" has instance hostedby eosc (cris)
//"50|475c1990cbb2::3185cd5d8a2b0a06bb9b23ef11748eb1" has instance hostedby eosc (zenodo)
//"50|475c1990cbb2::449f28eefccf9f70c04ad70d61e041c7" has two instance one hostedby eosc (wrong compatibility)
//"50|475c1990cbb2::3894c94123e96df8a21249957cf160cb" has EoscTag
public class EOSCContextTaggingTest {
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
private static SparkSession spark;
private static Path workingDir;
private static final Logger log = LoggerFactory.getLogger(EOSCContextTaggingTest.class);
@BeforeAll
public static void beforeAll() throws IOException {
workingDir = Files.createTempDirectory(EOSCContextTaggingTest.class.getSimpleName());
log.info("using work dir {}", workingDir);
SparkConf conf = new SparkConf();
conf.setAppName(EOSCContextTaggingTest.class.getSimpleName());
conf.setMaster("local[*]");
conf.set("spark.driver.host", "localhost");
conf.set("hive.metastore.local", "true");
conf.set("spark.ui.enabled", "false");
conf.set("spark.sql.warehouse.dir", workingDir.toString());
conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString());
spark = SparkSession
.builder()
.appName(EOSCTagJobTest.class.getSimpleName())
.config(conf)
.getOrCreate();
}
@AfterAll
public static void afterAll() throws IOException {
FileUtils.deleteDirectory(workingDir.toFile());
spark.stop();
}
@Test
void EoscContextTagTest() throws Exception {
spark
.read()
.textFile(getClass().getResource("/eu/dnetlib/dhp/bulktag/eosc/datasource/datasource_1").getPath())
.map(
(MapFunction<String, Datasource>) value -> OBJECT_MAPPER.readValue(value, Datasource.class),
Encoders.bean(Datasource.class))
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(workingDir.toString() + "/input/datasource");
spark
.read()
.textFile(getClass().getResource("/eu/dnetlib/dhp/bulktag/eosc/dataset/dataset_10.json").getPath())
.map(
(MapFunction<String, Dataset>) value -> OBJECT_MAPPER.readValue(value, Dataset.class),
Encoders.bean(Dataset.class))
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(workingDir.toString() + "/input/dataset");
SparkEoscBulkTag
.main(
new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath",
workingDir.toString() + "/input/",
"-workingPath", workingDir.toString() + "/working/",
"-datasourceMapPath",
getClass()
.getResource("/eu/dnetlib/dhp/bulktag/eosc/datasourceMasterAssociation/datasourceMaster")
.getPath(),
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset",
"-resultType", "dataset"
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
Assertions
.assertEquals(
2, sc
.textFile(workingDir.toString() + "/working/datasource")
.map(item -> OBJECT_MAPPER.readValue(item, DatasourceMaster.class))
.count());
JavaRDD<Dataset> tmp = sc
.textFile(workingDir.toString() + "/input/dataset")
.map(item -> OBJECT_MAPPER.readValue(item, Dataset.class));
Assertions.assertEquals(10, tmp.count());
Assertions
.assertEquals(
2,
tmp
.filter(
s -> s.getContext().stream().anyMatch(c -> c.getId().equals("eosc")))
.count());
Assertions
.assertEquals(
1,
tmp
.filter(
d -> d.getId().equals("50|475c1990cbb2::0fecfb874d9395aa69d2f4d7cd1acbea")
&&
d.getContext().stream().anyMatch(c -> c.getId().equals("eosc")))
.count());
Assertions
.assertEquals(
1,
tmp
.filter(
d -> d.getId().equals("50|475c1990cbb2::3185cd5d8a2b0a06bb9b23ef11748eb1")
&&
d.getContext().stream().anyMatch(c -> c.getId().equals("eosc")))
.count());
Assertions
.assertEquals(
0,
tmp
.filter(
d -> d.getId().equals("50|475c1990cbb2::449f28eefccf9f70c04ad70d61e041c7")
&&
d.getContext().stream().anyMatch(c -> c.getId().equals("eosc")))
.count());
Assertions
.assertEquals(
0,
tmp
.filter(
d -> d.getId().equals("50|475c1990cbb2::3894c94123e96df8a21249957cf160cb")
&&
d.getContext().stream().anyMatch(c -> c.getId().equals("eosc")))
.count());
}
@Test
void EoscContextTagTestEmptyDatasource() throws Exception {
spark
.read()
.textFile(getClass().getResource("/eu/dnetlib/dhp/bulktag/eosc/dataset/dataset_10.json").getPath())
.map(
(MapFunction<String, Dataset>) value -> OBJECT_MAPPER.readValue(value, Dataset.class),
Encoders.bean(Dataset.class))
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(workingDir.toString() + "/input/dataset");
spark
.read()
.textFile(getClass().getResource("/eu/dnetlib/dhp/bulktag/eosc/datasource/datasource").getPath())
.map(
(MapFunction<String, Datasource>) value -> OBJECT_MAPPER.readValue(value, Datasource.class),
Encoders.bean(Datasource.class))
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(workingDir.toString() + "/input/datasource");
SparkEoscBulkTag
.main(
new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath",
workingDir.toString() + "/input/",
"-workingPath", workingDir.toString() + "/working/",
"-datasourceMapPath",
getClass()
.getResource("/eu/dnetlib/dhp/bulktag/eosc/datasourceMasterAssociation/datasourceMaster")
.getPath(),
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset",
"-resultType", "dataset"
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<Dataset> tmp = sc
.textFile(workingDir.toString() + "/input/dataset")
.map(item -> OBJECT_MAPPER.readValue(item, Dataset.class));
Assertions.assertEquals(10, tmp.count());
Assertions
.assertEquals(
0,
tmp
.filter(
s -> s.getContext().stream().anyMatch(c -> c.getId().equals("eosc")))
.count());
}
}

View File

@ -1,713 +0,0 @@
package eu.dnetlib.dhp.bulktag;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.List;
import org.apache.commons.io.FileUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.bulktag.eosc.SparkEoscTag;
import eu.dnetlib.dhp.schema.oaf.*;
public class EOSCTagJobTest {
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
private static SparkSession spark;
private static Path workingDir;
private static final Logger log = LoggerFactory.getLogger(EOSCTagJobTest.class);
@BeforeAll
public static void beforeAll() throws IOException {
workingDir = Files.createTempDirectory(EOSCTagJobTest.class.getSimpleName());
log.info("using work dir {}", workingDir);
SparkConf conf = new SparkConf();
conf.setAppName(EOSCTagJobTest.class.getSimpleName());
conf.setMaster("local[*]");
conf.set("spark.driver.host", "localhost");
conf.set("hive.metastore.local", "true");
conf.set("spark.ui.enabled", "false");
conf.set("spark.sql.warehouse.dir", workingDir.toString());
conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString());
spark = SparkSession
.builder()
.appName(EOSCTagJobTest.class.getSimpleName())
.config(conf)
.getOrCreate();
}
@AfterAll
public static void afterAll() throws IOException {
FileUtils.deleteDirectory(workingDir.toFile());
spark.stop();
}
@Test
void jupyterUpdatesTest() throws Exception {
spark
.read()
.textFile(getClass().getResource("/eu/dnetlib/dhp/eosctag/jupyter/software").getPath())
.map(
(MapFunction<String, Software>) value -> OBJECT_MAPPER.readValue(value, Software.class),
Encoders.bean(Software.class))
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(workingDir.toString() + "/input/software");
spark
.read()
.textFile(getClass().getResource("/eu/dnetlib/dhp/eosctag/jupyter/dataset").getPath())
.map(
(MapFunction<String, Dataset>) value -> OBJECT_MAPPER.readValue(value, Dataset.class),
Encoders.bean(Dataset.class))
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(workingDir.toString() + "/input/dataset");
spark
.read()
.textFile(getClass().getResource("/eu/dnetlib/dhp/eosctag/jupyter/otherresearchproduct").getPath())
.map(
(MapFunction<String, OtherResearchProduct>) value -> OBJECT_MAPPER
.readValue(value, OtherResearchProduct.class),
Encoders.bean(OtherResearchProduct.class))
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(workingDir.toString() + "/input/otherresearchproduct");
SparkEoscTag
.main(
new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath",
workingDir.toString() + "/input",
"-workingPath", workingDir.toString() + "/working"
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<Software> tmp = sc
.textFile(workingDir.toString() + "/input/software")
.map(item -> OBJECT_MAPPER.readValue(item, Software.class));
Assertions.assertEquals(10, tmp.count());
Assertions
.assertEquals(
4,
tmp
.filter(s -> s.getEoscifguidelines() != null)
.filter(
s -> s
.getEoscifguidelines()
.stream()
.anyMatch(eig -> eig.getCode().equals("EOSC::Jupyter Notebook")))
.count());
Assertions
.assertEquals(
1, tmp
.filter(sw -> sw.getId().equals("50|od______1582::4132f5ec9496f0d6adc7b00a50a56ff4"))
.collect()
.get(0)
.getEoscifguidelines()
.size());
Assertions
.assertEquals(
1, tmp
.filter(sw -> sw.getId().equals("50|od______1582::4132f5ec9496f0d6adc7b00a50a56ff4"))
.collect()
.get(0)
.getSubject()
.size());
Assertions
.assertTrue(
tmp
.filter(sw -> sw.getId().equals("50|od______1582::4132f5ec9496f0d6adc7b00a50a56ff4"))
.collect()
.get(0)
.getEoscifguidelines()
.stream()
.anyMatch(s -> s.getCode().equals("EOSC::Jupyter Notebook")));
Assertions
.assertFalse(
tmp
.filter(sw -> sw.getId().equals("50|od______1582::4132f5ec9496f0d6adc7b00a50a56ff4"))
.collect()
.get(0)
.getSubject()
.stream()
.anyMatch(s -> s.getValue().equals("EOSC::Jupyter Notebook")));
Assertions
.assertEquals(
5, tmp
.filter(sw -> sw.getId().equals("50|od______1582::501b25d420f808c8eddcd9b16e917f11"))
.collect()
.get(0)
.getSubject()
.size());
Assertions
.assertFalse(
tmp
.filter(sw -> sw.getId().equals("50|od______1582::501b25d420f808c8eddcd9b16e917f11"))
.collect()
.get(0)
.getSubject()
.stream()
.anyMatch(s -> s.getValue().equals("EOSC::Jupyter Notebook")));
Assertions
.assertTrue(
tmp
.filter(sw -> sw.getId().equals("50|od______1582::501b25d420f808c8eddcd9b16e917f11"))
.collect()
.get(0)
.getEoscifguidelines() == null);
Assertions
.assertEquals(
8, tmp
.filter(sw -> sw.getId().equals("50|od______1582::581621232a561b7e8b4952b18b8b0e56"))
.collect()
.get(0)
.getSubject()
.size());
Assertions
.assertFalse(
tmp
.filter(sw -> sw.getId().equals("50|od______1582::581621232a561b7e8b4952b18b8b0e56"))
.collect()
.get(0)
.getSubject()
.stream()
.anyMatch(s -> s.getValue().equals("EOSC::Jupyter Notebook")));
Assertions
.assertEquals(
1, tmp
.filter(sw -> sw.getId().equals("50|od______1582::581621232a561b7e8b4952b18b8b0e56"))
.collect()
.get(0)
.getEoscifguidelines()
.size());
Assertions
.assertTrue(
tmp
.filter(sw -> sw.getId().equals("50|od______1582::581621232a561b7e8b4952b18b8b0e56"))
.collect()
.get(0)
.getEoscifguidelines()
.stream()
.anyMatch(s -> s.getCode().equals("EOSC::Jupyter Notebook")));
Assertions
.assertEquals(
5, tmp
.filter(sw -> sw.getId().equals("50|od______1582::5aec1186054301b66c0c5dc35972a589"))
.collect()
.get(0)
.getSubject()
.size());
Assertions
.assertFalse(
tmp
.filter(sw -> sw.getId().equals("50|od______1582::5aec1186054301b66c0c5dc35972a589"))
.collect()
.get(0)
.getSubject()
.stream()
.anyMatch(s -> s.getValue().equals("EOSC::Jupyter Notebook")));
Assertions
.assertTrue(
tmp
.filter(sw -> sw.getId().equals("50|od______1582::5aec1186054301b66c0c5dc35972a589"))
.collect()
.get(0)
.getEoscifguidelines() == null);
Assertions
.assertEquals(
8, tmp
.filter(sw -> sw.getId().equals("50|od______1582::639909adfad9d708308f2aedb733e4a0"))
.collect()
.get(0)
.getSubject()
.size());
Assertions
.assertFalse(
tmp
.filter(sw -> sw.getId().equals("50|od______1582::639909adfad9d708308f2aedb733e4a0"))
.collect()
.get(0)
.getSubject()
.stream()
.anyMatch(s -> s.getValue().equals("EOSC::Jupyter Notebook")));
Assertions
.assertEquals(
1,
tmp
.filter(sw -> sw.getId().equals("50|od______1582::639909adfad9d708308f2aedb733e4a0"))
.collect()
.get(0)
.getEoscifguidelines()
.size());
Assertions
.assertTrue(
tmp
.filter(sw -> sw.getId().equals("50|od______1582::639909adfad9d708308f2aedb733e4a0"))
.collect()
.get(0)
.getEoscifguidelines()
.stream()
.anyMatch(s -> s.getCode().equals("EOSC::Jupyter Notebook")));
List<StructuredProperty> subjects = tmp
.filter(sw -> sw.getId().equals("50|od______1582::6e7a9b21a2feef45673890432af34244"))
.collect()
.get(0)
.getSubject();
Assertions.assertEquals(7, subjects.size());
Assertions.assertTrue(subjects.stream().anyMatch(s -> s.getValue().equals("jupyter")));
Assertions.assertTrue(subjects.stream().anyMatch(s -> s.getValue().equals("Modeling and Simulation")));
Assertions.assertTrue(subjects.stream().anyMatch(s -> s.getValue().equals("structure granulaire")));
Assertions.assertTrue(subjects.stream().anyMatch(s -> s.getValue().equals("algorithme")));
Assertions.assertTrue(subjects.stream().anyMatch(s -> s.getValue().equals("simulation numérique")));
Assertions.assertTrue(subjects.stream().anyMatch(s -> s.getValue().equals("flux de gaz")));
Assertions.assertTrue(subjects.stream().anyMatch(s -> s.getValue().equals("flux de liquide")));
Assertions
.assertEquals(
10, sc
.textFile(workingDir.toString() + "/input/dataset")
.map(item -> OBJECT_MAPPER.readValue(item, Dataset.class))
.count());
Assertions
.assertEquals(
0, sc
.textFile(workingDir.toString() + "/input/dataset")
.map(item -> OBJECT_MAPPER.readValue(item, Dataset.class))
.filter(
ds -> ds.getSubject().stream().anyMatch(sbj -> sbj.getValue().equals("EOSC::Jupyter Notebook")))
.count());
Assertions
.assertEquals(
0, sc
.textFile(workingDir.toString() + "/input/dataset")
.map(item -> OBJECT_MAPPER.readValue(item, Dataset.class))
.filter(
ds -> ds
.getEoscifguidelines()
.stream()
.anyMatch(eig -> eig.getCode().equals("EOSC::Jupyter Notebook")))
.count());
Assertions
.assertEquals(
10, sc
.textFile(workingDir.toString() + "/input/otherresearchproduct")
.map(item -> OBJECT_MAPPER.readValue(item, OtherResearchProduct.class))
.count());
Assertions
.assertEquals(
0, sc
.textFile(workingDir.toString() + "/input/otherresearchproduct")
.map(item -> OBJECT_MAPPER.readValue(item, OtherResearchProduct.class))
.filter(
orp -> orp
.getSubject()
.stream()
.anyMatch(sbj -> sbj.getValue().equals("EOSC::Jupyter Notebook")))
.count());
Assertions
.assertEquals(
0, sc
.textFile(workingDir.toString() + "/input/otherresearchproduct")
.map(item -> OBJECT_MAPPER.readValue(item, OtherResearchProduct.class))
.filter(
orp -> orp
.getSubject()
.stream()
.anyMatch(eig -> eig.getValue().equals("EOSC::Jupyter Notebook")))
.count());
// spark.stop();
}
@Test
void galaxyUpdatesTest() throws Exception {
spark
.read()
.textFile(getClass().getResource("/eu/dnetlib/dhp/eosctag/galaxy/software").getPath())
.map(
(MapFunction<String, Software>) value -> OBJECT_MAPPER.readValue(value, Software.class),
Encoders.bean(Software.class))
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(workingDir.toString() + "/input/software");
spark
.read()
.textFile(getClass().getResource("/eu/dnetlib/dhp/eosctag/galaxy/dataset").getPath())
.map(
(MapFunction<String, Dataset>) value -> OBJECT_MAPPER.readValue(value, Dataset.class),
Encoders.bean(Dataset.class))
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(workingDir.toString() + "/input/dataset");
spark
.read()
.textFile(getClass().getResource("/eu/dnetlib/dhp/eosctag/galaxy/otherresearchproduct").getPath())
.map(
(MapFunction<String, OtherResearchProduct>) value -> OBJECT_MAPPER
.readValue(value, OtherResearchProduct.class),
Encoders.bean(OtherResearchProduct.class))
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(workingDir.toString() + "/input/otherresearchproduct");
SparkEoscTag
.main(
new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath",
workingDir.toString() + "/input",
"-workingPath", workingDir.toString() + "/working"
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<Software> tmp = sc
.textFile(workingDir.toString() + "/input/software")
.map(item -> OBJECT_MAPPER.readValue(item, Software.class));
Assertions.assertEquals(11, tmp.count());
Assertions
.assertEquals(
0,
tmp
.filter(
s -> s.getSubject().stream().anyMatch(sbj -> sbj.getValue().equals("EOSC::Galaxy Workflow")))
.count());
Assertions
.assertEquals(
1,
tmp
.filter(
s -> s.getEoscifguidelines() != null)
.count());
Assertions
.assertEquals(
1,
tmp
.filter(
s -> s.getEoscifguidelines() != null)
.filter(
s -> s
.getEoscifguidelines()
.stream()
.anyMatch(eig -> eig.getCode().equals("EOSC::Galaxy Workflow")))
.count());
Assertions
.assertEquals(
1, tmp
.filter(sw -> sw.getId().equals("50|od______1582::4132f5ec9496f0d6adc7b00a50a56ff4"))
.collect()
.get(0)
.getSubject()
.size());
Assertions
.assertFalse(
tmp
.filter(sw -> sw.getId().equals("50|od______1582::4132f5ec9496f0d6adc7b00a50a56ff4"))
.collect()
.get(0)
.getSubject()
.stream()
.anyMatch(s -> s.getValue().equals("EOSC::Galaxy Workflow")));
Assertions
.assertEquals(
1, tmp
.filter(sw -> sw.getId().equals("50|od______1582::4132f5ec9496f0d6adc7b00a50a56ff4"))
.collect()
.get(0)
.getEoscifguidelines()
.size());
Assertions
.assertTrue(
tmp
.filter(sw -> sw.getId().equals("50|od______1582::4132f5ec9496f0d6adc7b00a50a56ff4"))
.collect()
.get(0)
.getEoscifguidelines()
.stream()
.anyMatch(eig -> eig.getCode().equals("EOSC::Galaxy Workflow")));
Assertions
.assertEquals(
5, tmp
.filter(sw -> sw.getId().equals("50|od______1582::501b25d420f808c8eddcd9b16e917f11"))
.collect()
.get(0)
.getSubject()
.size());
Assertions
.assertEquals(
8, tmp
.filter(sw -> sw.getId().equals("50|od______1582::581621232a561b7e8b4952b18b8b0e56"))
.collect()
.get(0)
.getSubject()
.size());
Assertions
.assertFalse(
tmp
.filter(sw -> sw.getId().equals("50|od______1582::581621232a561b7e8b4952b18b8b0e56"))
.collect()
.get(0)
.getSubject()
.stream()
.anyMatch(s -> s.getValue().equals("EOSC::Galaxy Workflow")));
JavaRDD<OtherResearchProduct> orp = sc
.textFile(workingDir.toString() + "/input/otherresearchproduct")
.map(item -> OBJECT_MAPPER.readValue(item, OtherResearchProduct.class));
Assertions.assertEquals(10, orp.count());
Assertions
.assertEquals(
0,
orp
.filter(
s -> s.getSubject().stream().anyMatch(sbj -> sbj.getValue().equals("EOSC::Galaxy Workflow")))
.count());
orp.foreach(o -> System.out.println(OBJECT_MAPPER.writeValueAsString(o)));
Assertions
.assertEquals(
1, orp
.filter(o -> o.getEoscifguidelines() != null)
.filter(
o -> o
.getEoscifguidelines()
.stream()
.anyMatch(eig -> eig.getCode().equals("EOSC::Galaxy Workflow")))
.count());
Assertions
.assertEquals(
2, orp
.filter(sw -> sw.getId().equals("50|od______2017::0750a4d0782265873d669520f5e33c07"))
.collect()
.get(0)
.getSubject()
.size());
Assertions
.assertFalse(
orp
.filter(sw -> sw.getId().equals("50|od______2017::0750a4d0782265873d669520f5e33c07"))
.collect()
.get(0)
.getSubject()
.stream()
.anyMatch(s -> s.getValue().equals("EOSC::Galaxy Workflow")));
Assertions
.assertEquals(
1, orp
.filter(sw -> sw.getId().equals("50|od______2017::0750a4d0782265873d669520f5e33c07"))
.collect()
.get(0)
.getEoscifguidelines()
.size());
Assertions
.assertTrue(
orp
.filter(sw -> sw.getId().equals("50|od______2017::0750a4d0782265873d669520f5e33c07"))
.collect()
.get(0)
.getEoscifguidelines()
.stream()
.anyMatch(s -> s.getCode().equals("EOSC::Galaxy Workflow")));
Assertions
.assertEquals(
2, orp
.filter(sw -> sw.getId().equals("50|od______2017::1bd97baef19dbd2db3203b112bb83bc5"))
.collect()
.get(0)
.getSubject()
.size());
Assertions
.assertFalse(
orp
.filter(sw -> sw.getId().equals("50|od______2017::1bd97baef19dbd2db3203b112bb83bc5"))
.collect()
.get(0)
.getSubject()
.stream()
.anyMatch(s -> s.getValue().equals("EOSC::Galaxy Workflow")));
Assertions
.assertEquals(
2, orp
.filter(sw -> sw.getId().equals("50|od______2017::1e400f1747487fd15998735c41a55c72"))
.collect()
.get(0)
.getSubject()
.size());
Assertions
.assertFalse(
orp
.filter(sw -> sw.getId().equals("50|od______2017::1e400f1747487fd15998735c41a55c72"))
.collect()
.get(0)
.getSubject()
.stream()
.anyMatch(s -> s.getValue().equals("EOSC::Galaxy Workflow")));
}
@Test
void twitterUpdatesTest() throws Exception {
spark
.read()
.textFile(getClass().getResource("/eu/dnetlib/dhp/eosctag/twitter/software").getPath())
.map(
(MapFunction<String, Software>) value -> OBJECT_MAPPER.readValue(value, Software.class),
Encoders.bean(Software.class))
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(workingDir.toString() + "/input/software");
spark
.read()
.textFile(getClass().getResource("/eu/dnetlib/dhp/eosctag/twitter/dataset").getPath())
.map(
(MapFunction<String, Dataset>) value -> OBJECT_MAPPER.readValue(value, Dataset.class),
Encoders.bean(Dataset.class))
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(workingDir.toString() + "/input/dataset");
spark
.read()
.textFile(getClass().getResource("/eu/dnetlib/dhp/eosctag/twitter/otherresearchproduct").getPath())
.map(
(MapFunction<String, OtherResearchProduct>) value -> OBJECT_MAPPER
.readValue(value, OtherResearchProduct.class),
Encoders.bean(OtherResearchProduct.class))
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(workingDir.toString() + "/input/otherresearchproduct");
SparkEoscTag
.main(
new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath",
workingDir.toString() + "/input",
"-workingPath", workingDir.toString() + "/working"
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<Software> tmp = sc
.textFile(workingDir.toString() + "/input/software")
.map(item -> OBJECT_MAPPER.readValue(item, Software.class));
Assertions.assertEquals(10, tmp.count());
Assertions
.assertEquals(
0,
tmp
.filter(s -> s.getSubject().stream().anyMatch(sbj -> sbj.getValue().equals("EOSC::Twitter Data")))
.count());
JavaRDD<OtherResearchProduct> orp = sc
.textFile(workingDir.toString() + "/input/otherresearchproduct")
.map(item -> OBJECT_MAPPER.readValue(item, OtherResearchProduct.class));
Assertions.assertEquals(10, orp.count());
Assertions
.assertEquals(
0,
orp
.filter(s -> s.getSubject().stream().anyMatch(sbj -> sbj.getValue().equals("EOSC::Twitter Data")))
.count());
Assertions
.assertEquals(
3,
orp
.filter(
s -> s
.getEoscifguidelines()
.stream()
.anyMatch(eig -> eig.getCode().equals("EOSC::Twitter Data")))
.count());
JavaRDD<Dataset> dats = sc
.textFile(workingDir.toString() + "/input/dataset")
.map(item -> OBJECT_MAPPER.readValue(item, Dataset.class));
Assertions.assertEquals(11, dats.count());
Assertions
.assertEquals(
3,
dats
.filter(
s -> s
.getEoscifguidelines()
.stream()
.anyMatch(eig -> eig.getCode().equals("EOSC::Twitter Data")))
.count());
}
}

Some files were not shown because too many files have changed in this diff Show More