merge with master

This commit is contained in:
Miriam Baglioni 2020-12-03 13:46:15 +01:00
commit f6b7c297a8
737 changed files with 41872 additions and 6444 deletions

View File

@ -15,12 +15,12 @@
<snapshotRepository>
<id>dnet45-snapshots</id>
<name>DNet45 Snapshots</name>
<url>http://maven.research-infrastructures.eu/nexus/content/repositories/dnet45-snapshots</url>
<url>https://maven.d4science.org/nexus/content/repositories/dnet45-snapshots</url>
<layout>default</layout>
</snapshotRepository>
<repository>
<id>dnet45-releases</id>
<url>http://maven.research-infrastructures.eu/nexus/content/repositories/dnet45-releases</url>
<url>https://maven.d4science.org/nexus/content/repositories/dnet45-releases</url>
</repository>
</distributionManagement>

View File

@ -19,7 +19,7 @@
<setting id="org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_annotation_type_member_declaration" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_declaration_throws" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.parentheses_positions_in_switch_statement" value="common_lines"/>
<setting id="org.eclipse.jdt.core.formatter.comment.format_javadoc_comments" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.comment.format_javadoc_comments" value="false"/>
<setting id="org.eclipse.jdt.core.formatter.indentation.size" value="4"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_postfix_operator" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.parentheses_positions_in_enum_constant_declaration" value="common_lines"/>

View File

@ -87,6 +87,22 @@
<groupId>org.postgresql</groupId>
<artifactId>postgresql</artifactId>
</dependency>
<dependency>
<groupId>com.squareup.okhttp3</groupId>
<artifactId>okhttp</artifactId>
</dependency>
<dependency>
<groupId>eu.dnetlib</groupId>
<artifactId>dnet-pace-core</artifactId>
</dependency>
<dependency>
<groupId>eu.dnetlib.dhp</groupId>
<artifactId>dhp-schemas</artifactId>
<version>${project.version}</version>
</dependency>
</dependencies>
</project>

View File

@ -0,0 +1,117 @@
package eu.dnetlib.dhp.common;
import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.Serializable;
import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
import org.apache.commons.compress.archivers.tar.TarArchiveOutputStream;
import org.apache.hadoop.fs.*;
public class MakeTarArchive implements Serializable {
private static TarArchiveOutputStream getTar(FileSystem fileSystem, String outputPath) throws IOException {
Path hdfsWritePath = new Path(outputPath);
FSDataOutputStream fsDataOutputStream = null;
if (fileSystem.exists(hdfsWritePath)) {
fileSystem.delete(hdfsWritePath, true);
}
fsDataOutputStream = fileSystem.create(hdfsWritePath);
return new TarArchiveOutputStream(fsDataOutputStream.getWrappedStream());
}
private static void write(FileSystem fileSystem, String inputPath, String outputPath, String dir_name)
throws IOException {
Path hdfsWritePath = new Path(outputPath);
FSDataOutputStream fsDataOutputStream = null;
if (fileSystem.exists(hdfsWritePath)) {
fileSystem.delete(hdfsWritePath, true);
}
fsDataOutputStream = fileSystem.create(hdfsWritePath);
TarArchiveOutputStream ar = new TarArchiveOutputStream(fsDataOutputStream.getWrappedStream());
RemoteIterator<LocatedFileStatus> fileStatusListIterator = fileSystem
.listFiles(
new Path(inputPath), true);
while (fileStatusListIterator.hasNext()) {
writeCurrentFile(fileSystem, dir_name, fileStatusListIterator, ar, 0);
}
ar.close();
}
public static void tarMaxSize(FileSystem fileSystem, String inputPath, String outputPath, String dir_name,
int gBperSplit) throws IOException {
final long bytesPerSplit = 1024L * 1024L * 1024L * gBperSplit;
long sourceSize = fileSystem.getContentSummary(new Path(inputPath)).getSpaceConsumed();
if (sourceSize < bytesPerSplit) {
write(fileSystem, inputPath, outputPath + ".tar", dir_name);
} else {
int partNum = 0;
RemoteIterator<LocatedFileStatus> fileStatusListIterator = fileSystem
.listFiles(
new Path(inputPath), true);
boolean next = fileStatusListIterator.hasNext();
while (next) {
TarArchiveOutputStream ar = getTar(fileSystem, outputPath + "_" + (partNum + 1) + ".tar");
long current_size = 0;
while (next && current_size < bytesPerSplit) {
current_size = writeCurrentFile(fileSystem, dir_name, fileStatusListIterator, ar, current_size);
next = fileStatusListIterator.hasNext();
}
partNum += 1;
ar.close();
}
}
}
private static long writeCurrentFile(FileSystem fileSystem, String dir_name,
RemoteIterator<LocatedFileStatus> fileStatusListIterator,
TarArchiveOutputStream ar, long current_size) throws IOException {
LocatedFileStatus fileStatus = fileStatusListIterator.next();
Path p = fileStatus.getPath();
String p_string = p.toString();
if (!p_string.endsWith("_SUCCESS")) {
String name = p_string.substring(p_string.lastIndexOf("/") + 1);
if (name.trim().equalsIgnoreCase("communities_infrastructures")) {
name = "communities_infrastructures.json";
}
TarArchiveEntry entry = new TarArchiveEntry(dir_name + "/" + name);
entry.setSize(fileStatus.getLen());
current_size += fileStatus.getLen();
ar.putArchiveEntry(entry);
InputStream is = fileSystem.open(fileStatus.getPath());
BufferedInputStream bis = new BufferedInputStream(is);
int count;
byte data[] = new byte[1024];
while ((count = bis.read(data, 0, data.length)) != -1) {
ar.write(data, 0, count);
}
bis.close();
ar.closeArchiveEntry();
}
return current_size;
}
}

View File

@ -0,0 +1,53 @@
package eu.dnetlib.dhp.common.api;
import java.io.IOException;
import java.io.InputStream;
import okhttp3.MediaType;
import okhttp3.RequestBody;
import okhttp3.internal.Util;
import okio.BufferedSink;
import okio.Okio;
import okio.Source;
public class InputStreamRequestBody extends RequestBody {
private InputStream inputStream;
private MediaType mediaType;
private long lenght;
public static RequestBody create(final MediaType mediaType, final InputStream inputStream, final long len) {
return new InputStreamRequestBody(inputStream, mediaType, len);
}
private InputStreamRequestBody(InputStream inputStream, MediaType mediaType, long len) {
this.inputStream = inputStream;
this.mediaType = mediaType;
this.lenght = len;
}
@Override
public MediaType contentType() {
return mediaType;
}
@Override
public long contentLength() {
return lenght;
}
@Override
public void writeTo(BufferedSink sink) throws IOException {
Source source = null;
try {
source = Okio.source(inputStream);
sink.writeAll(source);
} finally {
Util.closeQuietly(source);
}
}
}

View File

@ -0,0 +1,8 @@
package eu.dnetlib.dhp.common.api;
public class MissingConceptDoiException extends Throwable {
public MissingConceptDoiException(String message) {
super(message);
}
}

View File

@ -0,0 +1,315 @@
package eu.dnetlib.dhp.common.api;
import java.io.*;
import java.io.IOException;
import java.util.concurrent.TimeUnit;
import com.google.gson.Gson;
import eu.dnetlib.dhp.common.api.zenodo.ZenodoModel;
import eu.dnetlib.dhp.common.api.zenodo.ZenodoModelList;
import okhttp3.*;
public class ZenodoAPIClient implements Serializable {
String urlString;
String bucket;
String deposition_id;
String access_token;
public static final MediaType MEDIA_TYPE_JSON = MediaType.parse("application/json; charset=utf-8");
private static final MediaType MEDIA_TYPE_ZIP = MediaType.parse("application/zip");
public String getUrlString() {
return urlString;
}
public void setUrlString(String urlString) {
this.urlString = urlString;
}
public String getBucket() {
return bucket;
}
public void setBucket(String bucket) {
this.bucket = bucket;
}
public void setDeposition_id(String deposition_id) {
this.deposition_id = deposition_id;
}
public ZenodoAPIClient(String urlString, String access_token) throws IOException {
this.urlString = urlString;
this.access_token = access_token;
}
/**
* Brand new deposition in Zenodo. It sets the deposition_id and the bucket where to store the files to upload
*
* @return response code
* @throws IOException
*/
public int newDeposition() throws IOException {
String json = "{}";
OkHttpClient httpClient = new OkHttpClient.Builder().connectTimeout(600, TimeUnit.SECONDS).build();
RequestBody body = RequestBody.create(json, MEDIA_TYPE_JSON);
Request request = new Request.Builder()
.url(urlString)
.addHeader("Content-Type", "application/json") // add request headers
.addHeader("Authorization", "Bearer " + access_token)
.post(body)
.build();
try (Response response = httpClient.newCall(request).execute()) {
if (!response.isSuccessful())
throw new IOException("Unexpected code " + response + response.body().string());
// Get response body
json = response.body().string();
ZenodoModel newSubmission = new Gson().fromJson(json, ZenodoModel.class);
this.bucket = newSubmission.getLinks().getBucket();
this.deposition_id = newSubmission.getId();
return response.code();
}
}
/**
* Upload files in Zenodo.
*
* @param is the inputStream for the file to upload
* @param file_name the name of the file as it will appear on Zenodo
* @param len the size of the file
* @return the response code
*/
public int uploadIS(InputStream is, String file_name, long len) throws IOException {
OkHttpClient httpClient = new OkHttpClient.Builder()
.writeTimeout(600, TimeUnit.SECONDS)
.readTimeout(600, TimeUnit.SECONDS)
.connectTimeout(600, TimeUnit.SECONDS)
.build();
Request request = new Request.Builder()
.url(bucket + "/" + file_name)
.addHeader("Content-Type", "application/zip") // add request headers
.addHeader("Authorization", "Bearer " + access_token)
.put(InputStreamRequestBody.create(MEDIA_TYPE_ZIP, is, len))
.build();
try (Response response = httpClient.newCall(request).execute()) {
if (!response.isSuccessful())
throw new IOException("Unexpected code " + response + response.body().string());
return response.code();
}
}
/**
* Associates metadata information to the current deposition
*
* @param metadata the metadata
* @return response code
* @throws IOException
*/
public int sendMretadata(String metadata) throws IOException {
OkHttpClient httpClient = new OkHttpClient.Builder().connectTimeout(600, TimeUnit.SECONDS).build();
RequestBody body = RequestBody.create(metadata, MEDIA_TYPE_JSON);
Request request = new Request.Builder()
.url(urlString + "/" + deposition_id)
.addHeader("Content-Type", "application/json") // add request headers
.addHeader("Authorization", "Bearer " + access_token)
.put(body)
.build();
try (Response response = httpClient.newCall(request).execute()) {
if (!response.isSuccessful())
throw new IOException("Unexpected code " + response + response.body().string());
return response.code();
}
}
/**
* To publish the current deposition. It works for both new deposition or new version of an old deposition
*
* @return response code
* @throws IOException
*/
public int publish() throws IOException {
String json = "{}";
OkHttpClient httpClient = new OkHttpClient.Builder().connectTimeout(600, TimeUnit.SECONDS).build();
RequestBody body = RequestBody.create(json, MEDIA_TYPE_JSON);
Request request = new Request.Builder()
.url(urlString + "/" + deposition_id + "/actions/publish")
.addHeader("Authorization", "Bearer " + access_token)
.post(body)
.build();
try (Response response = httpClient.newCall(request).execute()) {
if (!response.isSuccessful())
throw new IOException("Unexpected code " + response + response.body().string());
return response.code();
}
}
/**
* To create a new version of an already published deposition. It sets the deposition_id and the bucket to be used
* for the new version.
*
* @param concept_rec_id the concept record id of the deposition for which to create a new version. It is the last
* part of the url for the DOI Zenodo suggests to use to cite all versions: DOI: 10.xxx/zenodo.656930
* concept_rec_id = 656930
* @return response code
* @throws IOException
* @throws MissingConceptDoiException
*/
public int newVersion(String concept_rec_id) throws IOException, MissingConceptDoiException {
setDepositionId(concept_rec_id);
String json = "{}";
OkHttpClient httpClient = new OkHttpClient.Builder().connectTimeout(600, TimeUnit.SECONDS).build();
RequestBody body = RequestBody.create(json, MEDIA_TYPE_JSON);
Request request = new Request.Builder()
.url(urlString + "/" + deposition_id + "/actions/newversion")
.addHeader("Authorization", "Bearer " + access_token)
.post(body)
.build();
try (Response response = httpClient.newCall(request).execute()) {
if (!response.isSuccessful())
throw new IOException("Unexpected code " + response + response.body().string());
ZenodoModel zenodoModel = new Gson().fromJson(response.body().string(), ZenodoModel.class);
String latest_draft = zenodoModel.getLinks().getLatest_draft();
deposition_id = latest_draft.substring(latest_draft.lastIndexOf("/") + 1);
bucket = getBucket(latest_draft);
return response.code();
}
}
/**
* To finish uploading a version or new deposition not published
* It sets the deposition_id and the bucket to be used
*
*
* @param deposition_id the deposition id of the not yet published upload
* concept_rec_id = 656930
* @return response code
* @throws IOException
* @throws MissingConceptDoiException
*/
public int uploadOpenDeposition(String deposition_id) throws IOException, MissingConceptDoiException {
this.deposition_id = deposition_id;
OkHttpClient httpClient = new OkHttpClient.Builder().connectTimeout(600, TimeUnit.SECONDS).build();
Request request = new Request.Builder()
.url(urlString + "/" + deposition_id)
.addHeader("Authorization", "Bearer " + access_token)
.build();
try (Response response = httpClient.newCall(request).execute()) {
if (!response.isSuccessful())
throw new IOException("Unexpected code " + response + response.body().string());
ZenodoModel zenodoModel = new Gson().fromJson(response.body().string(), ZenodoModel.class);
bucket = zenodoModel.getLinks().getBucket();
return response.code();
}
}
private void setDepositionId(String concept_rec_id) throws IOException, MissingConceptDoiException {
ZenodoModelList zenodoModelList = new Gson().fromJson(getPrevDepositions(), ZenodoModelList.class);
for (ZenodoModel zm : zenodoModelList) {
if (zm.getConceptrecid().equals(concept_rec_id)) {
deposition_id = zm.getId();
return;
}
}
throw new MissingConceptDoiException("The concept record id specified was missing in the list of depositions");
}
private String getPrevDepositions() throws IOException {
OkHttpClient httpClient = new OkHttpClient.Builder().connectTimeout(600, TimeUnit.SECONDS).build();
Request request = new Request.Builder()
.url(urlString)
.addHeader("Content-Type", "application/json") // add request headers
.addHeader("Authorization", "Bearer " + access_token)
.get()
.build();
try (Response response = httpClient.newCall(request).execute()) {
if (!response.isSuccessful())
throw new IOException("Unexpected code " + response + response.body().string());
return response.body().string();
}
}
private String getBucket(String url) throws IOException {
OkHttpClient httpClient = new OkHttpClient.Builder()
.connectTimeout(600, TimeUnit.SECONDS)
.build();
Request request = new Request.Builder()
.url(url)
.addHeader("Content-Type", "application/json") // add request headers
.addHeader("Authorization", "Bearer " + access_token)
.get()
.build();
try (Response response = httpClient.newCall(request).execute()) {
if (!response.isSuccessful())
throw new IOException("Unexpected code " + response + response.body().string());
// Get response body
ZenodoModel zenodoModel = new Gson().fromJson(response.body().string(), ZenodoModel.class);
return zenodoModel.getLinks().getBucket();
}
}
}

View File

@ -1,5 +1,5 @@
package eu.dnetlib.dhp.oa.graph.dump.zenodo;
package eu.dnetlib.dhp.common.api.zenodo;
public class Community {
private String identifier;

View File

@ -1,5 +1,5 @@
package eu.dnetlib.dhp.oa.graph.dump.zenodo;
package eu.dnetlib.dhp.common.api.zenodo;
public class Creator {
private String affiliation;
@ -29,4 +29,19 @@ public class Creator {
public void setOrcid(String orcid) {
this.orcid = orcid;
}
public static Creator newInstance(String name, String affiliation, String orcid) {
Creator c = new Creator();
if (!(name == null)) {
c.name = name;
}
if (!(affiliation == null)) {
c.affiliation = affiliation;
}
if (!(orcid == null)) {
c.orcid = orcid;
}
return c;
}
}

View File

@ -1,5 +1,5 @@
package eu.dnetlib.dhp.oa.graph.dump.zenodo;
package eu.dnetlib.dhp.common.api.zenodo;
import java.io.Serializable;

View File

@ -1,5 +1,5 @@
package eu.dnetlib.dhp.oa.graph.dump.zenodo;
package eu.dnetlib.dhp.common.api.zenodo;
import java.io.Serializable;
@ -13,4 +13,11 @@ public class Grant implements Serializable {
public void setId(String id) {
this.id = id;
}
public static Grant newInstance(String id) {
Grant g = new Grant();
g.id = id;
return g;
}
}

View File

@ -1,5 +1,5 @@
package eu.dnetlib.dhp.oa.graph.dump.zenodo;
package eu.dnetlib.dhp.common.api.zenodo;
import java.io.Serializable;

View File

@ -1,5 +1,5 @@
package eu.dnetlib.dhp.oa.graph.dump.zenodo;
package eu.dnetlib.dhp.common.api.zenodo;
import java.io.Serializable;
import java.util.List;

View File

@ -1,5 +1,5 @@
package eu.dnetlib.dhp.oa.graph.dump.zenodo;
package eu.dnetlib.dhp.common.api.zenodo;
import java.io.Serializable;

View File

@ -1,5 +1,5 @@
package eu.dnetlib.dhp.oa.graph.dump.zenodo;
package eu.dnetlib.dhp.common.api.zenodo;
import java.io.Serializable;

View File

@ -1,5 +1,5 @@
package eu.dnetlib.dhp.oa.graph.dump.zenodo;
package eu.dnetlib.dhp.common.api.zenodo;
import java.io.Serializable;
import java.util.List;

View File

@ -0,0 +1,7 @@
package eu.dnetlib.dhp.common.api.zenodo;
import java.util.ArrayList;
public class ZenodoModelList extends ArrayList<ZenodoModel> {
}

View File

@ -1,5 +1,5 @@
package eu.dnetlib.dhp.oa.dedup;
package eu.dnetlib.dhp.oa.merge;
import java.text.Normalizer;
import java.util.*;
@ -94,7 +94,13 @@ public class AuthorMerger {
if (r.getPid() == null) {
r.setPid(new ArrayList<>());
}
r.getPid().add(a._1());
// TERRIBLE HACK but for some reason when we create and Array with Arrays.asList,
// it creates of fixed size, and the add method raise UnsupportedOperationException at
// java.util.AbstractList.add
final List<StructuredProperty> tmp = new ArrayList<>(r.getPid());
tmp.add(a._1());
r.setPid(tmp);
}
}
});

View File

@ -1,19 +1,55 @@
package eu.dnetlib.dhp.oa.graph.raw.common;
package eu.dnetlib.dhp.schema.oaf;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.concurrent.ConcurrentHashMap;
import java.util.function.Function;
import java.util.function.Predicate;
import java.util.stream.Collectors;
import org.apache.commons.lang3.StringUtils;
import eu.dnetlib.dhp.schema.oaf.*;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.utils.DHPUtils;
public class OafMapperUtils {
public static Oaf merge(final Oaf o1, final Oaf o2) {
if (ModelSupport.isSubClass(o1, OafEntity.class)) {
if (ModelSupport.isSubClass(o1, Result.class)) {
return mergeResults((Result) o1, (Result) o2);
} else if (ModelSupport.isSubClass(o1, Datasource.class)) {
((Datasource) o1).mergeFrom((Datasource) o2);
} else if (ModelSupport.isSubClass(o1, Organization.class)) {
((Organization) o1).mergeFrom((Organization) o2);
} else if (ModelSupport.isSubClass(o1, Project.class)) {
((Project) o1).mergeFrom((Project) o2);
} else {
throw new RuntimeException("invalid OafEntity subtype:" + o1.getClass().getCanonicalName());
}
} else if (ModelSupport.isSubClass(o1, Relation.class)) {
((Relation) o1).mergeFrom((Relation) o2);
} else {
throw new RuntimeException("invalid Oaf type:" + o1.getClass().getCanonicalName());
}
return o1;
}
public static Result mergeResults(Result r1, Result r2) {
if (new ResultTypeComparator().compare(r1, r2) < 0) {
r1.mergeFrom(r2);
return r1;
} else {
r2.mergeFrom(r1);
return r2;
}
}
public static KeyValue keyValue(final String k, final String v) {
final KeyValue kv = new KeyValue();
kv.setKey(k);
@ -49,6 +85,7 @@ public class OafMapperUtils {
.stream(values)
.map(v -> field(v, info))
.filter(Objects::nonNull)
.filter(distinctByKey(f -> f.getValue()))
.collect(Collectors.toList());
}
@ -57,6 +94,7 @@ public class OafMapperUtils {
.stream()
.map(v -> field(v, info))
.filter(Objects::nonNull)
.filter(distinctByKey(f -> f.getValue()))
.collect(Collectors.toList());
}
@ -89,7 +127,9 @@ public class OafMapperUtils {
}
public static StructuredProperty structuredProperty(
final String value, final Qualifier qualifier, final DataInfo dataInfo) {
final String value,
final Qualifier qualifier,
final DataInfo dataInfo) {
if (value == null) {
return null;
}
@ -137,6 +177,27 @@ public class OafMapperUtils {
return p;
}
public static Journal journal(
final String name,
final String issnPrinted,
final String issnOnline,
final String issnLinking,
final DataInfo dataInfo) {
return journal(
name,
issnPrinted,
issnOnline,
issnLinking,
null,
null,
null,
null,
null,
null,
null,
dataInfo);
}
public static Journal journal(
final String name,
final String issnPrinted,
@ -192,8 +253,12 @@ public class OafMapperUtils {
}
public static String createOpenaireId(
final int prefix, final String originalId, final boolean to_md5) {
if (to_md5) {
final int prefix,
final String originalId,
final boolean to_md5) {
if (StringUtils.isBlank(originalId)) {
return null;
} else if (to_md5) {
final String nsPrefix = StringUtils.substringBefore(originalId, "::");
final String rest = StringUtils.substringAfter(originalId, "::");
return String.format("%s|%s::%s", prefix, nsPrefix, DHPUtils.md5(rest));
@ -203,7 +268,9 @@ public class OafMapperUtils {
}
public static String createOpenaireId(
final String type, final String originalId, final boolean to_md5) {
final String type,
final String originalId,
final boolean to_md5) {
switch (type) {
case "datasource":
return createOpenaireId(10, originalId, to_md5);
@ -221,4 +288,10 @@ public class OafMapperUtils {
public static String asString(final Object o) {
return o == null ? "" : o.toString();
}
public static <T> Predicate<T> distinctByKey(
final Function<? super T, ?> keyExtractor) {
final Map<Object, Boolean> seen = new ConcurrentHashMap<>();
return t -> seen.putIfAbsent(keyExtractor.apply(t), Boolean.TRUE) == null;
}
}

View File

@ -0,0 +1,49 @@
package eu.dnetlib.dhp.schema.oaf;
import java.util.Comparator;
import eu.dnetlib.dhp.schema.common.ModelConstants;
public class ResultTypeComparator implements Comparator<Result> {
@Override
public int compare(Result left, Result right) {
if (left == null && right == null)
return 0;
if (left == null)
return 1;
if (right == null)
return -1;
String lClass = left.getResulttype().getClassid();
String rClass = right.getResulttype().getClassid();
if (lClass.equals(rClass))
return 0;
if (lClass.equals(ModelConstants.PUBLICATION_RESULTTYPE_CLASSID))
return -1;
if (rClass.equals(ModelConstants.PUBLICATION_RESULTTYPE_CLASSID))
return 1;
if (lClass.equals(ModelConstants.DATASET_RESULTTYPE_CLASSID))
return -1;
if (rClass.equals(ModelConstants.DATASET_RESULTTYPE_CLASSID))
return 1;
if (lClass.equals(ModelConstants.SOFTWARE_RESULTTYPE_CLASSID))
return -1;
if (rClass.equals(ModelConstants.SOFTWARE_RESULTTYPE_CLASSID))
return 1;
if (lClass.equals(ModelConstants.ORP_RESULTTYPE_CLASSID))
return -1;
if (rClass.equals(ModelConstants.ORP_RESULTTYPE_CLASSID))
return 1;
// Else (but unlikely), lexicographical ordering will do.
return lClass.compareTo(rClass);
}
}

View File

@ -5,6 +5,7 @@ import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;
import java.util.List;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;
@ -15,9 +16,15 @@ import org.apache.commons.codec.binary.Hex;
import com.jayway.jsonpath.JsonPath;
import net.minidev.json.JSONArray;
import scala.collection.JavaConverters;
import scala.collection.Seq;
public class DHPUtils {
public static Seq<String> toSeq(List<String> list) {
return JavaConverters.asScalaIteratorConverter(list.iterator()).asScala().toSeq();
}
public static String md5(final String s) {
try {
final MessageDigest md = MessageDigest.getInstance("MD5");

View File

@ -1,15 +1,22 @@
package eu.dnetlib.dhp.utils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import java.util.Map;
import javax.xml.ws.BindingProvider;
import org.apache.cxf.jaxws.JaxWsProxyFactoryBean;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
public class ISLookupClientFactory {
private static final Log log = LogFactory.getLog(ISLookupClientFactory.class);
private static final Logger log = LoggerFactory.getLogger(ISLookupClientFactory.class);
private static int requestTimeout = 60000 * 10;
private static int connectTimeout = 60000 * 10;
public static ISLookUpService getLookUpService(final String isLookupUrl) {
return getServiceStub(ISLookUpService.class, isLookupUrl);
@ -21,6 +28,25 @@ public class ISLookupClientFactory {
final JaxWsProxyFactoryBean jaxWsProxyFactory = new JaxWsProxyFactoryBean();
jaxWsProxyFactory.setServiceClass(clazz);
jaxWsProxyFactory.setAddress(endpoint);
return (T) jaxWsProxyFactory.create();
final T service = (T) jaxWsProxyFactory.create();
if (service instanceof BindingProvider) {
log
.info(
"setting timeouts for {} to requestTimeout: {}, connectTimeout: {}",
BindingProvider.class.getName(), requestTimeout, connectTimeout);
Map<String, Object> requestContext = ((BindingProvider) service).getRequestContext();
requestContext.put("com.sun.xml.internal.ws.request.timeout", requestTimeout);
requestContext.put("com.sun.xml.internal.ws.connect.timeout", connectTimeout);
requestContext.put("com.sun.xml.ws.request.timeout", requestTimeout);
requestContext.put("com.sun.xml.ws.connect.timeout", connectTimeout);
requestContext.put("javax.xml.ws.client.receiveTimeout", requestTimeout);
requestContext.put("javax.xml.ws.client.connectionTimeout", connectTimeout);
}
return service;
}
}

View File

@ -5,6 +5,8 @@ import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
import org.apache.commons.lang3.StringUtils;
import net.sf.saxon.expr.XPathContext;
import net.sf.saxon.om.Sequence;
import net.sf.saxon.trans.XPathException;
@ -19,6 +21,8 @@ public class NormalizeDate extends AbstractExtensionFunction {
private static final String normalizeOutFormat = "yyyy-MM-dd'T'hh:mm:ss'Z'";
public static final String BLANK = "";
@Override
public String getName() {
return "normalizeDate";
@ -27,10 +31,10 @@ public class NormalizeDate extends AbstractExtensionFunction {
@Override
public Sequence doCall(XPathContext context, Sequence[] arguments) throws XPathException {
if (arguments == null | arguments.length == 0) {
return new StringValue("");
return new StringValue(BLANK);
}
String s = arguments[0].head().getStringValue();
return new StringValue(_year(s));
return new StringValue(_normalizeDate(s));
}
@Override
@ -55,8 +59,8 @@ public class NormalizeDate extends AbstractExtensionFunction {
return SequenceType.SINGLE_STRING;
}
private String _year(String s) {
final String date = s != null ? s.trim() : "";
private String _normalizeDate(String s) {
final String date = StringUtils.isNotBlank(s) ? s.trim() : BLANK;
for (String format : normalizeDateFormats) {
try {
@ -66,6 +70,6 @@ public class NormalizeDate extends AbstractExtensionFunction {
} catch (ParseException e) {
}
}
return "";
return BLANK;
}
}

View File

@ -0,0 +1,109 @@
package eu.dnetlib.dhp.common.api;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import org.apache.commons.io.IOUtils;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
@Disabled
public class ZenodoAPIClientTest {
private final String URL_STRING = "https://sandbox.zenodo.org/api/deposit/depositions";
private final String ACCESS_TOKEN = "";
private final String CONCEPT_REC_ID = "657113";
private final String depositionId = "674915";
@Test
public void testUploadOldDeposition() throws IOException, MissingConceptDoiException {
ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING,
ACCESS_TOKEN);
Assertions.assertEquals(200, client.uploadOpenDeposition(depositionId));
File file = new File(getClass()
.getResource("/eu/dnetlib/dhp/common/api/COVID-19.json.gz")
.getPath());
InputStream is = new FileInputStream(file);
Assertions.assertEquals(200, client.uploadIS(is, "COVID-19.json.gz", file.length()));
String metadata = IOUtils.toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/common/api/metadata.json"));
Assertions.assertEquals(200, client.sendMretadata(metadata));
Assertions.assertEquals(202, client.publish());
}
@Test
public void testNewDeposition() throws IOException {
ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING,
ACCESS_TOKEN);
Assertions.assertEquals(201, client.newDeposition());
File file = new File(getClass()
.getResource("/eu/dnetlib/dhp/common/api/COVID-19.json.gz")
.getPath());
InputStream is = new FileInputStream(file);
Assertions.assertEquals(200, client.uploadIS(is, "COVID-19.json.gz", file.length()));
String metadata = IOUtils.toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/common/api/metadata.json"));
Assertions.assertEquals(200, client.sendMretadata(metadata));
Assertions.assertEquals(202, client.publish());
}
@Test
public void testNewVersionNewName() throws IOException, MissingConceptDoiException {
ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING,
ACCESS_TOKEN);
Assertions.assertEquals(201, client.newVersion(CONCEPT_REC_ID));
File file = new File(getClass()
.getResource("/eu/dnetlib/dhp/common/api/newVersion")
.getPath());
InputStream is = new FileInputStream(file);
Assertions.assertEquals(200, client.uploadIS(is, "newVersion_deposition", file.length()));
Assertions.assertEquals(202, client.publish());
}
@Test
public void testNewVersionOldName() throws IOException, MissingConceptDoiException {
ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING,
ACCESS_TOKEN);
Assertions.assertEquals(201, client.newVersion(CONCEPT_REC_ID));
File file = new File(getClass()
.getResource("/eu/dnetlib/dhp/common/api/newVersion2")
.getPath());
InputStream is = new FileInputStream(file);
Assertions.assertEquals(200, client.uploadIS(is, "newVersion_deposition", file.length()));
Assertions.assertEquals(202, client.publish());
}
}

View File

@ -0,0 +1 @@
{"metadata":{"access_right":"open","communities":[{"identifier":"openaire-research-graph"}],"creators":[{"affiliation":"ISTI - CNR","name":"Bardi, Alessia","orcid":"0000-0002-1112-1292"},{"affiliation":"eifl", "name":"Kuchma, Iryna"},{"affiliation":"BIH", "name":"Brobov, Evgeny"},{"affiliation":"GIDIF RBM", "name":"Truccolo, Ivana"},{"affiliation":"unesp", "name":"Monteiro, Elizabete"},{"affiliation":"und", "name":"Casalegno, Carlotta"},{"affiliation":"CARL ABRC", "name":"Clary, Erin"},{"affiliation":"The University of Edimburgh", "name":"Romanowski, Andrew"},{"affiliation":"ISTI - CNR", "name":"Pavone, Gina"},{"affiliation":"ISTI - CNR", "name":"Artini, Michele"},{"affiliation":"ISTI - CNR","name":"Atzori, Claudio","orcid":"0000-0001-9613-6639"},{"affiliation":"University of Bielefeld","name":"Bäcker, Amelie","orcid":"0000-0001-6015-2063"},{"affiliation":"ISTI - CNR","name":"Baglioni, Miriam","orcid":"0000-0002-2273-9004"},{"affiliation":"University of Bielefeld","name":"Czerniak, Andreas","orcid":"0000-0003-3883-4169"},{"affiliation":"ISTI - CNR","name":"De Bonis, Michele"},{"affiliation":"Athena Research and Innovation Centre","name":"Dimitropoulos, Harry"},{"affiliation":"Athena Research and Innovation Centre","name":"Foufoulas, Ioannis"},{"affiliation":"University of Warsaw","name":"Horst, Marek"},{"affiliation":"Athena Research and Innovation Centre","name":"Iatropoulou, Katerina"},{"affiliation":"University of Warsaw","name":"Jacewicz, Przemyslaw"},{"affiliation":"Athena Research and Innovation Centre","name":"Kokogiannaki, Argiro", "orcid":"0000-0002-3880-0244"},{"affiliation":"ISTI - CNR","name":"La Bruzzo, Sandro","orcid":"0000-0003-2855-1245"},{"affiliation":"ISTI - CNR","name":"Lazzeri, Emma"},{"affiliation":"University of Bielefeld","name":"Löhden, Aenne"},{"affiliation":"ISTI - CNR","name":"Manghi, Paolo","orcid":"0000-0001-7291-3210"},{"affiliation":"ISTI - CNR","name":"Mannocci, Andrea","orcid":"0000-0002-5193-7851"},{"affiliation":"Athena Research and Innovation Center","name":"Manola, Natalia"},{"affiliation":"ISTI - CNR","name":"Ottonello, Enrico"},{"affiliation":"University of Bielefeld","name":"Shirrwagen, Jochen"}],"description":"\\u003cp\\u003eThis dump provides access to the metadata records of publications, research data, software and projects that may be relevant to the Corona Virus Disease (COVID-19) fight. The dump contains records of the OpenAIRE COVID-19 Gateway (https://covid-19.openaire.eu/), identified via full-text mining and inference techniques applied to the OpenAIRE Research Graph (https://explore.openaire.eu/). The Graph is one of the largest Open Access collections of metadata records and links between publications, datasets, software, projects, funders, and organizations, aggregating 12,000+ scientific data sources world-wide, among which the Covid-19 data sources Zenodo COVID-19 Community, WHO (World Health Organization), BIP! FInder for COVID-19, Protein Data Bank, Dimensions, scienceOpen, and RSNA. \\u003cp\\u003eThe dump consists of a gzip file containing one json per line. Each json is compliant to the schema available at https://doi.org/10.5281/zenodo.3974226\\u003c/p\\u003e ","title":"OpenAIRE Covid-19 publications, datasets, software and projects metadata.","upload_type":"dataset","version":"1.0"}}

View File

@ -0,0 +1 @@
This is a test for a new deposition

View File

@ -0,0 +1 @@
This is a test for a new version of an old deposition

View File

@ -0,0 +1,2 @@
This is a test for a new version of an old deposition. This should replace the other new version. I expect to have only two
files in the deposition

View File

@ -14,6 +14,37 @@
<description>This module contains common schema classes meant to be used across the dnet-hadoop submodules</description>
<build>
<plugins>
<plugin>
<groupId>net.alchim31.maven</groupId>
<artifactId>scala-maven-plugin</artifactId>
<version>4.0.1</version>
<executions>
<execution>
<id>scala-compile-first</id>
<phase>initialize</phase>
<goals>
<goal>add-source</goal>
<goal>compile</goal>
</goals>
</execution>
<execution>
<id>scala-test-compile</id>
<phase>process-test-resources</phase>
<goals>
<goal>testCompile</goal>
</goals>
</execution>
</executions>
<configuration>
<scalaVersion>${scala.version}</scalaVersion>
</configuration>
</plugin>
</plugins>
</build>
<dependencies>
<dependency>

View File

@ -1,14 +1,17 @@
package eu.dnetlib.dhp.schema.common;
import java.security.Key;
import eu.dnetlib.dhp.schema.oaf.DataInfo;
import eu.dnetlib.dhp.schema.oaf.KeyValue;
import eu.dnetlib.dhp.schema.oaf.Qualifier;
public class ModelConstants {
public static final String ORCID = "orcid";
public static final String ORCID_PENDING = "orcid_pending";
public static final String ORCID_CLASSNAME = "Open Researcher and Contributor ID";
public static final String DNET_SUBJECT_TYPOLOGIES = "dnet:subject_classification_typologies";
public static final String DNET_RESULT_TYPOLOGIES = "dnet:result_typologies";
public static final String DNET_PUBLICATION_RESOURCE = "dnet:publication_resource";
public static final String DNET_ACCESS_MODES = "dnet:access_modes";
@ -40,15 +43,15 @@ public class ModelConstants {
public static final String IS_SUPPLEMENT_TO = "isSupplementTo";
public static final String IS_SUPPLEMENTED_BY = "isSupplementedBy";
public static final String PART = "part";
public static final String IS_PART_OF = "IsPartOf";
public static final String HAS_PARTS = "HasParts";
public static final String IS_PART_OF = "isPartOf";
public static final String HAS_PARTS = "hasParts";
public static final String RELATIONSHIP = "relationship";
public static final String CITATION = "citation";
public static final String CITES = "cites";
public static final String IS_CITED_BY = "IsCitedBy";
public static final String IS_CITED_BY = "isCitedBy";
public static final String REVIEW = "review";
public static final String REVIEWS = "reviews";
public static final String IS_REVIEWED_BY = "IsReviewedBy";
public static final String IS_REVIEWED_BY = "isReviewedBy";
public static final String RESULT_PROJECT = "resultProject";
public static final String OUTCOME = "outcome";

View File

@ -79,6 +79,15 @@ public class ModelSupport {
entityIdPrefix.put("result", "50");
}
public static final Map<String, String> idPrefixEntity = Maps.newHashMap();
static {
idPrefixEntity.put("10", "datasource");
idPrefixEntity.put("20", "organization");
idPrefixEntity.put("40", "project");
idPrefixEntity.put("50", "result");
}
public static final Map<String, RelationInverse> relationInverseMap = Maps.newHashMap();
static {

View File

@ -3,6 +3,10 @@ package eu.dnetlib.dhp.schema.dump.oaf;
import java.io.Serializable;
/**
* Used to refer to the Article Processing Charge information. Not dumped in this release. It contains two parameters: -
* currency of type String to store the currency of the APC - amount of type String to stores the charged amount
*/
public class APC implements Serializable {
private String currency;
private String amount;

View File

@ -1,6 +1,14 @@
package eu.dnetlib.dhp.schema.dump.oaf;
/**
* AccessRight. Used to represent the result access rights. It extends the eu.dnet.lib.dhp.schema.dump.oaf.Qualifier
* element with a parameter scheme of type String to store the scheme. Values for this element are found against the
* COAR access right scheme. The classid of the element accessright in eu.dnetlib.dhp.schema.oaf.Result is used to get
* the COAR corresponding code whose value will be used to set the code parameter. The COAR label corresponding to the
* COAR code will be used to set the label parameter. The scheme value will always be the one referring to the COAR
* access right scheme
*/
public class AccessRight extends Qualifier {
private String scheme;

View File

@ -3,8 +3,21 @@ package eu.dnetlib.dhp.schema.dump.oaf;
import java.io.Serializable;
import java.util.List;
import java.util.Objects;
/**
* Used to represent the generic author of the result. It has six parameters: - name of type String to store the given
* name of the author. The value for this parameter corresponds to eu.dnetlib.dhp.schema.oaf.Author name - surname of
* type String to store the family name of the author. The value for this parameter corresponds to
* eu.dnetlib.dhp.schema.oaf.Author surname - fullname of type String to store the fullname of the author. The value for
* this parameter corresponds to eu.dnetlib.dhp.schema.oaf.Author fullname - rank of type Integer to store the rank on
* the author in the result's authors list. The value for this parameter corresponds to eu.dnetlib.dhp.schema.oaf.Author
* rank - pid of type eu.dnetlib.dhp.schema.dump.oaf.Pid to store the persistent identifier for the author. For the
* moment only ORCID identifiers will be dumped. - The id element is instantiated by using the following values in the
* eu.dnetlib.dhp.schema.oaf.Result pid: * Qualifier.classid for scheme * value for value - The provenance element is
* instantiated only if the dataInfo is set for the pid in the result to be dumped. The provenance element is
* instantiated by using the following values in the eu.dnetlib.dhp.schema.oaf.Result pid: *
* dataInfo.provenanceaction.classname for provenance * dataInfo.trust for trust
*/
public class Author implements Serializable {
private String fullname;
@ -15,9 +28,7 @@ public class Author implements Serializable {
private Integer rank;
private List<ControlledField> pid;
private List<String> affiliation;
private Pid pid;
public String getFullname() {
return fullname;
@ -51,20 +62,12 @@ public class Author implements Serializable {
this.rank = rank;
}
public List<ControlledField> getPid() {
public Pid getPid() {
return pid;
}
public void setPid(List<ControlledField> pid) {
public void setPid(Pid pid) {
this.pid = pid;
}
public List<String> getAffiliation() {
return affiliation;
}
public void setAffiliation(List<String> affiliation) {
this.affiliation = affiliation;
}
}

View File

@ -4,6 +4,23 @@ package eu.dnetlib.dhp.schema.dump.oaf;
import java.io.Serializable;
import java.util.Objects;
/**
* To store information about the conference or journal where the result has been presented or published. It contains
* eleven parameters: - name of type String to store the name of the journal or conference. It corresponds to the
* parameter name of eu.dnetlib.dhp.schema.oaf.Journal - issnPrinted ot type String to store the journal printed issn.
* It corresponds to the parameter issnPrinted of eu.dnetlib.dhp.schema.oaf.Journal - issnOnline of type String to store
* the journal online issn. It corresponds to the parameter issnOnline of eu.dnetlib.dhp.schema.oaf.Journal -
* issnLinking of type String to store the journal linking issn. It corresponds to the parameter issnLinking of
* eu.dnetlib.dhp.schema.oaf.Journal - ep of type String to store the end page. It corresponds to the parameter ep of
* eu.dnetlib.dhp.schema.oaf.Journal - iss of type String to store the journal issue. It corresponds to the parameter
* iss of eu.dnetlib.dhp.schema.oaf.Journal - sp of type String to store the start page. It corresponds to the parameter
* sp of eu.dnetlib.dhp.schema.oaf.Journal - vol of type String to store the Volume. It corresponds to the parameter vol
* of eu.dnetlib.dhp.schema.oaf.Journal - edition of type String to store the edition of the journal or conference
* proceeding. It corresponds to the parameter edition of eu.dnetlib.dhp.schema.oaf.Journal - conferenceplace of type
* String to store the place of the conference. It corresponds to the parameter conferenceplace of
* eu.dnetlib.dhp.schema.oaf.Journal - conferencedate of type String to store the date of the conference. It corresponds
* to the parameter conferencedate of eu.dnetlib.dhp.schema.oaf.Journal
*/
public class Container implements Serializable {
private String name;

View File

@ -1,16 +0,0 @@
package eu.dnetlib.dhp.schema.dump.oaf;
import java.util.List;
public class Context extends Qualifier {
private List<String> provenance;
public List<String> getProvenance() {
return provenance;
}
public void setProvenance(List<String> provenance) {
this.provenance = provenance;
}
}

View File

@ -3,8 +3,10 @@ package eu.dnetlib.dhp.schema.dump.oaf;
import java.io.Serializable;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
/**
* To represent the information described by a scheme and a value in that scheme (i.e. pid). It has two parameters: -
* scheme of type String to store the scheme - value of type String to store the value in that scheme
*/
public class ControlledField implements Serializable {
private String scheme;
private String value;

View File

@ -1,19 +1,28 @@
package eu.dnetlib.dhp.schema.dump.oaf;
/**
* Represents the country associated to this result. It extends eu.dnetlib.dhp.schema.dump.oaf.Qualifier with a
* provenance parameter of type eu.dnetlib.dhp.schema.dumo.oaf.Provenance. The country in not mapped if its value in the
* result reprensented in the internal format is Unknown. The value for this element correspond to: - code corresponds
* to the classid of eu.dnetlib.dhp.schema.oaf.Country - label corresponds to the classname of
* eu.dnetlib.dhp.schema.oaf.Country - provenance set only if the dataInfo associated to the Country of the result to be
* dumped is not null. In this case : - provenance corresponds to dataInfo.provenanceaction.classid (to be modified with
* datainfo.provenanceaction.classname) - trust corresponds to dataInfo.trust
*/
public class Country extends Qualifier {
private String provenance;
private Provenance provenance;
public String getProvenance() {
public Provenance getProvenance() {
return provenance;
}
public void setProvenance(String provenance) {
public void setProvenance(Provenance provenance) {
this.provenance = provenance;
}
public static Country newInstance(String code, String label, String provenance) {
public static Country newInstance(String code, String label, Provenance provenance) {
Country c = new Country();
c.setProvenance(provenance);
c.setCode(code);
@ -21,4 +30,8 @@ public class Country extends Qualifier {
return c;
}
public static Country newInstance(String code, String label, String provenance, String trust) {
return newInstance(code, label, Provenance.newInstance(provenance, trust));
}
}

View File

@ -1,72 +0,0 @@
package eu.dnetlib.dhp.schema.dump.oaf;
import java.io.Serializable;
import java.util.Objects;
import eu.dnetlib.dhp.schema.oaf.ExtraInfo;
//ExtraInfo
public class ExternalReference implements Serializable {
private String name;
private String typology;
private String provenance;
private String trust;
// json containing a Citation or Statistics
private String value;
public String getName() {
return name;
}
public void setName(String name) {
this.name = name;
}
public String getTypology() {
return typology;
}
public void setTypology(String typology) {
this.typology = typology;
}
public String getProvenance() {
return provenance;
}
public void setProvenance(String provenance) {
this.provenance = provenance;
}
public String getTrust() {
return trust;
}
public void setTrust(String trust) {
this.trust = trust;
}
public String getValue() {
return value;
}
public void setValue(String value) {
this.value = value;
}
public static ExternalReference newInstance(ExtraInfo ei) {
ExternalReference er = new ExternalReference();
er.name = ei.getName();
er.typology = ei.getTypology();
er.provenance = ei.getProvenance();
er.trust = ei.getTrust();
er.value = ei.getValue();
return er;
}
}

View File

@ -8,8 +8,6 @@ public class Funder implements Serializable {
private String name;
private String fundingStream;
private String jurisdiction;
public String getJurisdiction() {
@ -35,12 +33,4 @@ public class Funder implements Serializable {
public void setName(String name) {
this.name = name;
}
public String getFundingStream() {
return fundingStream;
}
public void setFundingStream(String fundingStream) {
this.fundingStream = fundingStream;
}
}

View File

@ -7,6 +7,12 @@ import org.apache.commons.lang3.StringUtils;
import com.fasterxml.jackson.annotation.JsonIgnore;
/**
* Represents the geolocation information. It has three parameters: - point of type String to store the point
* information. It corresponds to eu.dnetlib.dhp.schema.oaf.GeoLocation point - box ot type String to store the box
* information. It corresponds to eu.dnetlib.dhp.schema.oaf.GeoLocation box - place of type String to store the place
* information. It corresponds to eu.dnetlib.dhp.schema.oaf.GeoLocation place
*/
public class GeoLocation implements Serializable {
private String point;

View File

@ -4,6 +4,18 @@ package eu.dnetlib.dhp.schema.dump.oaf;
import java.io.Serializable;
import java.util.List;
/**
* Represents the manifestations (i.e. different versions) of the result. For example: the pre-print and the published
* versions are two manifestations of the same research result. It has the following parameters: - license of type
* String to store the license applied to the instance. It corresponds to the value of the licence in the instance to be
* dumped - accessright of type eu.dnetlib.dhp.schema.dump.oaf.AccessRight to store the accessright of the instance. -
* type of type String to store the type of the instance as defined in the corresponding dnet vocabulary
* (dnet:pubication_resource). It corresponds to the instancetype.classname of the instance to be mapped - url of type
* List<String> list of locations where the instance is accessible. It corresponds to url of the instance to be dumped -
* publicationdate of type String to store the publication date of the instance ;// dateofacceptance; - refereed of type
* String to store information abour tthe review status of the instance. Possible values are 'Unknown',
* 'nonPeerReviewed', 'peerReviewed'. It corresponds to refereed.classname of the instance to be dumped
*/
public class Instance implements Serializable {
private String license;
@ -12,22 +24,10 @@ public class Instance implements Serializable {
private String type;
private KeyValue hostedby;
private List<String> url;
private KeyValue collectedfrom;
private String publicationdate;// dateofacceptance;
// ( article | book ) processing charges. Defined here to cope with possible wrongly typed
// results
// private Field<String> processingchargeamount;
//
// // currency - alphabetic code describe in ISO-4217. Defined here to cope with possible wrongly
// // typed results
// private Field<String> processingchargecurrency;
private String refereed; // peer-review status
public String getLicense() {
@ -54,14 +54,6 @@ public class Instance implements Serializable {
this.type = type;
}
public KeyValue getHostedby() {
return hostedby;
}
public void setHostedby(KeyValue hostedby) {
this.hostedby = hostedby;
}
public List<String> getUrl() {
return url;
}
@ -70,14 +62,6 @@ public class Instance implements Serializable {
this.url = url;
}
public KeyValue getCollectedfrom() {
return collectedfrom;
}
public void setCollectedfrom(KeyValue collectedfrom) {
this.collectedfrom = collectedfrom;
}
public String getPublicationdate() {
return publicationdate;
}

View File

@ -7,6 +7,10 @@ import org.apache.commons.lang3.StringUtils;
import com.fasterxml.jackson.annotation.JsonIgnore;
/**
* To represent the information described by a key and a value. It has two parameters: - key to store the key (generally
* the OpenAIRE id for some entity) - value to store the value (generally the OpenAIRE name for the key)
*/
public class KeyValue implements Serializable {
private String key;

View File

@ -1,41 +0,0 @@
package eu.dnetlib.dhp.schema.dump.oaf;
import java.io.Serializable;
import java.util.List;
import java.util.Objects;
import java.util.stream.Collectors;
public abstract class Oaf implements Serializable {
/**
* The list of datasource id/name pairs providing this relationship.
*/
protected List<KeyValue> collectedfrom;
private Long lastupdatetimestamp;
public List<KeyValue> getCollectedfrom() {
return collectedfrom;
}
public void setCollectedfrom(List<KeyValue> collectedfrom) {
this.collectedfrom = collectedfrom;
}
public Long getLastupdatetimestamp() {
return lastupdatetimestamp;
}
public void setLastupdatetimestamp(Long lastupdatetimestamp) {
this.lastupdatetimestamp = lastupdatetimestamp;
}
// public void setAllowedValues(eu.dnetlib.dhp.schema.oaf.Oaf o){
// collectedfrom = o.getCollectedfrom().stream().map(cf -> KeyValue.newInstance(cf)).collect(Collectors.toList());
//
// lastupdatetimestamp = o.getLastupdatetimestamp();
//
// }
}

View File

@ -1,59 +0,0 @@
package eu.dnetlib.dhp.schema.dump.oaf;
import java.io.Serializable;
import java.util.List;
public abstract class OafEntity extends Oaf implements Serializable {
private String id;
private List<String> originalId;
private List<ControlledField> pid;
private String dateofcollection;
private List<Projects> projects;
public String getId() {
return id;
}
public void setId(String id) {
this.id = id;
}
public List<String> getOriginalId() {
return originalId;
}
public void setOriginalId(List<String> originalId) {
this.originalId = originalId;
}
public List<ControlledField> getPid() {
return pid;
}
public void setPid(List<ControlledField> pid) {
this.pid = pid;
}
public String getDateofcollection() {
return dateofcollection;
}
public void setDateofcollection(String dateofcollection) {
this.dateofcollection = dateofcollection;
}
public List<Projects> getProjects() {
return projects;
}
public void setProjects(List<Projects> projects) {
this.projects = projects;
}
}

View File

@ -0,0 +1,45 @@
package eu.dnetlib.dhp.schema.dump.oaf;
import java.io.Serializable;
/**
* To represent the generic persistent identifier. It has two parameters: - id of type
* eu.dnetlib.dhp.schema.dump.oaf.ControlledField to store the scheme and value of the Persistent Identifier. -
* provenance of type eu.dnetlib.dhp.schema.dump.oaf.Provenance to store the provenance and trust of the information
*/
public class Pid implements Serializable {
private ControlledField id;
private Provenance provenance;
public ControlledField getId() {
return id;
}
public void setId(ControlledField pid) {
this.id = pid;
}
public Provenance getProvenance() {
return provenance;
}
public void setProvenance(Provenance provenance) {
this.provenance = provenance;
}
public static Pid newInstance(ControlledField pid, Provenance provenance) {
Pid p = new Pid();
p.id = pid;
p.provenance = provenance;
return p;
}
public static Pid newInstance(ControlledField pid) {
Pid p = new Pid();
p.id = pid;
return p;
}
}

View File

@ -0,0 +1,51 @@
package eu.dnetlib.dhp.schema.dump.oaf;
import java.io.Serializable;
/**
* This class to store the common information about the project that will be dumped for community and for the whole
* graph - private String id to store the id of the project (OpenAIRE id) - private String code to store the grant
* agreement of the project - private String acronym to store the acronym of the project - private String title to store
* the tile of the project
*/
public class Project implements Serializable {
protected String id;// OpenAIRE id
protected String code;
protected String acronym;
protected String title;
public String getId() {
return id;
}
public void setId(String id) {
this.id = id;
}
public String getCode() {
return code;
}
public void setCode(String code) {
this.code = code;
}
public String getAcronym() {
return acronym;
}
public void setAcronym(String acronym) {
this.acronym = acronym;
}
public String getTitle() {
return title;
}
public void setTitle(String title) {
this.title = title;
}
}

View File

@ -1,68 +0,0 @@
package eu.dnetlib.dhp.schema.dump.oaf;
import java.util.List;
import eu.dnetlib.dhp.schema.oaf.Project;
public class Projects {
private String id;// OpenAIRE id
private String code;
private String acronym;
private String title;
private Funder funder;
public String getId() {
return id;
}
public void setId(String id) {
this.id = id;
}
public String getCode() {
return code;
}
public void setCode(String code) {
this.code = code;
}
public String getAcronym() {
return acronym;
}
public void setAcronym(String acronym) {
this.acronym = acronym;
}
public String getTitle() {
return title;
}
public void setTitle(String title) {
this.title = title;
}
public Funder getFunder() {
return funder;
}
public void setFunder(Funder funders) {
this.funder = funders;
}
public static Projects newInstance(String id, String code, String acronym, String title, Funder funder) {
Projects projects = new Projects();
projects.setAcronym(acronym);
projects.setCode(code);
projects.setFunder(funder);
projects.setId(id);
projects.setTitle(title);
return projects;
}
}

View File

@ -0,0 +1,41 @@
package eu.dnetlib.dhp.schema.dump.oaf;
import java.io.Serializable;
/**
* Indicates the process that produced (or provided) the information, and the trust associated to the information. It
* has two parameters: - provenance of type String to store the provenance of the information, - trust of type String to
* store the trust associated to the information
*/
public class Provenance implements Serializable {
private String provenance;
private String trust;
public String getProvenance() {
return provenance;
}
public void setProvenance(String provenance) {
this.provenance = provenance;
}
public String getTrust() {
return trust;
}
public void setTrust(String trust) {
this.trust = trust;
}
public static Provenance newInstance(String provenance, String trust) {
Provenance p = new Provenance();
p.provenance = provenance;
p.trust = trust;
return p;
}
public String toString() {
return provenance + trust;
}
}

View File

@ -7,6 +7,11 @@ import org.apache.commons.lang3.StringUtils;
import com.fasterxml.jackson.annotation.JsonIgnore;
/**
* To represent the information described by a code and a value It has two parameters: - code to store the code
* (generally the classid of the eu.dnetlib.dhp.schema.oaf.Qualifier element) - label to store the label (generally the
* classname of the eu.dnetlib.dhp.schema.oaf.Qualifier element
*/
public class Qualifier implements Serializable {
private String code; // the classid in the Qualifier

View File

@ -4,7 +4,66 @@ package eu.dnetlib.dhp.schema.dump.oaf;
import java.io.Serializable;
import java.util.List;
public class Result extends OafEntity implements Serializable {
import eu.dnetlib.dhp.schema.dump.oaf.community.Project;
/**
* To represent the dumped result. It will be extended in the dump for Research Communities - Research
* Initiative/Infrastructures. It has the following parameters: - author of type
* List<eu.dnetlib.dhpschema.dump.oaf.Author> to describe the authors of a result. For each author in the result
* represented in the internal model one author in the esternal model is produced. - type of type String to represent
* the category of the result. Possible values are publication, dataset, software, other. It corresponds to
* resulttype.classname of the dumped result - language of type eu.dnetlib.dhp.schema.dump.oaf.Qualifier to store
* information about the language of the result. It is dumped as - code corresponds to language.classid - value
* corresponds to language.classname - country of type List<eu.dnetlib.dhp.schema.dump.oaf.Country> to store the country
* list to which the result is associated. For each country in the result respresented in the internal model one country
* in the external model is produces - subjects of type List<eu.dnetlib.dhp.dump.oaf.Subject> to store the subjects for
* the result. For each subject in the result represented in the internal model one subject in the external model is
* produced - maintitle of type String to store the main title of the result. It corresponds to the value of the first
* title in the resul to be dumped having classid equals to "main title" - subtitle of type String to store the subtitle
* of the result. It corresponds to the value of the first title in the resul to be dumped having classid equals to
* "subtitle" - description of type List<String> to store the description of the result. It corresponds to the list of
* description.value in the result represented in the internal model - publicationdate of type String to store the
* pubblication date. It corresponds to dateofacceptance.value in the result represented in the internal model -
* publisher of type String to store information about the publisher. It corresponds to publisher.value of the result
* represented in the intrenal model - embargoenddate of type String to store the embargo end date. It corresponds to
* embargoenddate.value of the result represented in the internal model - source of type List<String> See definition of
* Dublin Core field dc:source. It corresponds to the list of source.value in the result represented in the internal
* model - format of type List<String> It corresponds to the list of format.value in the result represented in the
* internal model - contributor of type List<String> to represent contributors for this result. It corresponds to the
* list of contributor.value in the result represented in the internal model - coverage of type String. It corresponds
* to the list of coverage.value in the result represented in the internal model - bestaccessright of type
* eu.dnetlib.dhp.schema.dump.oaf.AccessRight to store informatin about the openest access right associated to the
* manifestations of this research results. It corresponds to the same parameter in the result represented in the
* internal model - container of type eu.dnetlib.dhp.schema/dump.oaf.Container (only for result of type publication). It
* corresponds to the parameter journal of the result represented in the internal model - documentationUrl of type
* List<String> (only for results of type software) to store the URLs to the software documentation. It corresponds to
* the list of documentationUrl.value of the result represented in the internal model - codeRepositoryUrl of type String
* (only for results of type software) to store the URL to the repository with the source code. It corresponds to
* codeRepositoryUrl.value of the result represented in the internal model - programmingLanguage of type String (only
* for results of type software) to store the programming language. It corresponds to programmingLanguaga.classid of the
* result represented in the internal model - contactperson of type List<String> (only for results of type other) to
* store the contact person for this result. It corresponds to the list of contactperson.value of the result represented
* in the internal model - contactgroup of type List<String> (only for results of type other) to store the information
* for the contact group. It corresponds to the list of contactgroup.value of the result represented in the internal
* model - tool of type List<String> (only fro results of type other) to store information about tool useful for the
* interpretation and/or re-used of the research product. It corresponds to the list of tool.value in the result
* represented in the internal modelt - size of type String (only for results of type dataset) to store the size of the
* dataset. It corresponds to size.value in the result represented in the internal model - version of type String (only
* for results of type dataset) to store the version. It corresponds to version.value of the result represented in the
* internal model - geolocation fo type List<eu.dnetlib.dhp.schema.dump.oaf.GeoLocation> (only for results of type
* dataset) to store geolocation information. For each geolocation element in the result represented in the internal
* model a GeoLocation in the external model il produced - id of type String to store the OpenAIRE id of the result. It
* corresponds to the id of the result represented in the internal model - originalId of type List<String> to store the
* original ids of the result. It corresponds to the originalId of the result represented in the internal model - pid of
* type List<eu.dnetlib.dhp.schema.dump.oaf.ControlledField> to store the persistent identifiers for the result. For
* each pid in the results represented in the internal model one pid in the external model is produced. The value
* correspondence is: - scheme corresponds to pid.qualifier.classid of the result represented in the internal model -
* value corresponds to the pid.value of the result represented in the internal model - dateofcollection of type String
* to store information about the time OpenAIRE collected the record. It corresponds to dateofcollection of the result
* represented in the internal model - lasteupdatetimestamp of type String to store the timestamp of the last update of
* the record. It corresponds to lastupdatetimestamp of the resord represented in the internal model
*/
public class Result implements Serializable {
private List<Author> author;
@ -16,7 +75,7 @@ public class Result extends OafEntity implements Serializable {
private List<Country> country;
private List<ControlledField> subject;
private List<Subject> subjects;
private String maintitle;
@ -40,12 +99,6 @@ public class Result extends OafEntity implements Serializable {
private AccessRight bestaccessright;
private List<Context> context;
private List<ExternalReference> externalReference;
private List<Instance> instance;
private Container container;// Journal
private List<String> documentationUrl; // software
@ -66,6 +119,56 @@ public class Result extends OafEntity implements Serializable {
private List<GeoLocation> geolocation; // dataset
private String id;
private List<String> originalId;
private List<ControlledField> pid;
private String dateofcollection;
private Long lastupdatetimestamp;
public Long getLastupdatetimestamp() {
return lastupdatetimestamp;
}
public void setLastupdatetimestamp(Long lastupdatetimestamp) {
this.lastupdatetimestamp = lastupdatetimestamp;
}
public String getId() {
return id;
}
public void setId(String id) {
this.id = id;
}
public List<String> getOriginalId() {
return originalId;
}
public void setOriginalId(List<String> originalId) {
this.originalId = originalId;
}
public List<ControlledField> getPid() {
return pid;
}
public void setPid(List<ControlledField> pid) {
this.pid = pid;
}
public String getDateofcollection() {
return dateofcollection;
}
public void setDateofcollection(String dateofcollection) {
this.dateofcollection = dateofcollection;
}
public List<Author> getAuthor() {
return author;
}
@ -106,12 +209,12 @@ public class Result extends OafEntity implements Serializable {
this.country = country;
}
public List<ControlledField> getSubject() {
return subject;
public List<Subject> getSubjects() {
return subjects;
}
public void setSubject(List<ControlledField> subject) {
this.subject = subject;
public void setSubjects(List<Subject> subjects) {
this.subjects = subjects;
}
public String getMaintitle() {
@ -202,30 +305,6 @@ public class Result extends OafEntity implements Serializable {
this.bestaccessright = bestaccessright;
}
public List<Context> getContext() {
return context;
}
public void setContext(List<Context> context) {
this.context = context;
}
public List<ExternalReference> getExternalReference() {
return externalReference;
}
public void setExternalReference(List<ExternalReference> externalReference) {
this.externalReference = externalReference;
}
public List<Instance> getInstance() {
return instance;
}
public void setInstance(List<Instance> instance) {
this.instance = instance;
}
public List<String> getDocumentationUrl() {
return documentationUrl;
}

View File

@ -0,0 +1,34 @@
package eu.dnetlib.dhp.schema.dump.oaf;
import java.io.Serializable;
/**
* To represent keywords associated to the result. It has two parameters: - subject of type
* eu.dnetlib.dhp.schema.dump.oaf.ControlledField to describe the subject. It mapped as: - schema it corresponds to
* qualifier.classid of the dumped subject - value it corresponds to the subject value - provenance of type
* eu.dnetlib.dhp.schema.dump.oaf.Provenance to represent the provenance of the subject. It is dumped only if dataInfo
* is not null. In this case: - provenance corresponds to dataInfo.provenanceaction.classname - trust corresponds to
* dataInfo.trust
*/
public class Subject implements Serializable {
private ControlledField subject;
private Provenance provenance;
public ControlledField getSubject() {
return subject;
}
public void setSubject(ControlledField subject) {
this.subject = subject;
}
public Provenance getProvenance() {
return provenance;
}
public void setProvenance(Provenance provenance) {
this.provenance = provenance;
}
}

View File

@ -0,0 +1,36 @@
package eu.dnetlib.dhp.schema.dump.oaf.community;
import eu.dnetlib.dhp.schema.dump.oaf.Instance;
import eu.dnetlib.dhp.schema.dump.oaf.KeyValue;
/**
* It extends eu.dnetlib.dhp.dump.oaf.Instance with values related to the community dump. In the Result dump this
* information is not present because it is dumped as a set of relations between the result and the datasource. -
* hostedby of type eu.dnetlib.dhp.schema.dump.oaf.KeyValue to store the information about the source from which the
* instance can be viewed or downloaded. It is mapped against the hostedby parameter of the instance to be dumped and -
* key corresponds to hostedby.key - value corresponds to hostedby.value - collectedfrom of type
* eu.dnetlib.dhp.schema.dump.oaf.KeyValue to store the information about the source from which the instance has been
* collected. It is mapped against the collectedfrom parameter of the instance to be dumped and - key corresponds to
* collectedfrom.key - value corresponds to collectedfrom.value
*/
public class CommunityInstance extends Instance {
private KeyValue hostedby;
private KeyValue collectedfrom;
public KeyValue getHostedby() {
return hostedby;
}
public void setHostedby(KeyValue hostedby) {
this.hostedby = hostedby;
}
public KeyValue getCollectedfrom() {
return collectedfrom;
}
public void setCollectedfrom(KeyValue collectedfrom) {
this.collectedfrom = collectedfrom;
}
}

View File

@ -0,0 +1,63 @@
package eu.dnetlib.dhp.schema.dump.oaf.community;
import java.util.List;
import eu.dnetlib.dhp.schema.dump.oaf.KeyValue;
import eu.dnetlib.dhp.schema.dump.oaf.Result;
/**
* extends eu.dnetlib.dhp.schema.dump.oaf.Result with the following parameters: - projects of type
* List<eu.dnetlib.dhp.schema.dump.oaf.community.Project> to store the list of projects related to the result. The
* information is added after the result is mapped to the external model - context of type
* List<eu.dnetlib.dhp.schema.dump.oaf.community.Context> to store information about the RC RI related to the result.
* For each context in the result represented in the internal model one context in the external model is produced -
* collectedfrom of type List<eu.dnetliv.dhp.schema.dump.oaf.KeyValue> to store information about the sources from which
* the record has been collected. For each collectedfrom in the result represented in the internal model one
* collectedfrom in the external model is produced - instance of type
* List<eu.dnetlib.dhp.schema.dump.oaf.community.CommunityInstance> to store all the instances associated to the result.
* It corresponds to the same parameter in the result represented in the internal model
*/
public class CommunityResult extends Result {
private List<Project> projects;
private List<Context> context;
protected List<KeyValue> collectedfrom;
private List<CommunityInstance> instance;
public List<CommunityInstance> getInstance() {
return instance;
}
public void setInstance(List<CommunityInstance> instance) {
this.instance = instance;
}
public List<KeyValue> getCollectedfrom() {
return collectedfrom;
}
public void setCollectedfrom(List<KeyValue> collectedfrom) {
this.collectedfrom = collectedfrom;
}
public List<Project> getProjects() {
return projects;
}
public void setProjects(List<Project> projects) {
this.projects = projects;
}
public List<Context> getContext() {
return context;
}
public void setContext(List<Context> context) {
this.context = context;
}
}

View File

@ -0,0 +1,40 @@
package eu.dnetlib.dhp.schema.dump.oaf.community;
import java.util.List;
import java.util.Objects;
import eu.dnetlib.dhp.schema.dump.oaf.Provenance;
import eu.dnetlib.dhp.schema.dump.oaf.Qualifier;
/**
* Reference to a relevant research infrastructure, initiative or community (RI/RC) among those collaborating with
* OpenAIRE. It extend eu.dnetlib.dhp.shema.dump.oaf.Qualifier with a parameter provenance of type
* List<eu.dnetlib.dhp.schema.dump.oaf.Provenance> to store the provenances of the association between the result and
* the RC/RI. The values for this element correspond to: - code: it corresponds to the id of the context in the result
* to be mapped. If the context id refers to a RC/RI and contains '::' only the part of the id before the first "::"
* will be used as value for code - label it corresponds to the label associated to the id. The information id taken
* from the profile of the RC/RI - provenance it is set only if the dataInfo associated to the contenxt element of the
* result to be dumped is not null. For each dataInfo one instance of type eu.dnetlib.dhp.schema.dump.oaf.Provenance is
* instantiated if the element datainfo.provenanceaction is not null. In this case - provenance corresponds to
* dataInfo.provenanceaction.classname - trust corresponds to dataInfo.trust
*/
public class Context extends Qualifier {
private List<Provenance> provenance;
public List<Provenance> getProvenance() {
return provenance;
}
public void setProvenance(List<Provenance> provenance) {
this.provenance = provenance;
}
@Override
public int hashCode() {
String provenance = new String();
this.provenance.forEach(p -> provenance.concat(p.toString()));
return Objects.hash(getCode(), getLabel(), provenance);
}
}

View File

@ -0,0 +1,23 @@
package eu.dnetlib.dhp.schema.dump.oaf.community;
import java.io.Serializable;
/**
* To store information about the funder funding the project related to the result. It has the following parameters: -
* shortName of type String to store the funder short name (e.c. AKA). - name of type String to store the funder name
* (e.c. Akademy of Finland) - fundingStream of type String to store the funding stream - jurisdiction of type String to
* store the jurisdiction of the funder
*/
public class Funder extends eu.dnetlib.dhp.schema.dump.oaf.Funder {
private String fundingStream;
public String getFundingStream() {
return fundingStream;
}
public void setFundingStream(String fundingStream) {
this.fundingStream = fundingStream;
}
}

View File

@ -0,0 +1,47 @@
package eu.dnetlib.dhp.schema.dump.oaf.community;
import java.io.Serializable;
import eu.dnetlib.dhp.schema.dump.oaf.Provenance;
/**
* To store information about the project related to the result. This information is not directly mapped from the result
* represented in the internal model because it is not there. The mapped result will be enriched with project
* information derived by relation between results and projects. Project extends eu.dnetlib.dhp.schema.dump.oaf.Project
* with the following parameters: - funder of type eu.dnetlib.dhp.schema.dump.oaf.community.Funder to store information
* about the funder funding the project - provenance of type eu.dnetlib.dhp.schema.dump.oaf.Provenance to store
* information about the. provenance of the association between the result and the project
*/
public class Project extends eu.dnetlib.dhp.schema.dump.oaf.Project {
private Funder funder;
private Provenance provenance;
public Provenance getProvenance() {
return provenance;
}
public void setProvenance(Provenance provenance) {
this.provenance = provenance;
}
public Funder getFunder() {
return funder;
}
public void setFunder(Funder funders) {
this.funder = funders;
}
public static Project newInstance(String id, String code, String acronym, String title, Funder funder) {
Project project = new Project();
project.setAcronym(acronym);
project.setCode(code);
project.setFunder(funder);
project.setId(id);
project.setTitle(title);
return project;
}
}

View File

@ -0,0 +1,21 @@
package eu.dnetlib.dhp.schema.dump.oaf.graph;
import java.io.Serializable;
public class Constants implements Serializable {
// collectedFrom va con isProvidedBy -> becco da ModelSupport
public static final String HOSTED_BY = "isHostedBy";
public static final String HOSTS = "hosts";
// community result uso isrelatedto
public static final String RESULT_ENTITY = "result";
public static final String DATASOURCE_ENTITY = "datasource";
public static final String CONTEXT_ENTITY = "context";
public static final String CONTEXT_ID = "60";
public static final String CONTEXT_NS_PREFIX = "context____";
}

View File

@ -0,0 +1,316 @@
package eu.dnetlib.dhp.schema.dump.oaf.graph;
import java.io.Serializable;
import java.util.List;
import eu.dnetlib.dhp.schema.dump.oaf.Container;
import eu.dnetlib.dhp.schema.dump.oaf.ControlledField;
import eu.dnetlib.dhp.schema.dump.oaf.KeyValue;
/**
* To store information about the datasource OpenAIRE collects information from. It contains the following parameters: -
* id of type String to store the OpenAIRE id for the datasource. It corresponds to the parameter id of the datasource
* represented in the internal model - originalId of type List<String> to store the list of original ids associated to
* the datasource. It corresponds to the parameter originalId of the datasource represented in the internal model. The
* null values are filtered out - pid of type List<eu.dnetlib.shp.schema.dump.oaf.ControlledField> to store the
* persistent identifiers for the datasource. For each pid in the datasource represented in the internal model one pid
* in the external model is produced as : - schema corresponds to pid.qualifier.classid of the datasource represented in
* the internal model - value corresponds to pid.value of the datasource represented in the internal model -
* datasourceType of type eu.dnetlib.dhp.schema.dump.oaf.ControlledField to store the datasource type (e.g.
* pubsrepository::institutional, Institutional Repository) as in the dnet vocabulary dnet:datasource_typologies. It
* corresponds to datasourcetype of the datasource represented in the internal model and : - code corresponds to
* datasourcetype.classid - value corresponds to datasourcetype.classname - openairecompatibility of type String to
* store information about the OpenAIRE compatibility of the ingested results (which guidelines they are compliant to).
* It corresponds to openairecompatibility.classname of the datasource represented in the internal model - officialname
* of type Sgtring to store the official name of the datasource. It correspond to officialname.value of the datasource
* represented in the internal model - englishname of type String to store the English name of the datasource. It
* corresponds to englishname.value of the datasource represented in the internal model - websiteurl of type String to
* store the URL of the website of the datasource. It corresponds to websiteurl.value of the datasource represented in
* the internal model - logourl of type String to store the URL of the logo for the datasource. It corresponds to
* logourl.value of the datasource represented in the internal model - dateofvalidation of type String to store the data
* of validation against the guidelines for the datasource records. It corresponds to dateofvalidation.value of the
* datasource represented in the internal model - description of type String to store the description for the
* datasource. It corresponds to description.value of the datasource represented in the internal model
*/
public class Datasource implements Serializable {
private String id; // string
private List<String> originalId; // list string
private List<ControlledField> pid; // list<String>
private ControlledField datasourcetype; // value
private String openairecompatibility; // value
private String officialname; // string
private String englishname; // string
private String websiteurl; // string
private String logourl; // string
private String dateofvalidation; // string
private String description; // description
private List<String> subjects; // List<String>
// opendoar specific fields (od*)
private List<String> languages; // odlanguages List<String>
private List<String> contenttypes; // odcontent types List<String>
// re3data fields
private String releasestartdate; // string
private String releaseenddate; // string
private String missionstatementurl; // string
// {open, restricted or closed}
private String accessrights; // databaseaccesstype string
// {open, restricted or closed}
private String uploadrights; // datauploadtype string
// {feeRequired, registration, other}
private String databaseaccessrestriction; // string
// {feeRequired, registration, other}
private String datauploadrestriction; // string
private Boolean versioning; // boolean
private String citationguidelineurl; // string
// {yes, no, uknown}
private String pidsystems; // string
private String certificates; // string
private List<Object> policies; //
private Container journal; // issn etc del Journal
public String getId() {
return id;
}
public void setId(String id) {
this.id = id;
}
public List<String> getOriginalId() {
return originalId;
}
public void setOriginalId(List<String> originalId) {
this.originalId = originalId;
}
public List<ControlledField> getPid() {
return pid;
}
public void setPid(List<ControlledField> pid) {
this.pid = pid;
}
public ControlledField getDatasourcetype() {
return datasourcetype;
}
public void setDatasourcetype(ControlledField datasourcetype) {
this.datasourcetype = datasourcetype;
}
public String getOpenairecompatibility() {
return openairecompatibility;
}
public void setOpenairecompatibility(String openairecompatibility) {
this.openairecompatibility = openairecompatibility;
}
public String getOfficialname() {
return officialname;
}
public void setOfficialname(String officialname) {
this.officialname = officialname;
}
public String getEnglishname() {
return englishname;
}
public void setEnglishname(String englishname) {
this.englishname = englishname;
}
public String getWebsiteurl() {
return websiteurl;
}
public void setWebsiteurl(String websiteurl) {
this.websiteurl = websiteurl;
}
public String getLogourl() {
return logourl;
}
public void setLogourl(String logourl) {
this.logourl = logourl;
}
public String getDateofvalidation() {
return dateofvalidation;
}
public void setDateofvalidation(String dateofvalidation) {
this.dateofvalidation = dateofvalidation;
}
public String getDescription() {
return description;
}
public void setDescription(String description) {
this.description = description;
}
public List<String> getSubjects() {
return subjects;
}
public void setSubjects(List<String> subjects) {
this.subjects = subjects;
}
public List<String> getLanguages() {
return languages;
}
public void setLanguages(List<String> languages) {
this.languages = languages;
}
public List<String> getContenttypes() {
return contenttypes;
}
public void setContenttypes(List<String> contenttypes) {
this.contenttypes = contenttypes;
}
public String getReleasestartdate() {
return releasestartdate;
}
public void setReleasestartdate(String releasestartdate) {
this.releasestartdate = releasestartdate;
}
public String getReleaseenddate() {
return releaseenddate;
}
public void setReleaseenddate(String releaseenddate) {
this.releaseenddate = releaseenddate;
}
public String getMissionstatementurl() {
return missionstatementurl;
}
public void setMissionstatementurl(String missionstatementurl) {
this.missionstatementurl = missionstatementurl;
}
public String getAccessrights() {
return accessrights;
}
public void setAccessrights(String accessrights) {
this.accessrights = accessrights;
}
public String getUploadrights() {
return uploadrights;
}
public void setUploadrights(String uploadrights) {
this.uploadrights = uploadrights;
}
public String getDatabaseaccessrestriction() {
return databaseaccessrestriction;
}
public void setDatabaseaccessrestriction(String databaseaccessrestriction) {
this.databaseaccessrestriction = databaseaccessrestriction;
}
public String getDatauploadrestriction() {
return datauploadrestriction;
}
public void setDatauploadrestriction(String datauploadrestriction) {
this.datauploadrestriction = datauploadrestriction;
}
public Boolean getVersioning() {
return versioning;
}
public void setVersioning(Boolean versioning) {
this.versioning = versioning;
}
public String getCitationguidelineurl() {
return citationguidelineurl;
}
public void setCitationguidelineurl(String citationguidelineurl) {
this.citationguidelineurl = citationguidelineurl;
}
public String getPidsystems() {
return pidsystems;
}
public void setPidsystems(String pidsystems) {
this.pidsystems = pidsystems;
}
public String getCertificates() {
return certificates;
}
public void setCertificates(String certificates) {
this.certificates = certificates;
}
public List<Object> getPolicies() {
return policies;
}
public void setPolicies(List<Object> policiesr3) {
this.policies = policiesr3;
}
public Container getJournal() {
return journal;
}
public void setJournal(Container journal) {
this.journal = journal;
}
}

View File

@ -0,0 +1,22 @@
package eu.dnetlib.dhp.schema.dump.oaf.graph;
import java.io.Serializable;
/**
* To store information about the funder funding the project related to the result. It extends
* eu.dnetlib.dhp.schema.dump.oaf.Funder with the following parameter: - - private
* eu.dnetdlib.dhp.schema.dump.oaf.graph.Fundings funding_stream to store the fundingstream
*/
public class Funder extends eu.dnetlib.dhp.schema.dump.oaf.Funder {
private Fundings funding_stream;
public Fundings getFunding_stream() {
return funding_stream;
}
public void setFunding_stream(Fundings funding_stream) {
this.funding_stream = funding_stream;
}
}

View File

@ -0,0 +1,35 @@
package eu.dnetlib.dhp.schema.dump.oaf.graph;
import java.io.Serializable;
/**
* To store inforamtion about the funding stream. It has two parameters: - private String id to store the id of the
* fundings stream. The id is created by appending the shortname of the funder to the name of each level in the xml
* representing the fundng stream. For example: if the funder is the European Commission, the funding level 0 name is
* FP7, the funding level 1 name is SP3 and the funding level 2 name is PEOPLE then the id will be: EC::FP7::SP3::PEOPLE
* - private String description to describe the funding stream. It is created by concatenating the description of each
* funding level so for the example above the description would be: SEVENTH FRAMEWORK PROGRAMME - SP3-People -
* Marie-Curie Actions
*/
public class Fundings implements Serializable {
private String id;
private String description;
public String getId() {
return id;
}
public void setId(String id) {
this.id = id;
}
public String getDescription() {
return description;
}
public void setDescription(String description) {
this.description = description;
}
}

View File

@ -0,0 +1,55 @@
package eu.dnetlib.dhp.schema.dump.oaf.graph;
import java.io.Serializable;
import java.util.Optional;
/**
* To describe the funded amount. It has the following parameters: - private String currency to store the currency of
* the fund - private float totalcost to store the total cost of the project - private float fundedamount to store the
* funded amount by the funder
*/
public class Granted implements Serializable {
private String currency;
private float totalcost;
private float fundedamount;
public String getCurrency() {
return currency;
}
public void setCurrency(String currency) {
this.currency = currency;
}
public float getTotalcost() {
return totalcost;
}
public void setTotalcost(float totalcost) {
this.totalcost = totalcost;
}
public float getFundedamount() {
return fundedamount;
}
public void setFundedamount(float fundedamount) {
this.fundedamount = fundedamount;
}
public static Granted newInstance(String currency, float totalcost, float fundedamount) {
Granted granted = new Granted();
granted.currency = currency;
granted.totalcost = totalcost;
granted.fundedamount = fundedamount;
return granted;
}
public static Granted newInstance(String currency, float fundedamount) {
Granted granted = new Granted();
granted.currency = currency;
granted.fundedamount = fundedamount;
return granted;
}
}

View File

@ -0,0 +1,24 @@
package eu.dnetlib.dhp.schema.dump.oaf.graph;
import java.util.List;
import eu.dnetlib.dhp.schema.dump.oaf.Instance;
import eu.dnetlib.dhp.schema.dump.oaf.Result;
/**
* It extends the eu.dnetlib.dhp.schema.dump.oaf.Result with - instance of type
* List<eu.dnetlib.dhp.schema.dump.oaf.Instance> to store all the instances associated to the result. It corresponds to
* the same parameter in the result represented in the internal model
*/
public class GraphResult extends Result {
private List<Instance> instance;
public List<Instance> getInstance() {
return instance;
}
public void setInstance(List<Instance> instance) {
this.instance = instance;
}
}

View File

@ -0,0 +1,82 @@
package eu.dnetlib.dhp.schema.dump.oaf.graph;
import java.io.Serializable;
/**
* To store information about the classification for the project. The classification depends on the programme. For example
* H2020-EU.3.4.5.3 can be classified as
* H2020-EU.3. => Societal Challenges (level1)
* H2020-EU.3.4. => Transport (level2)
* H2020-EU.3.4.5. => CLEANSKY2 (level3)
* H2020-EU.3.4.5.3. => IADP Fast Rotorcraft (level4)
*
* We decided to explicitly represent up to three levels in the classification.
*
* H2020Classification has the following parameters:
* - private Programme programme to store the information about the programme related to this classification
* - private String level1 to store the information about the level 1 of the classification (Priority or Pillar of the EC)
* - private String level2 to store the information about the level2 af the classification (Objectives (?))
* - private String level3 to store the information about the level3 of the classification
* - private String classification to store the entire classification related to the programme
*/
public class H2020Classification implements Serializable {
private Programme programme;
private String level1;
private String level2;
private String level3;
private String classification;
public Programme getProgramme() {
return programme;
}
public void setProgramme(Programme programme) {
this.programme = programme;
}
public String getLevel1() {
return level1;
}
public void setLevel1(String level1) {
this.level1 = level1;
}
public String getLevel2() {
return level2;
}
public void setLevel2(String level2) {
this.level2 = level2;
}
public String getLevel3() {
return level3;
}
public void setLevel3(String level3) {
this.level3 = level3;
}
public String getClassification() {
return classification;
}
public void setClassification(String classification) {
this.classification = classification;
}
public static H2020Classification newInstance(String programme_code, String programme_description, String level1,
String level2, String level3, String classification) {
H2020Classification h2020classification = new H2020Classification();
h2020classification.programme = Programme.newInstance(programme_code, programme_description);
h2020classification.level1 = level1;
h2020classification.level2 = level2;
h2020classification.level3 = level3;
h2020classification.classification = classification;
return h2020classification;
}
}

View File

@ -0,0 +1,38 @@
package eu.dnetlib.dhp.schema.dump.oaf.graph;
import java.io.Serializable;
/**
* To represent the generic node in a relation. It has the following parameters: - private String id the openaire id of
* the entity in the relation - private String type the type of the entity in the relation. Consider the generic
* relation between a Result R and a Project P, the node representing R will have as id the id of R and as type result,
* while the node representing the project will have as id the id of the project and as type project
*/
public class Node implements Serializable {
private String id;
private String type;
public String getId() {
return id;
}
public void setId(String id) {
this.id = id;
}
public String getType() {
return type;
}
public void setType(String type) {
this.type = type;
}
public static Node newInstance(String id, String type) {
Node node = new Node();
node.id = id;
node.type = type;
return node;
}
}

View File

@ -0,0 +1,86 @@
package eu.dnetlib.dhp.schema.dump.oaf.graph;
import java.io.Serializable;
import java.util.List;
import eu.dnetlib.dhp.schema.dump.oaf.ControlledField;
import eu.dnetlib.dhp.schema.dump.oaf.Country;
import eu.dnetlib.dhp.schema.dump.oaf.KeyValue;
import eu.dnetlib.dhp.schema.dump.oaf.Qualifier;
import eu.dnetlib.dhp.schema.dump.oaf.community.Project;
/**
* To represent the generic organizaiton. It has the following parameters: - private String legalshortname to store the
* legalshortname of the organizaiton - private String legalname to store the legal name of the organization - private
* String websiteurl to store the websiteurl of the organization - private List<String> alternativenames to store the
* alternative names of the organization - private Qualifier country to store the country of the organization - private
* String id to store the id of the organization - private List<ControlledField> pid to store the list of pids for the
* organization
*/
public class Organization implements Serializable {
private String legalshortname;
private String legalname;
private String websiteurl;
private List<String> alternativenames;
private Qualifier country;
private String id;
private List<ControlledField> pid;
public String getLegalshortname() {
return legalshortname;
}
public void setLegalshortname(String legalshortname) {
this.legalshortname = legalshortname;
}
public String getLegalname() {
return legalname;
}
public void setLegalname(String legalname) {
this.legalname = legalname;
}
public String getWebsiteurl() {
return websiteurl;
}
public void setWebsiteurl(String websiteurl) {
this.websiteurl = websiteurl;
}
public List<String> getAlternativenames() {
return alternativenames;
}
public void setAlternativenames(List<String> alternativenames) {
this.alternativenames = alternativenames;
}
public Qualifier getCountry() {
return country;
}
public void setCountry(Qualifier country) {
this.country = country;
}
public String getId() {
return id;
}
public void setId(String id) {
this.id = id;
}
public List<ControlledField> getPid() {
return pid;
}
public void setPid(List<ControlledField> pid) {
this.pid = pid;
}
}

View File

@ -0,0 +1,36 @@
package eu.dnetlib.dhp.schema.dump.oaf.graph;
import java.io.Serializable;
/**
* To store information about the ec programme for the project. It has the following parameters: - private String code
* to store the code of the programme - private String description to store the description of the programme
*/
public class Programme implements Serializable {
private String code;
private String description;
public String getCode() {
return code;
}
public void setCode(String code) {
this.code = code;
}
public String getDescription() {
return description;
}
public void setDescription(String description) {
this.description = description;
}
public static Programme newInstance(String code, String description) {
Programme p = new Programme();
p.code = code;
p.description = description;
return p;
}
}

View File

@ -0,0 +1,192 @@
package eu.dnetlib.dhp.schema.dump.oaf.graph;
import java.io.Serializable;
import java.util.List;
/**
* This is the class representing the Project in the model used for the dumps of the whole graph. At the moment the dump
* of the Projects differs from the other dumps because we do not create relations between Funders (Organization) and
* Projects but we put the information about the Funder within the Project representation. We also removed the
* collected from element from the Project. No relation between the Project and the Datasource entity from which it is
* collected will be created. We will never create relations between Project and Datasource. In case some relation will
* be extracted from the Project they will refer the Funder and will be of type ( organization -> funds -> project,
* project -> isFundedBy -> organization) We also removed the duration parameter because the most of times it is set to
* 0. It has the following parameters:
* - private String id to store the id of the project (OpenAIRE id)
* - private String websiteurl to store the websiteurl of the project
* - private String code to store the grant agreement of the project
* - private String acronym to store the acronym of the project
* - private String title to store the tile of the project
* - private String startdate to store the start date
* - private String enddate to store the end date
* - private String callidentifier to store the call indentifier
* - private String keywords to store the keywords
* - private boolean openaccessmandateforpublications to store if the project must accomplish to the open access mandate
* for publications. This value will be set to true if one of the field in the project represented in the internal model
* is set to true
* - private boolean openaccessmandatefordataset to store if the project must accomplish to the open access mandate for
* dataset. It is set to the value in the corresponding filed of the project represented in the internal model
* - private List<String> subject to store the list of subjects of the project
* - private List<Funder> funding to store the list of funder of the project
* - private String summary to store the summary of the project
* - private Granted granted to store the granted amount
* - private List<Programme> h2020programme to store the list of programmes the project is related to
*/
public class Project implements Serializable {
private String id;
private String websiteurl;
private String code;
private String acronym;
private String title;
private String startdate;
private String enddate;
private String callidentifier;
private String keywords;
private boolean openaccessmandateforpublications;
private boolean openaccessmandatefordataset;
private List<String> subject;
private List<Funder> funding;
private String summary;
private Granted granted;
private List<Programme> h2020programme;
public String getId() {
return id;
}
public void setId(String id) {
this.id = id;
}
public String getWebsiteurl() {
return websiteurl;
}
public void setWebsiteurl(String websiteurl) {
this.websiteurl = websiteurl;
}
public String getCode() {
return code;
}
public void setCode(String code) {
this.code = code;
}
public String getAcronym() {
return acronym;
}
public void setAcronym(String acronym) {
this.acronym = acronym;
}
public String getTitle() {
return title;
}
public void setTitle(String title) {
this.title = title;
}
public String getStartdate() {
return startdate;
}
public void setStartdate(String startdate) {
this.startdate = startdate;
}
public String getEnddate() {
return enddate;
}
public void setEnddate(String enddate) {
this.enddate = enddate;
}
public String getCallidentifier() {
return callidentifier;
}
public void setCallidentifier(String callidentifier) {
this.callidentifier = callidentifier;
}
public String getKeywords() {
return keywords;
}
public void setKeywords(String keywords) {
this.keywords = keywords;
}
public boolean isOpenaccessmandateforpublications() {
return openaccessmandateforpublications;
}
public void setOpenaccessmandateforpublications(boolean openaccessmandateforpublications) {
this.openaccessmandateforpublications = openaccessmandateforpublications;
}
public boolean isOpenaccessmandatefordataset() {
return openaccessmandatefordataset;
}
public void setOpenaccessmandatefordataset(boolean openaccessmandatefordataset) {
this.openaccessmandatefordataset = openaccessmandatefordataset;
}
public List<String> getSubject() {
return subject;
}
public void setSubject(List<String> subject) {
this.subject = subject;
}
public List<Funder> getFunding() {
return funding;
}
public void setFunding(List<Funder> funding) {
this.funding = funding;
}
public String getSummary() {
return summary;
}
public void setSummary(String summary) {
this.summary = summary;
}
public Granted getGranted() {
return granted;
}
public void setGranted(Granted granted) {
this.granted = granted;
}
public List<Programme> getH2020programme() {
return h2020programme;
}
public void setH2020programme(List<Programme> h2020programme) {
this.h2020programme = h2020programme;
}
}

View File

@ -0,0 +1,39 @@
package eu.dnetlib.dhp.schema.dump.oaf.graph;
import java.io.Serializable;
/**
* To represent the semantics of the generic relation between two entities. It has the following parameters: - private
* String name to store the semantics of the relation (i.e. isAuthorInstitutionOf). It corresponds to the relclass
* parameter in the relation represented in the internal model represented in the internal model - private String type
* to store the type of the relation (i.e. affiliation). It corresponds to the subreltype parameter of the relation
* represented in theinternal model
*/
public class RelType implements Serializable {
private String name; // relclass
private String type; // subreltype
public String getName() {
return name;
}
public void setName(String name) {
this.name = name;
}
public String getType() {
return type;
}
public void setType(String type) {
this.type = type;
}
public static RelType newInstance(String name, String type) {
RelType rel = new RelType();
rel.name = name;
rel.type = type;
return rel;
}
}

View File

@ -0,0 +1,67 @@
package eu.dnetlib.dhp.schema.dump.oaf.graph;
import java.io.Serializable;
import java.util.Objects;
import eu.dnetlib.dhp.schema.dump.oaf.Provenance;
/**
* To represent the gereric relation between two entities. It has the following parameters: - private Node source to
* represent the entity source of the relation - private Node target to represent the entity target of the relation -
* private RelType reltype to represent the semantics of the relation - private Provenance provenance to represent the
* provenance of the relation
*/
public class Relation implements Serializable {
private Node source;
private Node target;
private RelType reltype;
private Provenance provenance;
public Node getSource() {
return source;
}
public void setSource(Node source) {
this.source = source;
}
public Node getTarget() {
return target;
}
public void setTarget(Node target) {
this.target = target;
}
public RelType getReltype() {
return reltype;
}
public void setReltype(RelType reltype) {
this.reltype = reltype;
}
public Provenance getProvenance() {
return provenance;
}
public void setProvenance(Provenance provenance) {
this.provenance = provenance;
}
@Override
public int hashCode() {
return Objects.hash(source.getId(), target.getId(), reltype.getType() + ":" + reltype.getName());
}
public static Relation newInstance(Node source, Node target, RelType reltype, Provenance provenance) {
Relation relation = new Relation();
relation.source = source;
relation.target = target;
relation.reltype = reltype;
relation.provenance = provenance;
return relation;
}
}

View File

@ -0,0 +1,20 @@
package eu.dnetlib.dhp.schema.dump.oaf.graph;
import java.util.List;
/**
* To represent RC entities. It extends eu.dnetlib.dhp.dump.oaf.grap.ResearchInitiative by adding the parameter subject
* to store the list of subjects related to the community
*/
public class ResearchCommunity extends ResearchInitiative {
private List<String> subject;
public List<String> getSubject() {
return subject;
}
public void setSubject(List<String> subject) {
this.subject = subject;
}
}

View File

@ -0,0 +1,75 @@
package eu.dnetlib.dhp.schema.dump.oaf.graph;
import java.io.Serializable;
/**
* To represent entity of type RC/RI. It has the following parameters, which are mostly derived by the profile
* - private
* String id to store the openaire id for the entity. Is has as code 00 and will be created as
* 00|context_____::md5(originalId) private
* String originalId to store the id of the context as provided in the profile
* (i.e. mes)
* - private String name to store the name of the context (got from the label attribute in the context
* definition)
* - private String type to store the type of the context (i.e.: research initiative or research community)
* - private String description to store the description of the context as given in the profile
* -private String
* zenodo_community to store the zenodo community associated to the context (main zenodo community)
*/
public class ResearchInitiative implements Serializable {
private String id; // openaireId
private String originalId; // context id
private String name; // context name
private String type; // context type: research initiative or research community
private String description;
private String zenodo_community;
public String getZenodo_community() {
return zenodo_community;
}
public void setZenodo_community(String zenodo_community) {
this.zenodo_community = zenodo_community;
}
public String getType() {
return type;
}
public void setType(String type) {
this.type = type;
}
public String getId() {
return id;
}
public void setId(String id) {
this.id = id;
}
public String getName() {
return name;
}
public void setName(String label) {
this.name = label;
}
public String getOriginalId() {
return originalId;
}
public void setOriginalId(String originalId) {
this.originalId = originalId;
}
public String getDescription() {
return description;
}
public void setDescription(String description) {
this.description = description;
}
}

View File

@ -0,0 +1,88 @@
package eu.dnetlib.dhp.schema.oaf;
import java.io.Serializable;
import java.util.Objects;
/**
* To store information about the classification for the project. The classification depends on the programme. For example
* H2020-EU.3.4.5.3 can be classified as
* H2020-EU.3. => Societal Challenges (level1)
* H2020-EU.3.4. => Transport (level2)
* H2020-EU.3.4.5. => CLEANSKY2 (level3)
* H2020-EU.3.4.5.3. => IADP Fast Rotorcraft (level4)
*
* We decided to explicitly represent up to three levels in the classification.
*
* H2020Classification has the following parameters:
* - private Programme programme to store the information about the programme related to this classification
* - private String level1 to store the information about the level 1 of the classification (Priority or Pillar of the EC)
* - private String level2 to store the information about the level2 af the classification (Objectives (?))
* - private String level3 to store the information about the level3 of the classification
* - private String classification to store the entire classification related to the programme
*/
public class H2020Classification implements Serializable {
private H2020Programme h2020Programme;
private String level1;
private String level2;
private String level3;
private String classification;
public H2020Programme getH2020Programme() {
return h2020Programme;
}
public void setH2020Programme(H2020Programme h2020Programme) {
this.h2020Programme = h2020Programme;
}
public String getLevel1() {
return level1;
}
public void setLevel1(String level1) {
this.level1 = level1;
}
public String getLevel2() {
return level2;
}
public void setLevel2(String level2) {
this.level2 = level2;
}
public String getLevel3() {
return level3;
}
public void setLevel3(String level3) {
this.level3 = level3;
}
public String getClassification() {
return classification;
}
public void setClassification(String classification) {
this.classification = classification;
}
@Override
public boolean equals(Object o) {
if (this == o)
return true;
if (o == null || getClass() != o.getClass())
return false;
H2020Classification h2020classification = (H2020Classification) o;
return Objects.equals(level1, h2020classification.level1) &&
Objects.equals(level2, h2020classification.level2) &&
Objects.equals(level3, h2020classification.level3) &&
Objects.equals(classification, h2020classification.classification) &&
h2020Programme.equals(h2020classification.h2020Programme);
}
}

View File

@ -4,7 +4,13 @@ package eu.dnetlib.dhp.schema.oaf;
import java.io.Serializable;
import java.util.Objects;
public class Programme implements Serializable {
/**
* To store information about the ec programme for the project. It has the following parameters:
* - private String code to store the code of the programme
* - private String description to store the description of the programme
*/
public class H2020Programme implements Serializable {
private String code;
private String description;
@ -31,8 +37,8 @@ public class Programme implements Serializable {
if (o == null || getClass() != o.getClass())
return false;
Programme programme = (Programme) o;
return Objects.equals(code, programme.code);
H2020Programme h2020Programme = (H2020Programme) o;
return Objects.equals(code, h2020Programme.code);
}
}

View File

@ -58,7 +58,35 @@ public class Project extends OafEntity implements Serializable {
private Float fundedamount;
private List<Programme> programme;
private String h2020topiccode;
private String h2020topicdescription;
private List<H2020Classification> h2020classification;
public String getH2020topicdescription() {
return h2020topicdescription;
}
public void setH2020topicdescription(String h2020topicdescription) {
this.h2020topicdescription = h2020topicdescription;
}
public String getH2020topiccode() {
return h2020topiccode;
}
public void setH2020topiccode(String h2020topiccode) {
this.h2020topiccode = h2020topiccode;
}
public List<H2020Classification> getH2020classification() {
return h2020classification;
}
public void setH2020classification(List<H2020Classification> h2020classification) {
this.h2020classification = h2020classification;
}
public Field<String> getWebsiteurl() {
return websiteurl;
@ -268,14 +296,6 @@ public class Project extends OafEntity implements Serializable {
this.fundedamount = fundedamount;
}
public List<Programme> getProgramme() {
return programme;
}
public void setProgramme(List<Programme> programme) {
this.programme = programme;
}
@Override
public void mergeFrom(OafEntity e) {
super.mergeFrom(e);
@ -331,7 +351,9 @@ public class Project extends OafEntity implements Serializable {
? p.getFundedamount()
: fundedamount;
programme = mergeLists(programme, p.getProgramme());
// programme = mergeLists(programme, p.getProgramme());
h2020classification = mergeLists(h2020classification, p.getH2020classification());
mergeOAFDataInfo(e);
}

View File

@ -7,6 +7,8 @@ import java.util.Comparator;
import java.util.List;
import java.util.stream.Collectors;
import eu.dnetlib.dhp.schema.common.LicenseComparator;
public class Result extends OafEntity implements Serializable {
private List<Measure> measures;
@ -245,7 +247,8 @@ public class Result extends OafEntity implements Serializable {
instance = mergeLists(instance, r.getInstance());
if (r.getBestaccessright() != null && compareTrust(this, r) < 0)
if (r.getBestaccessright() != null
&& new LicenseComparator().compare(r.getBestaccessright(), bestaccessright) < 0)
bestaccessright = r.getBestaccessright();
if (r.getResulttype() != null && compareTrust(this, r) < 0)

View File

@ -1,7 +1,14 @@
package eu.dnetlib.doiboost.orcid.model;
package eu.dnetlib.dhp.schema.orcid;
import java.io.Serializable;
import java.util.List;
import com.google.common.collect.Lists;
/**
* This class models the data that are retrieved from orcid publication
*/
public class AuthorData implements Serializable {
@ -10,6 +17,7 @@ public class AuthorData implements Serializable {
private String surname;
private String creditName;
private String errorCode;
private List<String> otherNames;
public String getErrorCode() {
return errorCode;
@ -50,4 +58,15 @@ public class AuthorData implements Serializable {
public void setOid(String oid) {
this.oid = oid;
}
public List<String> getOtherNames() {
return otherNames;
}
public void setOtherNames(List<String> otherNames) {
if (this.otherNames == null) {
this.otherNames = Lists.newArrayList();
}
this.otherNames = otherNames;
}
}

View File

@ -1,30 +0,0 @@
package eu.dnetlib.dhp.schema.scholexplorer;
import java.util.List;
import eu.dnetlib.dhp.schema.oaf.KeyValue;
import eu.dnetlib.dhp.schema.oaf.Relation;
public class DLIRelation extends Relation {
private String dateOfCollection;
private List<KeyValue> collectedFrom;
public List<KeyValue> getCollectedFrom() {
return collectedFrom;
}
public void setCollectedFrom(List<KeyValue> collectedFrom) {
this.collectedFrom = collectedFrom;
}
public String getDateOfCollection() {
return dateOfCollection;
}
public void setDateOfCollection(String dateOfCollection) {
this.dateOfCollection = dateOfCollection;
}
}

View File

@ -2,10 +2,8 @@
package eu.dnetlib.dhp.schema.scholexplorer;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.*;
import java.util.stream.Collectors;
import org.apache.commons.lang3.StringUtils;
@ -78,6 +76,25 @@ public class DLIUnknown extends Oaf implements Serializable {
if ("complete".equalsIgnoreCase(p.completionStatus))
completionStatus = "complete";
dlicollectedfrom = mergeProvenance(dlicollectedfrom, p.getDlicollectedfrom());
if (StringUtils.isEmpty(id) && StringUtils.isNoneEmpty(p.getId()))
id = p.getId();
if (StringUtils.isEmpty(dateofcollection) && StringUtils.isNoneEmpty(p.getDateofcollection()))
dateofcollection = p.getDateofcollection();
if (StringUtils.isEmpty(dateoftransformation) && StringUtils.isNoneEmpty(p.getDateoftransformation()))
dateofcollection = p.getDateoftransformation();
pid = mergeLists(pid, p.getPid());
}
protected <T> List<T> mergeLists(final List<T>... lists) {
return Arrays
.stream(lists)
.filter(Objects::nonNull)
.flatMap(List::stream)
.filter(Objects::nonNull)
.distinct()
.collect(Collectors.toList());
}
private List<ProvenaceInfo> mergeProvenance(

View File

@ -0,0 +1,90 @@
package eu.dnetlib.dhp.schema.scholexplorer
import eu.dnetlib.dhp.schema.oaf.{DataInfo, Field, KeyValue, Qualifier, StructuredProperty}
object OafUtils {
def generateKeyValue(key: String, value: String): KeyValue = {
val kv: KeyValue = new KeyValue()
kv.setKey(key)
kv.setValue(value)
kv.setDataInfo(generateDataInfo("0.9"))
kv
}
def generateDataInfo(trust: String = "0.9", invisibile: Boolean = false): DataInfo = {
val di = new DataInfo
di.setDeletedbyinference(false)
di.setInferred(false)
di.setInvisible(false)
di.setTrust(trust)
di.setProvenanceaction(createQualifier("sysimport:actionset", "dnet:provenanceActions"))
di
}
def createQualifier(cls: String, sch: String): Qualifier = {
createQualifier(cls, cls, sch, sch)
}
def createQualifier(classId: String, className: String, schemeId: String, schemeName: String): Qualifier = {
val q: Qualifier = new Qualifier
q.setClassid(classId)
q.setClassname(className)
q.setSchemeid(schemeId)
q.setSchemename(schemeName)
q
}
def asField[T](value: T): Field[T] = {
val tmp = new Field[T]
tmp.setValue(value)
tmp
}
def createSP(value: String, classId: String,className:String, schemeId: String, schemeName:String): StructuredProperty = {
val sp = new StructuredProperty
sp.setQualifier(createQualifier(classId,className, schemeId, schemeName))
sp.setValue(value)
sp
}
def createSP(value: String, classId: String,className:String, schemeId: String, schemeName:String, dataInfo: DataInfo): StructuredProperty = {
val sp = new StructuredProperty
sp.setQualifier(createQualifier(classId,className, schemeId, schemeName))
sp.setValue(value)
sp.setDataInfo(dataInfo)
sp
}
def createSP(value: String, classId: String, schemeId: String): StructuredProperty = {
val sp = new StructuredProperty
sp.setQualifier(createQualifier(classId, schemeId))
sp.setValue(value)
sp
}
def createSP(value: String, classId: String, schemeId: String, dataInfo: DataInfo): StructuredProperty = {
val sp = new StructuredProperty
sp.setQualifier(createQualifier(classId, schemeId))
sp.setValue(value)
sp.setDataInfo(dataInfo)
sp
}
}

View File

@ -66,6 +66,19 @@
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.poi/poi-ooxml -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.commons/commons-compress -->
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-compress</artifactId>
</dependency>
</dependencies>

View File

@ -4,11 +4,15 @@ package eu.dnetlib.dhp.actionmanager.project;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.*;
import org.slf4j.Logger;
@ -16,11 +20,79 @@ import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.actionmanager.project.csvutils.CSVProgramme;
import eu.dnetlib.dhp.actionmanager.project.utils.CSVProgramme;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.HdfsSupport;
import scala.Tuple2;
/**
* Among all the programmes provided in the csv file, selects those in H2020 framework that have an english title.
*
* The title is then handled to get the programme description at a certain level. The set of programme titles will then
* be used to associate a classification for the programme.
*
* The programme code describes an hierarchy that can be exploited to provide the classification. To determine the hierarchy
* the code can be split by '.'. If the length of the splitted code is less than or equal to 2 it can be directly used
* as the classification: H2020-EU -> Horizon 2020 Framework Programme (It will never be repeated),
* H2020-EU.1. -> Excellent science, H2020-EU.2. -> Industrial leadership etc.
*
* The codes are ordered and for all of them the concatenation of all the titles (from the element in position 1 of
* the splitted code) handled as below is used to create the classification. For example:
*
* H2020-EU.1.1 -> Excellent science | European Research Council (ERC)
* from H2020-EU.1. -> Excellence science and H2020-EU.1.1. -> European Research Council (ERC)
*
* H2020-EU.3.1.3.1. -> Societal challenges | Health, demographic change and well-being | Treating and managing disease | Treating disease, including developing regenerative medicine
* from H2020-EU.3. -> Societal challenges,
* H2020-EU.3.1. -> Health, demographic change and well-being
* H2020-EU.3.1.3 -> Treating and managing disease
* H2020-EU.3.1.3.1. -> Treating disease, including developing regenerative medicine
*
* The classification up to level three, will be split in dedicated variables, while the complete classification will be stored
* in a variable called classification and provided as shown above.
*
* The programme title is not give in a standardized way:
*
* - Sometimes associated to the higher level in the hierarchy we can find Priority in title other times it is not the
* case. Since it is not uniform, we removed priority from the handled titles:
*
* H2020-EU.1. -> PRIORITY 'Excellent science'
* H2020-EU.2. -> PRIORITY 'Industrial leadership'
* H2020-EU.3. -> PRIORITY 'Societal challenges
*
* will become
*
* H2020-EU.1. -> Excellent science
* H2020-EU.2. -> Industrial leadership
* H2020-EU.3. -> Societal challenges
*
* - Sometimes the title of the parent is repeated in the title for the code, but it is not always the case, so, titles
* associated to previous levels in the hierarchy are removed from the code title.
*
* H2020-EU.1.2. -> EXCELLENT SCIENCE - Future and Emerging Technologies (FET)
* H2020-EU.2.2. -> INDUSTRIAL LEADERSHIP - Access to risk finance
* H2020-EU.3.4. -> SOCIETAL CHALLENGES - Smart, Green And Integrated Transport
*
* will become
*
* H2020-EU.1.2. -> Future and Emerging Technologies (FET)
* H2020-EU.2.2. -> Access to risk finance
* H2020-EU.3.4. -> Smart, Green And Integrated Transport
*
* This holds at all levels in the hierarchy. Hence
*
* H2020-EU.2.1.2. -> INDUSTRIAL LEADERSHIP - Leadership in enabling and industrial technologies Nanotechnologies
*
* will become
*
* H2020-EU.2.1.2. -> Nanotechnologies
*
* - Euratom is not given in the way the other programmes are: H2020-EU. but H2020-Euratom- . So we need to write
* specific code for it
*
*
*
*/
public class PrepareProgramme {
private static final Logger log = LoggerFactory.getLogger(PrepareProgramme.class);
@ -69,49 +141,149 @@ public class PrepareProgramme {
private static void exec(SparkSession spark, String programmePath, String outputPath) {
Dataset<CSVProgramme> programme = readPath(spark, programmePath, CSVProgramme.class);
programme
JavaRDD<CSVProgramme> h2020Programmes = programme
.toJavaRDD()
.filter(p -> !p.getCode().contains("FP7"))
.filter(p -> p.getFrameworkProgramme().trim().equalsIgnoreCase("H2020"))
.mapToPair(csvProgramme -> new Tuple2<>(csvProgramme.getCode(), csvProgramme))
.reduceByKey((a, b) -> {
if (StringUtils.isEmpty(a.getShortTitle())) {
if (StringUtils.isEmpty(b.getShortTitle())) {
if (StringUtils.isEmpty(a.getTitle())) {
if (StringUtils.isNotEmpty(b.getTitle())) {
a.setShortTitle(b.getTitle());
a.setLanguage(b.getLanguage());
}
} else {// notIsEmpty a.getTitle
if (StringUtils.isEmpty(b.getTitle())) {
a.setShortTitle(a.getTitle());
} else {
if (b.getLanguage().equalsIgnoreCase("en")) {
a.setShortTitle(b.getTitle());
a.setLanguage(b.getLanguage());
} else {
a.setShortTitle(a.getTitle());
}
}
}
} else {// not isEmpty b.getShortTitle
a.setShortTitle(b.getShortTitle());
// a.setLanguage(b.getLanguage());
if (!a.getLanguage().equals("en")) {
if (b.getLanguage().equalsIgnoreCase("en")) {
a.setTitle(b.getTitle());
a.setLanguage(b.getLanguage());
}
}
if (StringUtils.isEmpty(a.getShortTitle())) {
if (!StringUtils.isEmpty(b.getShortTitle())) {
a.setShortTitle(b.getShortTitle());
}
}
return a;
})
.map(p -> {
CSVProgramme csvProgramme = p._2();
if (StringUtils.isEmpty(csvProgramme.getShortTitle())) {
csvProgramme.setShortTitle(csvProgramme.getTitle());
String programmeTitle = csvProgramme.getTitle().trim();
if (programmeTitle.length() > 8 && programmeTitle.substring(0, 8).equalsIgnoreCase("PRIORITY")) {
programmeTitle = programmeTitle.substring(9);
if (programmeTitle.charAt(0) == '\'') {
programmeTitle = programmeTitle.substring(1);
}
if (programmeTitle.charAt(programmeTitle.length() - 1) == '\'') {
programmeTitle = programmeTitle.substring(0, programmeTitle.length() - 1);
}
csvProgramme.setTitle(programmeTitle);
}
return OBJECT_MAPPER.writeValueAsString(csvProgramme);
return csvProgramme;
});
// prepareClassification(h2020Programmes);
JavaSparkContext jsc = new JavaSparkContext(spark.sparkContext());
JavaRDD<CSVProgramme> rdd = jsc.parallelize(prepareClassification(h2020Programmes), 1);
rdd
.map(csvProgramme -> {
String tmp = OBJECT_MAPPER.writeValueAsString(csvProgramme);
return tmp;
})
.saveAsTextFile(outputPath);
}
private static List<CSVProgramme> prepareClassification(JavaRDD<CSVProgramme> h2020Programmes) {
Object[] codedescription = h2020Programmes
.map(
value -> new Tuple2<>(value.getCode(),
new Tuple2<String, String>(value.getTitle(), value.getShortTitle())))
.collect()
.toArray();
for (int i = 0; i < codedescription.length - 1; i++) {
for (int j = i + 1; j < codedescription.length; j++) {
Tuple2<String, Tuple2<String, String>> t2i = (Tuple2<String, Tuple2<String, String>>) codedescription[i];
Tuple2<String, Tuple2<String, String>> t2j = (Tuple2<String, Tuple2<String, String>>) codedescription[j];
if (t2i._1().compareTo(t2j._1()) > 0) {
Tuple2<String, Tuple2<String, String>> temp = t2i;
codedescription[i] = t2j;
codedescription[j] = temp;
}
}
}
Map<String, Tuple2<String, String>> map = new HashMap<>();
for (int j = 0; j < codedescription.length; j++) {
Tuple2<String, Tuple2<String, String>> entry = (Tuple2<String, Tuple2<String, String>>) codedescription[j];
String ent = entry._1();
if (ent.contains("Euratom-")) {
ent = ent.replace("-Euratom-", ".Euratom.");
}
String[] tmp = ent.split("\\.");
if (tmp.length <= 2) {
if (StringUtils.isEmpty(entry._2()._2())) {
map.put(entry._1(), new Tuple2<String, String>(entry._2()._1(), entry._2()._1()));
} else {
map.put(entry._1(), entry._2());
}
} else {
if (ent.endsWith(".")) {
ent = ent.substring(0, ent.length() - 1);
}
String key = ent.substring(0, ent.lastIndexOf(".") + 1);
if (key.contains("Euratom")) {
key = key.replace(".Euratom.", "-Euratom-");
ent = ent.replace(".Euratom.", "-Euratom-");
if (key.endsWith("-")) {
key = key.substring(0, key.length() - 1);
}
}
String current = entry._2()._1();
if (!ent.contains("Euratom")) {
String parent;
String tmp_key = tmp[0] + ".";
for (int i = 1; i < tmp.length - 1; i++) {
tmp_key += tmp[i] + ".";
parent = map.get(tmp_key)._1().toLowerCase().trim();
if (parent.contains("|")) {
parent = parent.substring(parent.lastIndexOf("|") + 1).trim();
}
if (current.trim().length() > parent.length()
&& current.toLowerCase().trim().substring(0, parent.length()).equals(parent)) {
current = current.substring(parent.length() + 1);
if (current.trim().charAt(0) == '-' || current.trim().charAt(0) == '') {
current = current.trim().substring(1).trim();
}
}
}
}
String shortTitle = entry._2()._2();
if (StringUtils.isEmpty(shortTitle)) {
shortTitle = current;
}
Tuple2<String, String> newEntry = new Tuple2<>(map.get(key)._1() + " | " + current,
map.get(key)._2() + " | " + shortTitle);
map.put(ent + ".", newEntry);
}
}
return h2020Programmes.map(csvProgramme -> {
String code = csvProgramme.getCode();
if (!code.endsWith(".") && !code.contains("Euratom")
&& !code.equals("H2020-EC"))
code += ".";
csvProgramme.setClassification(map.get(code)._1());
csvProgramme.setClassification_short(map.get(code)._2());
return csvProgramme;
}).collect();
}
public static <R> Dataset<R> readPath(
SparkSession spark, String inputPath, Class<R> clazz) {
return spark

View File

@ -6,9 +6,7 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import java.util.*;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Dataset;
@ -20,12 +18,16 @@ import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.actionmanager.project.csvutils.CSVProgramme;
import eu.dnetlib.dhp.actionmanager.project.csvutils.CSVProject;
import eu.dnetlib.dhp.actionmanager.project.utils.CSVProgramme;
import eu.dnetlib.dhp.actionmanager.project.utils.CSVProject;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.HdfsSupport;
import scala.Tuple2;
/**
* Selects only the relevant information collected with the projects: project grant agreement, project programme code and
* project topic code for the projects that are also collected from OpenAIRE.
*/
public class PrepareProjects {
private static final Logger log = LoggerFactory.getLogger(PrepareProgramme.class);
@ -97,10 +99,14 @@ public class PrepareProjects {
if (csvProject.isPresent()) {
String[] programme = csvProject.get().getProgramme().split(";");
String topic = csvProject.get().getTopics();
Arrays
.stream(programme)
.forEach(p -> {
CSVProject proj = new CSVProject();
proj.setTopics(topic);
proj.setProgramme(p);
proj.setId(csvProject.get().getId());
csvProjectList.add(proj);

View File

@ -3,6 +3,9 @@ package eu.dnetlib.dhp.actionmanager.project;
import java.io.Serializable;
/**
* Class to store the grande agreement (code) of the collected projects
*/
public class ProjectSubset implements Serializable {
private String code;
@ -14,4 +17,5 @@ public class ProjectSubset implements Serializable {
public void setCode(String code) {
this.code = code;
}
}

View File

@ -25,6 +25,10 @@ import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.DbClient;
/**
* queries the OpenAIRE database to get the grant agreement of projects collected from corda__h2020. The code collected
* are written on hdfs using the ProjectSubset model
*/
public class ReadProjectsFromDB implements Closeable {
private final DbClient dbClient;
@ -33,7 +37,7 @@ public class ReadProjectsFromDB implements Closeable {
private final BufferedWriter writer;
private final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
private final static String query = "SELECT code " +
private final static String query = "SELECT code " +
"from projects where id like 'corda__h2020%' ";
public static void main(final String[] args) throws Exception {
@ -72,7 +76,6 @@ public class ReadProjectsFromDB implements Closeable {
try {
ProjectSubset p = new ProjectSubset();
p.setCode(rs.getString("code"));
return Arrays.asList(p);
} catch (final Exception e) {

View File

@ -3,47 +3,53 @@ package eu.dnetlib.dhp.actionmanager.project;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import java.io.IOException;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Objects;
import java.util.Optional;
import java.util.function.Consumer;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.api.java.function.MapGroupsFunction;
import org.apache.spark.rdd.SequenceFileRDDFunctions;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.actionmanager.project.csvutils.CSVProgramme;
import eu.dnetlib.dhp.actionmanager.project.csvutils.CSVProject;
import eu.dnetlib.dhp.actionmanager.project.utils.CSVProgramme;
import eu.dnetlib.dhp.actionmanager.project.utils.CSVProject;
import eu.dnetlib.dhp.actionmanager.project.utils.EXCELTopic;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.HdfsSupport;
import eu.dnetlib.dhp.schema.action.AtomicAction;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.Programme;
import eu.dnetlib.dhp.schema.oaf.H2020Classification;
import eu.dnetlib.dhp.schema.oaf.H2020Programme;
import eu.dnetlib.dhp.schema.oaf.Project;
import eu.dnetlib.dhp.utils.DHPUtils;
import scala.Function1;
import scala.Tuple2;
import scala.runtime.BoxedUnit;
/**
* Class that makes the ActionSet. To prepare the AS two joins are needed
*
* 1. join betweem the collected project subset and the programme extenden with the classification on the grant agreement.
* For each entry a
* eu.dnetlib.dhp.Project entity is created and the information about H2020Classification is set together with the
* h2020topiccode variable
* 2. join between the output of the previous step and the topic information on the topic code. Each time a match is
* found the h2020topicdescription variable is set.
*
* To produce one single entry for each project code a step of groupoing is needed: each project can be associated to more
* than one programme.
*
*
*/
public class SparkAtomicActionJob {
private static final Logger log = LoggerFactory.getLogger(SparkAtomicActionJob.class);
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
@ -77,6 +83,9 @@ public class SparkAtomicActionJob {
final String programmePath = parser.get("programmePath");
log.info("programmePath {}: ", programmePath);
final String topicPath = parser.get("topicPath");
log.info("topic path {}: ", topicPath);
SparkConf conf = new SparkConf();
runWithSparkSession(
@ -88,6 +97,7 @@ public class SparkAtomicActionJob {
spark,
projectPath,
programmePath,
topicPath,
outputPath);
});
}
@ -98,31 +108,54 @@ public class SparkAtomicActionJob {
private static void getAtomicActions(SparkSession spark, String projectPatH,
String programmePath,
String topicPath,
String outputPath) {
Dataset<CSVProject> project = readPath(spark, projectPatH, CSVProject.class);
Dataset<CSVProgramme> programme = readPath(spark, programmePath, CSVProgramme.class);
Dataset<EXCELTopic> topic = readPath(spark, topicPath, EXCELTopic.class);
project
Dataset<Project> aaproject = project
.joinWith(programme, project.col("programme").equalTo(programme.col("code")), "left")
.map(c -> {
CSVProject csvProject = c._1();
Optional<CSVProgramme> csvProgramme = Optional.ofNullable(c._2());
if (csvProgramme.isPresent()) {
Project p = new Project();
p
.setId(
createOpenaireId(
ModelSupport.entityIdPrefix.get("project"),
"corda__h2020", csvProject.getId()));
Programme pm = new Programme();
pm.setCode(csvProject.getProgramme());
pm.setDescription(csvProgramme.get().getShortTitle());
p.setProgramme(Arrays.asList(pm));
return p;
}
.map((MapFunction<Tuple2<CSVProject, CSVProgramme>, Project>) c -> {
return null;
CSVProject csvProject = c._1();
Optional<CSVProgramme> ocsvProgramme = Optional.ofNullable(c._2());
return Optional
.ofNullable(c._2())
.map(csvProgramme -> {
Project pp = new Project();
pp
.setId(
createOpenaireId(
ModelSupport.entityIdPrefix.get("project"),
"corda__h2020", csvProject.getId()));
pp.setH2020topiccode(csvProject.getTopics());
H2020Programme pm = new H2020Programme();
H2020Classification h2020classification = new H2020Classification();
pm.setCode(csvProject.getProgramme());
h2020classification.setClassification(ocsvProgramme.get().getClassification());
h2020classification.setH2020Programme(pm);
setLevelsandProgramme(h2020classification, ocsvProgramme.get().getClassification_short());
// setProgramme(h2020classification, ocsvProgramme.get().getClassification());
pp.setH2020classification(Arrays.asList(h2020classification));
return pp;
})
.orElse(null);
}, Encoders.bean(Project.class));
aaproject
.joinWith(topic, aaproject.col("h2020topiccode").equalTo(topic.col("code")))
.map((MapFunction<Tuple2<Project, EXCELTopic>, Project>) p -> {
Optional<EXCELTopic> op = Optional.ofNullable(p._2());
Project rp = p._1();
if (op.isPresent()) {
rp.setH2020topicdescription(op.get().getTitle());
}
return rp;
}, Encoders.bean(Project.class))
.filter(Objects::nonNull)
.groupByKey(
@ -144,6 +177,24 @@ public class SparkAtomicActionJob {
}
private static void setLevelsandProgramme(H2020Classification h2020Classification, String classification_short) {
String[] tmp = classification_short.split(" \\| ");
h2020Classification.setLevel1(tmp[0]);
if (tmp.length > 1) {
h2020Classification.setLevel2(tmp[1]);
}
if (tmp.length > 2) {
h2020Classification.setLevel3(tmp[2]);
}
h2020Classification.getH2020Programme().setDescription(tmp[tmp.length - 1]);
}
// private static void setProgramme(H2020Classification h2020Classification, String classification) {
// String[] tmp = classification.split(" \\| ");
//
// h2020Classification.getH2020Programme().setDescription(tmp[tmp.length - 1]);
// }
public static <R> Dataset<R> readPath(
SparkSession spark, String inputPath, Class<R> clazz) {
return spark

View File

@ -1,52 +0,0 @@
package eu.dnetlib.dhp.actionmanager.project.csvutils;
import java.io.Serializable;
public class CSVProgramme implements Serializable {
private String rcn;
private String code;
private String title;
private String shortTitle;
private String language;
public String getRcn() {
return rcn;
}
public void setRcn(String rcn) {
this.rcn = rcn;
}
public String getCode() {
return code;
}
public void setCode(String code) {
this.code = code;
}
public String getTitle() {
return title;
}
public void setTitle(String title) {
this.title = title;
}
public String getShortTitle() {
return shortTitle;
}
public void setShortTitle(String shortTitle) {
this.shortTitle = shortTitle;
}
public String getLanguage() {
return language;
}
public void setLanguage(String language) {
this.language = language;
}
}

View File

@ -1,5 +1,5 @@
package eu.dnetlib.dhp.actionmanager.project.csvutils;
package eu.dnetlib.dhp.actionmanager.project.utils;
import java.io.IOException;
import java.util.ArrayList;
@ -10,6 +10,9 @@ import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVRecord;
import org.apache.commons.lang.reflect.FieldUtils;
/**
* Reads a generic csv and maps it into classes that mirror its schema
*/
public class CSVParser {
public <R> List<R> parse(String csvFile, String classForName)

View File

@ -0,0 +1,146 @@
package eu.dnetlib.dhp.actionmanager.project.utils;
import java.io.Serializable;
/**
* The model for the programme csv file
*/
public class CSVProgramme implements Serializable {
private String parentProgramme;
private String frameworkProgramme;
private String startDate;
private String endDate;
private String objective;
private String subjects;
private String legalBasis;
private String call;
private String rcn;
private String code;
private String title;
private String shortTitle;
private String language;
private String classification;
private String classification_short;
public String getClassification_short() {
return classification_short;
}
public void setClassification_short(String classification_short) {
this.classification_short = classification_short;
}
public String getClassification() {
return classification;
}
public void setClassification(String classification) {
this.classification = classification;
}
public String getRcn() {
return rcn;
}
public void setRcn(String rcn) {
this.rcn = rcn;
}
public String getCode() {
return code;
}
public void setCode(String code) {
this.code = code;
}
public String getTitle() {
return title;
}
public void setTitle(String title) {
this.title = title;
}
public String getShortTitle() {
return shortTitle;
}
public void setShortTitle(String shortTitle) {
this.shortTitle = shortTitle;
}
public String getLanguage() {
return language;
}
public void setLanguage(String language) {
this.language = language;
}
public String getParentProgramme() {
return parentProgramme;
}
public void setParentProgramme(String parentProgramme) {
this.parentProgramme = parentProgramme;
}
public String getFrameworkProgramme() {
return frameworkProgramme;
}
public void setFrameworkProgramme(String frameworkProgramme) {
this.frameworkProgramme = frameworkProgramme;
}
public String getStartDate() {
return startDate;
}
public void setStartDate(String startDate) {
this.startDate = startDate;
}
public String getEndDate() {
return endDate;
}
public void setEndDate(String endDate) {
this.endDate = endDate;
}
public String getObjective() {
return objective;
}
public void setObjective(String objective) {
this.objective = objective;
}
public String getSubjects() {
return subjects;
}
public void setSubjects(String subjects) {
this.subjects = subjects;
}
public String getLegalBasis() {
return legalBasis;
}
public void setLegalBasis(String legalBasis) {
this.legalBasis = legalBasis;
}
public String getCall() {
return call;
}
public void setCall(String call) {
this.call = call;
}
}

View File

@ -1,8 +1,11 @@
package eu.dnetlib.dhp.actionmanager.project.csvutils;
package eu.dnetlib.dhp.actionmanager.project.utils;
import java.io.Serializable;
/**
* the mmodel for the projects csv file
*/
public class CSVProject implements Serializable {
private String rcn;
private String id;

View File

@ -0,0 +1,75 @@
package eu.dnetlib.dhp.actionmanager.project.utils;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.lang.reflect.FieldUtils;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.DataFormatter;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
/**
* Reads a generic excel file and maps it into classes that mirror its schema
*/
public class EXCELParser {
public <R> List<R> parse(InputStream file, String classForName)
throws ClassNotFoundException, IOException, IllegalAccessException, InstantiationException,
InvalidFormatException {
// OPCPackage pkg = OPCPackage.open(httpConnector.getInputSourceAsStream(URL));
OPCPackage pkg = OPCPackage.open(file);
XSSFWorkbook wb = new XSSFWorkbook(pkg);
XSSFSheet sheet = wb.getSheet("cordisref-H2020topics");
List<R> ret = new ArrayList<>();
DataFormatter dataFormatter = new DataFormatter();
Iterator<Row> rowIterator = sheet.rowIterator();
List<String> headers = new ArrayList<>();
int count = 0;
while (rowIterator.hasNext()) {
Row row = rowIterator.next();
if (count == 0) {
Iterator<Cell> cellIterator = row.cellIterator();
while (cellIterator.hasNext()) {
Cell cell = cellIterator.next();
headers.add(dataFormatter.formatCellValue(cell));
}
} else {
Class<?> clazz = Class.forName("eu.dnetlib.dhp.actionmanager.project.utils.EXCELTopic");
final Object cc = clazz.newInstance();
for (int i = 0; i < headers.size(); i++) {
Cell cell = row.getCell(i);
String value = dataFormatter.formatCellValue(cell);
FieldUtils.writeField(cc, headers.get(i), dataFormatter.formatCellValue(cell), true);
}
EXCELTopic et = (EXCELTopic) cc;
if (StringUtils.isNotBlank(et.getRcn())) {
ret.add((R) cc);
}
}
count += 1;
}
return ret;
}
}

View File

@ -0,0 +1,127 @@
package eu.dnetlib.dhp.actionmanager.project.utils;
import java.io.Serializable;
/**
* the model class for the topic excel file
*/
public class EXCELTopic implements Serializable {
private String rcn;
private String language;
private String code;
private String parentProgramme;
private String frameworkProgramme;
private String startDate;
private String endDate;
private String title;
private String shortTitle;
private String objective;
private String subjects;
private String legalBasis;
private String call;
public String getRcn() {
return rcn;
}
public void setRcn(String rcn) {
this.rcn = rcn;
}
public String getLanguage() {
return language;
}
public void setLanguage(String language) {
this.language = language;
}
public String getCode() {
return code;
}
public void setCode(String code) {
this.code = code;
}
public String getParentProgramme() {
return parentProgramme;
}
public void setParentProgramme(String parentProgramme) {
this.parentProgramme = parentProgramme;
}
public String getFrameworkProgramme() {
return frameworkProgramme;
}
public void setFrameworkProgramme(String frameworkProgramme) {
this.frameworkProgramme = frameworkProgramme;
}
public String getStartDate() {
return startDate;
}
public void setStartDate(String startDate) {
this.startDate = startDate;
}
public String getEndDate() {
return endDate;
}
public void setEndDate(String endDate) {
this.endDate = endDate;
}
public String getTitle() {
return title;
}
public void setTitle(String title) {
this.title = title;
}
public String getShortTitle() {
return shortTitle;
}
public void setShortTitle(String shortTitle) {
this.shortTitle = shortTitle;
}
public String getObjective() {
return objective;
}
public void setObjective(String objective) {
this.objective = objective;
}
public String getSubjects() {
return subjects;
}
public void setSubjects(String subjects) {
this.subjects = subjects;
}
public String getLegalBasis() {
return legalBasis;
}
public void setLegalBasis(String legalBasis) {
this.legalBasis = legalBasis;
}
public String getCall() {
return call;
}
public void setCall(String call) {
this.call = call;
}
}

View File

@ -1,5 +1,5 @@
package eu.dnetlib.dhp.actionmanager.project.csvutils;
package eu.dnetlib.dhp.actionmanager.project.utils;
import java.io.BufferedWriter;
import java.io.Closeable;
@ -20,6 +20,9 @@ import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.actionmanager.project.httpconnector.HttpConnector;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
/**
* Applies the parsing of a csv file and writes the Serialization of it in hdfs
*/
public class ReadCSV implements Closeable {
private static final Log log = LogFactory.getLog(ReadCSV.class);
private final Configuration conf;

View File

@ -0,0 +1,98 @@
package eu.dnetlib.dhp.actionmanager.project.utils;
import java.io.*;
import java.nio.charset.StandardCharsets;
import org.apache.commons.io.IOUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.actionmanager.project.httpconnector.HttpConnector;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
/**
* Applies the parsing of an excel file and writes the Serialization of it in hdfs
*/
public class ReadExcel implements Closeable {
private static final Log log = LogFactory.getLog(ReadCSV.class);
private final Configuration conf;
private final BufferedWriter writer;
private final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
private InputStream excelFile;
public static void main(final String[] args) throws Exception {
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
IOUtils
.toString(
ReadCSV.class
.getResourceAsStream(
"/eu/dnetlib/dhp/actionmanager/project/parameters.json")));
parser.parseArgument(args);
final String fileURL = parser.get("fileURL");
final String hdfsPath = parser.get("hdfsPath");
final String hdfsNameNode = parser.get("hdfsNameNode");
final String classForName = parser.get("classForName");
try (final ReadExcel readExcel = new ReadExcel(hdfsPath, hdfsNameNode, fileURL)) {
log.info("Getting Excel file...");
readExcel.execute(classForName);
}
}
public void execute(final String classForName) throws Exception {
EXCELParser excelParser = new EXCELParser();
excelParser
.parse(excelFile, classForName)
.stream()
.forEach(p -> write(p));
}
@Override
public void close() throws IOException {
writer.close();
}
public ReadExcel(
final String hdfsPath,
final String hdfsNameNode,
final String fileURL)
throws Exception {
this.conf = new Configuration();
this.conf.set("fs.defaultFS", hdfsNameNode);
HttpConnector httpConnector = new HttpConnector();
FileSystem fileSystem = FileSystem.get(this.conf);
Path hdfsWritePath = new Path(hdfsPath);
FSDataOutputStream fsDataOutputStream = null;
if (fileSystem.exists(hdfsWritePath)) {
fileSystem.delete(hdfsWritePath, false);
}
fsDataOutputStream = fileSystem.create(hdfsWritePath);
this.writer = new BufferedWriter(new OutputStreamWriter(fsDataOutputStream, StandardCharsets.UTF_8));
this.excelFile = httpConnector.getInputSourceAsStream(fileURL);
;
}
protected void write(final Object p) {
try {
writer.write(OBJECT_MAPPER.writeValueAsString(p));
writer.newLine();
} catch (final Exception e) {
throw new RuntimeException(e);
}
}
}

View File

@ -17,6 +17,12 @@
"paramDescription": "the URL from where to get the programme file",
"paramRequired": true
},
{
"paramName": "tp",
"paramLongName": "topicPath",
"paramDescription": "the URL from where to get the topic file",
"paramRequired": true
},
{
"paramName": "o",
"paramLongName": "outputPath",

View File

@ -31,6 +31,10 @@
<name>spark2SqlQueryExecutionListeners</name>
<value>com.cloudera.spark.lineage.NavigatorQueryListener</value>
</property>
<property>
<name>oozie.launcher.mapreduce.user.classpath.first</name>
<value>true</value>
</property>
<property>
<name>sparkExecutorNumber</name>
<value>4</value>

View File

@ -10,6 +10,10 @@
<description>the url where to get the programme file</description>
</property>
<property>
<name>topicFileURL</name>
<description>the url where to get the topic file</description>
</property>
<property>
<name>outputPath</name>
<description>path where to store the action set</description>
@ -33,11 +37,11 @@
<action name="get_project_file">
<java>
<main-class>eu.dnetlib.dhp.actionmanager.project.csvutils.ReadCSV</main-class>
<main-class>eu.dnetlib.dhp.actionmanager.project.utils.ReadCSV</main-class>
<arg>--hdfsNameNode</arg><arg>${nameNode}</arg>
<arg>--fileURL</arg><arg>${projectFileURL}</arg>
<arg>--hdfsPath</arg><arg>${workingDir}/projects</arg>
<arg>--classForName</arg><arg>eu.dnetlib.dhp.actionmanager.project.csvutils.CSVProject</arg>
<arg>--classForName</arg><arg>eu.dnetlib.dhp.actionmanager.project.utils.CSVProject</arg>
</java>
<ok to="get_programme_file"/>
<error to="Kill"/>
@ -45,11 +49,23 @@
<action name="get_programme_file">
<java>
<main-class>eu.dnetlib.dhp.actionmanager.project.csvutils.ReadCSV</main-class>
<main-class>eu.dnetlib.dhp.actionmanager.project.utils.ReadCSV</main-class>
<arg>--hdfsNameNode</arg><arg>${nameNode}</arg>
<arg>--fileURL</arg><arg>${programmeFileURL}</arg>
<arg>--hdfsPath</arg><arg>${workingDir}/programme</arg>
<arg>--classForName</arg><arg>eu.dnetlib.dhp.actionmanager.project.csvutils.CSVProgramme</arg>
<arg>--classForName</arg><arg>eu.dnetlib.dhp.actionmanager.project.utils.CSVProgramme</arg>
</java>
<ok to="get_topic_file"/>
<error to="Kill"/>
</action>
<action name="get_topic_file">
<java>
<main-class>eu.dnetlib.dhp.actionmanager.project.utils.ReadExcel</main-class>
<arg>--hdfsNameNode</arg><arg>${nameNode}</arg>
<arg>--fileURL</arg><arg>${topicFileURL}</arg>
<arg>--hdfsPath</arg><arg>${workingDir}/topic</arg>
<arg>--classForName</arg><arg>eu.dnetlib.dhp.actionmanager.project.utils.EXCELTopic</arg>
</java>
<ok to="read_projects"/>
<error to="Kill"/>
@ -136,6 +152,7 @@
</spark-opts>
<arg>--projectPath</arg><arg>${workingDir}/preparedProjects</arg>
<arg>--programmePath</arg><arg>${workingDir}/preparedProgramme</arg>
<arg>--topicPath</arg><arg>${workingDir}/topic</arg>
<arg>--outputPath</arg><arg>${outputPath}</arg>
</spark>
<ok to="End"/>

View File

@ -1,27 +1,16 @@
package eu.dnetlib.dhp.actionmanager.project;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.List;
import org.apache.commons.io.IOUtils;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
import eu.dnetlib.dhp.actionmanager.project.csvutils.CSVParser;
import eu.dnetlib.dhp.actionmanager.project.utils.CSVParser;
public class CSVParserTest {
private static Path workingDir;
@BeforeAll
public static void beforeAll() throws IOException {
workingDir = Files.createTempDirectory(CSVParserTest.class.getSimpleName());
}
@Test
public void readProgrammeTest() throws Exception {
@ -33,9 +22,10 @@ public class CSVParserTest {
CSVParser csvParser = new CSVParser();
List<Object> pl = csvParser.parse(programmecsv, "eu.dnetlib.dhp.actionmanager.project.csvutils.CSVProgramme");
List<Object> pl = csvParser.parse(programmecsv, "eu.dnetlib.dhp.actionmanager.project.utils.CSVProgramme");
System.out.println(pl.size());
Assertions.assertEquals(24, pl.size());
}
}

View File

@ -0,0 +1,44 @@
package eu.dnetlib.dhp.actionmanager.project;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.List;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
import eu.dnetlib.dhp.actionmanager.project.httpconnector.CollectorServiceException;
import eu.dnetlib.dhp.actionmanager.project.httpconnector.HttpConnector;
import eu.dnetlib.dhp.actionmanager.project.utils.EXCELParser;
@Disabled
public class EXCELParserTest {
private static Path workingDir;
private HttpConnector httpConnector = new HttpConnector();
private static final String URL = "http://cordis.europa.eu/data/reference/cordisref-H2020topics.xlsx";
@BeforeAll
public static void beforeAll() throws IOException {
workingDir = Files.createTempDirectory(CSVParserTest.class.getSimpleName());
}
@Test
public void test1() throws CollectorServiceException, IOException, InvalidFormatException, ClassNotFoundException,
IllegalAccessException, InstantiationException {
EXCELParser excelParser = new EXCELParser();
List<Object> pl = excelParser
.parse(httpConnector.getInputSourceAsStream(URL), "eu.dnetlib.dhp.actionmanager.project.utils.ExcelTopic");
Assertions.assertEquals(3837, pl.size());
}
}

View File

@ -21,29 +21,29 @@ import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.actionmanager.project.csvutils.CSVProgramme;
import eu.dnetlib.dhp.actionmanager.project.utils.CSVProgramme;
public class PrepareProgrammeTest {
public class PrepareH2020ProgrammeTest {
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
private static final ClassLoader cl = eu.dnetlib.dhp.actionmanager.project.PrepareProgrammeTest.class
private static final ClassLoader cl = PrepareH2020ProgrammeTest.class
.getClassLoader();
private static SparkSession spark;
private static Path workingDir;
private static final Logger log = LoggerFactory
.getLogger(eu.dnetlib.dhp.actionmanager.project.PrepareProgrammeTest.class);
.getLogger(PrepareH2020ProgrammeTest.class);
@BeforeAll
public static void beforeAll() throws IOException {
workingDir = Files
.createTempDirectory(eu.dnetlib.dhp.actionmanager.project.PrepareProgrammeTest.class.getSimpleName());
.createTempDirectory(PrepareH2020ProgrammeTest.class.getSimpleName());
log.info("using work dir {}", workingDir);
SparkConf conf = new SparkConf();
conf.setAppName(eu.dnetlib.dhp.actionmanager.project.PrepareProgrammeTest.class.getSimpleName());
conf.setAppName(PrepareH2020ProgrammeTest.class.getSimpleName());
conf.setMaster("local[*]");
conf.set("spark.driver.host", "localhost");
@ -54,7 +54,7 @@ public class PrepareProgrammeTest {
spark = SparkSession
.builder()
.appName(PrepareProgrammeTest.class.getSimpleName())
.appName(PrepareH2020ProgrammeTest.class.getSimpleName())
.config(conf)
.getOrCreate();
}
@ -88,7 +88,62 @@ public class PrepareProgrammeTest {
Dataset<CSVProgramme> verificationDataset = spark.createDataset(tmp.rdd(), Encoders.bean(CSVProgramme.class));
Assertions.assertEquals(0, verificationDataset.filter("shortTitle =''").count());
Assertions.assertEquals(0, verificationDataset.filter("title =''").count());
Assertions.assertEquals(0, verificationDataset.filter("classification = ''").count());
// tmp.foreach(csvProgramme -> System.out.println(OBJECT_MAPPER.writeValueAsString(csvProgramme)));
Assertions
.assertEquals(
"Societal challenges | Smart, Green And Integrated Transport | CLEANSKY2 | IADP Fast Rotorcraft",
verificationDataset
.filter("code = 'H2020-EU.3.4.5.3.'")
.select("classification")
.collectAsList()
.get(0)
.getString(0));
Assertions
.assertEquals(
"Euratom | Indirect actions | European Fusion Development Agreement",
verificationDataset
.filter("code = 'H2020-Euratom-1.9.'")
.select("classification")
.collectAsList()
.get(0)
.getString(0));
Assertions
.assertEquals(
"Industrial leadership | Leadership in enabling and industrial technologies | Advanced manufacturing and processing | New sustainable business models",
verificationDataset
.filter("code = 'H2020-EU.2.1.5.4.'")
.select("classification")
.collectAsList()
.get(0)
.getString(0));
Assertions
.assertEquals(
"Excellent science | Future and Emerging Technologies (FET) | FET Open",
verificationDataset
.filter("code = 'H2020-EU.1.2.1.'")
.select("classification")
.collectAsList()
.get(0)
.getString(0));
Assertions
.assertEquals(
"Industrial leadership | Leadership in enabling and industrial technologies | Biotechnology",
verificationDataset
.filter("code = 'H2020-EU.2.1.4.'")
.select("classification")
.collectAsList()
.get(0)
.getString(0));
}
}

Some files were not shown because too many files have changed in this diff Show More