forked from D-Net/dnet-hadoop
merge with master
This commit is contained in:
commit
f6b7c297a8
|
@ -15,12 +15,12 @@
|
|||
<snapshotRepository>
|
||||
<id>dnet45-snapshots</id>
|
||||
<name>DNet45 Snapshots</name>
|
||||
<url>http://maven.research-infrastructures.eu/nexus/content/repositories/dnet45-snapshots</url>
|
||||
<url>https://maven.d4science.org/nexus/content/repositories/dnet45-snapshots</url>
|
||||
<layout>default</layout>
|
||||
</snapshotRepository>
|
||||
<repository>
|
||||
<id>dnet45-releases</id>
|
||||
<url>http://maven.research-infrastructures.eu/nexus/content/repositories/dnet45-releases</url>
|
||||
<url>https://maven.d4science.org/nexus/content/repositories/dnet45-releases</url>
|
||||
</repository>
|
||||
</distributionManagement>
|
||||
|
||||
|
|
|
@ -19,7 +19,7 @@
|
|||
<setting id="org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_annotation_type_member_declaration" value="do not insert"/>
|
||||
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_declaration_throws" value="do not insert"/>
|
||||
<setting id="org.eclipse.jdt.core.formatter.parentheses_positions_in_switch_statement" value="common_lines"/>
|
||||
<setting id="org.eclipse.jdt.core.formatter.comment.format_javadoc_comments" value="true"/>
|
||||
<setting id="org.eclipse.jdt.core.formatter.comment.format_javadoc_comments" value="false"/>
|
||||
<setting id="org.eclipse.jdt.core.formatter.indentation.size" value="4"/>
|
||||
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_postfix_operator" value="do not insert"/>
|
||||
<setting id="org.eclipse.jdt.core.formatter.parentheses_positions_in_enum_constant_declaration" value="common_lines"/>
|
||||
|
|
|
@ -87,6 +87,22 @@
|
|||
<groupId>org.postgresql</groupId>
|
||||
<artifactId>postgresql</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>com.squareup.okhttp3</groupId>
|
||||
<artifactId>okhttp</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>eu.dnetlib</groupId>
|
||||
<artifactId>dnet-pace-core</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>eu.dnetlib.dhp</groupId>
|
||||
<artifactId>dhp-schemas</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
|
||||
</project>
|
||||
|
|
|
@ -0,0 +1,117 @@
|
|||
|
||||
package eu.dnetlib.dhp.common;
|
||||
|
||||
import java.io.BufferedInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.Serializable;
|
||||
|
||||
import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
|
||||
import org.apache.commons.compress.archivers.tar.TarArchiveOutputStream;
|
||||
import org.apache.hadoop.fs.*;
|
||||
|
||||
public class MakeTarArchive implements Serializable {
|
||||
|
||||
private static TarArchiveOutputStream getTar(FileSystem fileSystem, String outputPath) throws IOException {
|
||||
Path hdfsWritePath = new Path(outputPath);
|
||||
FSDataOutputStream fsDataOutputStream = null;
|
||||
if (fileSystem.exists(hdfsWritePath)) {
|
||||
fileSystem.delete(hdfsWritePath, true);
|
||||
|
||||
}
|
||||
fsDataOutputStream = fileSystem.create(hdfsWritePath);
|
||||
|
||||
return new TarArchiveOutputStream(fsDataOutputStream.getWrappedStream());
|
||||
}
|
||||
|
||||
private static void write(FileSystem fileSystem, String inputPath, String outputPath, String dir_name)
|
||||
throws IOException {
|
||||
|
||||
Path hdfsWritePath = new Path(outputPath);
|
||||
FSDataOutputStream fsDataOutputStream = null;
|
||||
if (fileSystem.exists(hdfsWritePath)) {
|
||||
fileSystem.delete(hdfsWritePath, true);
|
||||
|
||||
}
|
||||
fsDataOutputStream = fileSystem.create(hdfsWritePath);
|
||||
|
||||
TarArchiveOutputStream ar = new TarArchiveOutputStream(fsDataOutputStream.getWrappedStream());
|
||||
|
||||
RemoteIterator<LocatedFileStatus> fileStatusListIterator = fileSystem
|
||||
.listFiles(
|
||||
new Path(inputPath), true);
|
||||
|
||||
while (fileStatusListIterator.hasNext()) {
|
||||
writeCurrentFile(fileSystem, dir_name, fileStatusListIterator, ar, 0);
|
||||
}
|
||||
|
||||
ar.close();
|
||||
}
|
||||
|
||||
public static void tarMaxSize(FileSystem fileSystem, String inputPath, String outputPath, String dir_name,
|
||||
int gBperSplit) throws IOException {
|
||||
final long bytesPerSplit = 1024L * 1024L * 1024L * gBperSplit;
|
||||
|
||||
long sourceSize = fileSystem.getContentSummary(new Path(inputPath)).getSpaceConsumed();
|
||||
|
||||
if (sourceSize < bytesPerSplit) {
|
||||
write(fileSystem, inputPath, outputPath + ".tar", dir_name);
|
||||
} else {
|
||||
int partNum = 0;
|
||||
|
||||
RemoteIterator<LocatedFileStatus> fileStatusListIterator = fileSystem
|
||||
.listFiles(
|
||||
new Path(inputPath), true);
|
||||
boolean next = fileStatusListIterator.hasNext();
|
||||
while (next) {
|
||||
TarArchiveOutputStream ar = getTar(fileSystem, outputPath + "_" + (partNum + 1) + ".tar");
|
||||
|
||||
long current_size = 0;
|
||||
while (next && current_size < bytesPerSplit) {
|
||||
current_size = writeCurrentFile(fileSystem, dir_name, fileStatusListIterator, ar, current_size);
|
||||
next = fileStatusListIterator.hasNext();
|
||||
|
||||
}
|
||||
|
||||
partNum += 1;
|
||||
ar.close();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private static long writeCurrentFile(FileSystem fileSystem, String dir_name,
|
||||
RemoteIterator<LocatedFileStatus> fileStatusListIterator,
|
||||
TarArchiveOutputStream ar, long current_size) throws IOException {
|
||||
LocatedFileStatus fileStatus = fileStatusListIterator.next();
|
||||
|
||||
Path p = fileStatus.getPath();
|
||||
String p_string = p.toString();
|
||||
if (!p_string.endsWith("_SUCCESS")) {
|
||||
String name = p_string.substring(p_string.lastIndexOf("/") + 1);
|
||||
if (name.trim().equalsIgnoreCase("communities_infrastructures")) {
|
||||
name = "communities_infrastructures.json";
|
||||
}
|
||||
TarArchiveEntry entry = new TarArchiveEntry(dir_name + "/" + name);
|
||||
entry.setSize(fileStatus.getLen());
|
||||
current_size += fileStatus.getLen();
|
||||
ar.putArchiveEntry(entry);
|
||||
|
||||
InputStream is = fileSystem.open(fileStatus.getPath());
|
||||
|
||||
BufferedInputStream bis = new BufferedInputStream(is);
|
||||
|
||||
int count;
|
||||
byte data[] = new byte[1024];
|
||||
while ((count = bis.read(data, 0, data.length)) != -1) {
|
||||
ar.write(data, 0, count);
|
||||
}
|
||||
bis.close();
|
||||
ar.closeArchiveEntry();
|
||||
|
||||
}
|
||||
return current_size;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,53 @@
|
|||
|
||||
package eu.dnetlib.dhp.common.api;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
|
||||
import okhttp3.MediaType;
|
||||
import okhttp3.RequestBody;
|
||||
import okhttp3.internal.Util;
|
||||
import okio.BufferedSink;
|
||||
import okio.Okio;
|
||||
import okio.Source;
|
||||
|
||||
public class InputStreamRequestBody extends RequestBody {
|
||||
|
||||
private InputStream inputStream;
|
||||
private MediaType mediaType;
|
||||
private long lenght;
|
||||
|
||||
public static RequestBody create(final MediaType mediaType, final InputStream inputStream, final long len) {
|
||||
|
||||
return new InputStreamRequestBody(inputStream, mediaType, len);
|
||||
}
|
||||
|
||||
private InputStreamRequestBody(InputStream inputStream, MediaType mediaType, long len) {
|
||||
this.inputStream = inputStream;
|
||||
this.mediaType = mediaType;
|
||||
this.lenght = len;
|
||||
}
|
||||
|
||||
@Override
|
||||
public MediaType contentType() {
|
||||
return mediaType;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long contentLength() {
|
||||
|
||||
return lenght;
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public void writeTo(BufferedSink sink) throws IOException {
|
||||
Source source = null;
|
||||
try {
|
||||
source = Okio.source(inputStream);
|
||||
sink.writeAll(source);
|
||||
} finally {
|
||||
Util.closeQuietly(source);
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,8 @@
|
|||
|
||||
package eu.dnetlib.dhp.common.api;
|
||||
|
||||
/**
 * Checked exception raised by {@code ZenodoAPIClient} when the requested concept record id is not found among
 * the existing depositions.
 * <p>
 * It extends {@link Exception} rather than {@link Throwable} directly, so that it is handled like any other
 * checked exception.
 */
public class MissingConceptDoiException extends Exception {

    private static final long serialVersionUID = 1L;

    /**
     * @param message the description of the failure
     */
    public MissingConceptDoiException(String message) {
        super(message);
    }
}
|
|
@ -0,0 +1,315 @@
|
|||
|
||||
package eu.dnetlib.dhp.common.api;
|
||||
|
||||
import java.io.*;
|
||||
import java.io.IOException;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
import com.google.gson.Gson;
|
||||
|
||||
import eu.dnetlib.dhp.common.api.zenodo.ZenodoModel;
|
||||
import eu.dnetlib.dhp.common.api.zenodo.ZenodoModelList;
|
||||
import okhttp3.*;
|
||||
|
||||
public class ZenodoAPIClient implements Serializable {
|
||||
|
||||
String urlString;
|
||||
String bucket;
|
||||
|
||||
String deposition_id;
|
||||
String access_token;
|
||||
|
||||
public static final MediaType MEDIA_TYPE_JSON = MediaType.parse("application/json; charset=utf-8");
|
||||
|
||||
private static final MediaType MEDIA_TYPE_ZIP = MediaType.parse("application/zip");
|
||||
|
||||
public String getUrlString() {
|
||||
return urlString;
|
||||
}
|
||||
|
||||
public void setUrlString(String urlString) {
|
||||
this.urlString = urlString;
|
||||
}
|
||||
|
||||
public String getBucket() {
|
||||
return bucket;
|
||||
}
|
||||
|
||||
public void setBucket(String bucket) {
|
||||
this.bucket = bucket;
|
||||
}
|
||||
|
||||
public void setDeposition_id(String deposition_id) {
|
||||
this.deposition_id = deposition_id;
|
||||
}
|
||||
|
||||
public ZenodoAPIClient(String urlString, String access_token) throws IOException {
|
||||
|
||||
this.urlString = urlString;
|
||||
this.access_token = access_token;
|
||||
}
|
||||
|
||||
/**
|
||||
* Brand new deposition in Zenodo. It sets the deposition_id and the bucket where to store the files to upload
|
||||
*
|
||||
* @return response code
|
||||
* @throws IOException
|
||||
*/
|
||||
public int newDeposition() throws IOException {
|
||||
String json = "{}";
|
||||
OkHttpClient httpClient = new OkHttpClient.Builder().connectTimeout(600, TimeUnit.SECONDS).build();
|
||||
|
||||
RequestBody body = RequestBody.create(json, MEDIA_TYPE_JSON);
|
||||
|
||||
Request request = new Request.Builder()
|
||||
.url(urlString)
|
||||
.addHeader("Content-Type", "application/json") // add request headers
|
||||
.addHeader("Authorization", "Bearer " + access_token)
|
||||
.post(body)
|
||||
.build();
|
||||
|
||||
try (Response response = httpClient.newCall(request).execute()) {
|
||||
|
||||
if (!response.isSuccessful())
|
||||
throw new IOException("Unexpected code " + response + response.body().string());
|
||||
|
||||
// Get response body
|
||||
json = response.body().string();
|
||||
|
||||
ZenodoModel newSubmission = new Gson().fromJson(json, ZenodoModel.class);
|
||||
this.bucket = newSubmission.getLinks().getBucket();
|
||||
this.deposition_id = newSubmission.getId();
|
||||
|
||||
return response.code();
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Upload files in Zenodo.
|
||||
*
|
||||
* @param is the inputStream for the file to upload
|
||||
* @param file_name the name of the file as it will appear on Zenodo
|
||||
* @param len the size of the file
|
||||
* @return the response code
|
||||
*/
|
||||
public int uploadIS(InputStream is, String file_name, long len) throws IOException {
|
||||
OkHttpClient httpClient = new OkHttpClient.Builder()
|
||||
.writeTimeout(600, TimeUnit.SECONDS)
|
||||
.readTimeout(600, TimeUnit.SECONDS)
|
||||
.connectTimeout(600, TimeUnit.SECONDS)
|
||||
.build();
|
||||
|
||||
Request request = new Request.Builder()
|
||||
.url(bucket + "/" + file_name)
|
||||
.addHeader("Content-Type", "application/zip") // add request headers
|
||||
.addHeader("Authorization", "Bearer " + access_token)
|
||||
.put(InputStreamRequestBody.create(MEDIA_TYPE_ZIP, is, len))
|
||||
.build();
|
||||
|
||||
try (Response response = httpClient.newCall(request).execute()) {
|
||||
if (!response.isSuccessful())
|
||||
throw new IOException("Unexpected code " + response + response.body().string());
|
||||
return response.code();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Associates metadata information to the current deposition
|
||||
*
|
||||
* @param metadata the metadata
|
||||
* @return response code
|
||||
* @throws IOException
|
||||
*/
|
||||
public int sendMretadata(String metadata) throws IOException {
|
||||
|
||||
OkHttpClient httpClient = new OkHttpClient.Builder().connectTimeout(600, TimeUnit.SECONDS).build();
|
||||
|
||||
RequestBody body = RequestBody.create(metadata, MEDIA_TYPE_JSON);
|
||||
|
||||
Request request = new Request.Builder()
|
||||
.url(urlString + "/" + deposition_id)
|
||||
.addHeader("Content-Type", "application/json") // add request headers
|
||||
.addHeader("Authorization", "Bearer " + access_token)
|
||||
.put(body)
|
||||
.build();
|
||||
|
||||
try (Response response = httpClient.newCall(request).execute()) {
|
||||
|
||||
if (!response.isSuccessful())
|
||||
throw new IOException("Unexpected code " + response + response.body().string());
|
||||
|
||||
return response.code();
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* To publish the current deposition. It works for both new deposition or new version of an old deposition
|
||||
*
|
||||
* @return response code
|
||||
* @throws IOException
|
||||
*/
|
||||
public int publish() throws IOException {
|
||||
|
||||
String json = "{}";
|
||||
|
||||
OkHttpClient httpClient = new OkHttpClient.Builder().connectTimeout(600, TimeUnit.SECONDS).build();
|
||||
|
||||
RequestBody body = RequestBody.create(json, MEDIA_TYPE_JSON);
|
||||
|
||||
Request request = new Request.Builder()
|
||||
.url(urlString + "/" + deposition_id + "/actions/publish")
|
||||
.addHeader("Authorization", "Bearer " + access_token)
|
||||
.post(body)
|
||||
.build();
|
||||
|
||||
try (Response response = httpClient.newCall(request).execute()) {
|
||||
|
||||
if (!response.isSuccessful())
|
||||
throw new IOException("Unexpected code " + response + response.body().string());
|
||||
|
||||
return response.code();
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* To create a new version of an already published deposition. It sets the deposition_id and the bucket to be used
|
||||
* for the new version.
|
||||
*
|
||||
* @param concept_rec_id the concept record id of the deposition for which to create a new version. It is the last
|
||||
* part of the url for the DOI Zenodo suggests to use to cite all versions: DOI: 10.xxx/zenodo.656930
|
||||
* concept_rec_id = 656930
|
||||
* @return response code
|
||||
* @throws IOException
|
||||
* @throws MissingConceptDoiException
|
||||
*/
|
||||
public int newVersion(String concept_rec_id) throws IOException, MissingConceptDoiException {
|
||||
setDepositionId(concept_rec_id);
|
||||
String json = "{}";
|
||||
|
||||
OkHttpClient httpClient = new OkHttpClient.Builder().connectTimeout(600, TimeUnit.SECONDS).build();
|
||||
|
||||
RequestBody body = RequestBody.create(json, MEDIA_TYPE_JSON);
|
||||
|
||||
Request request = new Request.Builder()
|
||||
.url(urlString + "/" + deposition_id + "/actions/newversion")
|
||||
.addHeader("Authorization", "Bearer " + access_token)
|
||||
.post(body)
|
||||
.build();
|
||||
|
||||
try (Response response = httpClient.newCall(request).execute()) {
|
||||
|
||||
if (!response.isSuccessful())
|
||||
throw new IOException("Unexpected code " + response + response.body().string());
|
||||
|
||||
ZenodoModel zenodoModel = new Gson().fromJson(response.body().string(), ZenodoModel.class);
|
||||
String latest_draft = zenodoModel.getLinks().getLatest_draft();
|
||||
deposition_id = latest_draft.substring(latest_draft.lastIndexOf("/") + 1);
|
||||
bucket = getBucket(latest_draft);
|
||||
return response.code();
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* To finish uploading a version or new deposition not published
|
||||
* It sets the deposition_id and the bucket to be used
|
||||
*
|
||||
*
|
||||
* @param deposition_id the deposition id of the not yet published upload
|
||||
* concept_rec_id = 656930
|
||||
* @return response code
|
||||
* @throws IOException
|
||||
* @throws MissingConceptDoiException
|
||||
*/
|
||||
public int uploadOpenDeposition(String deposition_id) throws IOException, MissingConceptDoiException {
|
||||
|
||||
this.deposition_id = deposition_id;
|
||||
|
||||
OkHttpClient httpClient = new OkHttpClient.Builder().connectTimeout(600, TimeUnit.SECONDS).build();
|
||||
|
||||
Request request = new Request.Builder()
|
||||
.url(urlString + "/" + deposition_id)
|
||||
.addHeader("Authorization", "Bearer " + access_token)
|
||||
.build();
|
||||
|
||||
try (Response response = httpClient.newCall(request).execute()) {
|
||||
|
||||
if (!response.isSuccessful())
|
||||
throw new IOException("Unexpected code " + response + response.body().string());
|
||||
|
||||
ZenodoModel zenodoModel = new Gson().fromJson(response.body().string(), ZenodoModel.class);
|
||||
bucket = zenodoModel.getLinks().getBucket();
|
||||
return response.code();
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private void setDepositionId(String concept_rec_id) throws IOException, MissingConceptDoiException {
|
||||
|
||||
ZenodoModelList zenodoModelList = new Gson().fromJson(getPrevDepositions(), ZenodoModelList.class);
|
||||
|
||||
for (ZenodoModel zm : zenodoModelList) {
|
||||
if (zm.getConceptrecid().equals(concept_rec_id)) {
|
||||
deposition_id = zm.getId();
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
throw new MissingConceptDoiException("The concept record id specified was missing in the list of depositions");
|
||||
|
||||
}
|
||||
|
||||
private String getPrevDepositions() throws IOException {
|
||||
OkHttpClient httpClient = new OkHttpClient.Builder().connectTimeout(600, TimeUnit.SECONDS).build();
|
||||
|
||||
Request request = new Request.Builder()
|
||||
.url(urlString)
|
||||
.addHeader("Content-Type", "application/json") // add request headers
|
||||
.addHeader("Authorization", "Bearer " + access_token)
|
||||
.get()
|
||||
.build();
|
||||
|
||||
try (Response response = httpClient.newCall(request).execute()) {
|
||||
|
||||
if (!response.isSuccessful())
|
||||
throw new IOException("Unexpected code " + response + response.body().string());
|
||||
|
||||
return response.body().string();
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private String getBucket(String url) throws IOException {
|
||||
OkHttpClient httpClient = new OkHttpClient.Builder()
|
||||
.connectTimeout(600, TimeUnit.SECONDS)
|
||||
.build();
|
||||
|
||||
Request request = new Request.Builder()
|
||||
.url(url)
|
||||
.addHeader("Content-Type", "application/json") // add request headers
|
||||
.addHeader("Authorization", "Bearer " + access_token)
|
||||
.get()
|
||||
.build();
|
||||
|
||||
try (Response response = httpClient.newCall(request).execute()) {
|
||||
|
||||
if (!response.isSuccessful())
|
||||
throw new IOException("Unexpected code " + response + response.body().string());
|
||||
|
||||
// Get response body
|
||||
ZenodoModel zenodoModel = new Gson().fromJson(response.body().string(), ZenodoModel.class);
|
||||
|
||||
return zenodoModel.getLinks().getBucket();
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
|
@ -1,5 +1,5 @@
|
|||
|
||||
package eu.dnetlib.dhp.oa.graph.dump.zenodo;
|
||||
package eu.dnetlib.dhp.common.api.zenodo;
|
||||
|
||||
public class Community {
|
||||
private String identifier;
|
|
@ -1,5 +1,5 @@
|
|||
|
||||
package eu.dnetlib.dhp.oa.graph.dump.zenodo;
|
||||
package eu.dnetlib.dhp.common.api.zenodo;
|
||||
|
||||
public class Creator {
|
||||
private String affiliation;
|
||||
|
@ -29,4 +29,19 @@ public class Creator {
|
|||
public void setOrcid(String orcid) {
|
||||
this.orcid = orcid;
|
||||
}
|
||||
|
||||
public static Creator newInstance(String name, String affiliation, String orcid) {
|
||||
Creator c = new Creator();
|
||||
if (!(name == null)) {
|
||||
c.name = name;
|
||||
}
|
||||
if (!(affiliation == null)) {
|
||||
c.affiliation = affiliation;
|
||||
}
|
||||
if (!(orcid == null)) {
|
||||
c.orcid = orcid;
|
||||
}
|
||||
|
||||
return c;
|
||||
}
|
||||
}
|
|
@ -1,5 +1,5 @@
|
|||
|
||||
package eu.dnetlib.dhp.oa.graph.dump.zenodo;
|
||||
package eu.dnetlib.dhp.common.api.zenodo;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
|
@ -1,5 +1,5 @@
|
|||
|
||||
package eu.dnetlib.dhp.oa.graph.dump.zenodo;
|
||||
package eu.dnetlib.dhp.common.api.zenodo;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
|
@ -13,4 +13,11 @@ public class Grant implements Serializable {
|
|||
public void setId(String id) {
|
||||
this.id = id;
|
||||
}
|
||||
|
||||
public static Grant newInstance(String id) {
|
||||
Grant g = new Grant();
|
||||
g.id = id;
|
||||
|
||||
return g;
|
||||
}
|
||||
}
|
|
@ -1,5 +1,5 @@
|
|||
|
||||
package eu.dnetlib.dhp.oa.graph.dump.zenodo;
|
||||
package eu.dnetlib.dhp.common.api.zenodo;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
|
@ -1,5 +1,5 @@
|
|||
|
||||
package eu.dnetlib.dhp.oa.graph.dump.zenodo;
|
||||
package eu.dnetlib.dhp.common.api.zenodo;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.List;
|
|
@ -1,5 +1,5 @@
|
|||
|
||||
package eu.dnetlib.dhp.oa.graph.dump.zenodo;
|
||||
package eu.dnetlib.dhp.common.api.zenodo;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
|
@ -1,5 +1,5 @@
|
|||
|
||||
package eu.dnetlib.dhp.oa.graph.dump.zenodo;
|
||||
package eu.dnetlib.dhp.common.api.zenodo;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
|
@ -1,5 +1,5 @@
|
|||
|
||||
package eu.dnetlib.dhp.oa.graph.dump.zenodo;
|
||||
package eu.dnetlib.dhp.common.api.zenodo;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.List;
|
|
@ -0,0 +1,7 @@
|
|||
|
||||
package eu.dnetlib.dhp.common.api.zenodo;
|
||||
|
||||
import java.util.ArrayList;
|
||||
|
||||
/**
 * A list of {@link ZenodoModel}. Having a concrete, non-generic class lets the list be named as a
 * {@code Class} object, as ZenodoAPIClient does when it parses the list of depositions with Gson
 * ({@code fromJson(json, ZenodoModelList.class)}).
 */
public class ZenodoModelList extends ArrayList<ZenodoModel> {
|
||||
}
|
|
@ -1,5 +1,5 @@
|
|||
|
||||
package eu.dnetlib.dhp.oa.dedup;
|
||||
package eu.dnetlib.dhp.oa.merge;
|
||||
|
||||
import java.text.Normalizer;
|
||||
import java.util.*;
|
||||
|
@ -94,7 +94,13 @@ public class AuthorMerger {
|
|||
if (r.getPid() == null) {
|
||||
r.setPid(new ArrayList<>());
|
||||
}
|
||||
r.getPid().add(a._1());
|
||||
|
||||
// TERRIBLE HACK: when the pid list was created with Arrays.asList it has a fixed size,
|
||||
// so its add method raises UnsupportedOperationException (at java.util.AbstractList.add).
|
||||
// The pids are therefore copied into a new, growable ArrayList before adding the new one.
|
||||
final List<StructuredProperty> tmp = new ArrayList<>(r.getPid());
|
||||
tmp.add(a._1());
|
||||
r.setPid(tmp);
|
||||
}
|
||||
}
|
||||
});
|
|
@ -1,19 +1,55 @@
|
|||
|
||||
package eu.dnetlib.dhp.oa.graph.raw.common;
|
||||
package eu.dnetlib.dhp.schema.oaf;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
import java.util.function.Function;
|
||||
import java.util.function.Predicate;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
|
||||
import eu.dnetlib.dhp.schema.oaf.*;
|
||||
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
||||
import eu.dnetlib.dhp.utils.DHPUtils;
|
||||
|
||||
public class OafMapperUtils {
|
||||
|
||||
public static Oaf merge(final Oaf o1, final Oaf o2) {
|
||||
if (ModelSupport.isSubClass(o1, OafEntity.class)) {
|
||||
if (ModelSupport.isSubClass(o1, Result.class)) {
|
||||
|
||||
return mergeResults((Result) o1, (Result) o2);
|
||||
} else if (ModelSupport.isSubClass(o1, Datasource.class)) {
|
||||
((Datasource) o1).mergeFrom((Datasource) o2);
|
||||
} else if (ModelSupport.isSubClass(o1, Organization.class)) {
|
||||
((Organization) o1).mergeFrom((Organization) o2);
|
||||
} else if (ModelSupport.isSubClass(o1, Project.class)) {
|
||||
((Project) o1).mergeFrom((Project) o2);
|
||||
} else {
|
||||
throw new RuntimeException("invalid OafEntity subtype:" + o1.getClass().getCanonicalName());
|
||||
}
|
||||
} else if (ModelSupport.isSubClass(o1, Relation.class)) {
|
||||
((Relation) o1).mergeFrom((Relation) o2);
|
||||
} else {
|
||||
throw new RuntimeException("invalid Oaf type:" + o1.getClass().getCanonicalName());
|
||||
}
|
||||
return o1;
|
||||
}
|
||||
|
||||
public static Result mergeResults(Result r1, Result r2) {
|
||||
if (new ResultTypeComparator().compare(r1, r2) < 0) {
|
||||
r1.mergeFrom(r2);
|
||||
return r1;
|
||||
} else {
|
||||
r2.mergeFrom(r1);
|
||||
return r2;
|
||||
}
|
||||
}
|
||||
|
||||
public static KeyValue keyValue(final String k, final String v) {
|
||||
final KeyValue kv = new KeyValue();
|
||||
kv.setKey(k);
|
||||
|
@ -49,6 +85,7 @@ public class OafMapperUtils {
|
|||
.stream(values)
|
||||
.map(v -> field(v, info))
|
||||
.filter(Objects::nonNull)
|
||||
.filter(distinctByKey(f -> f.getValue()))
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
|
||||
|
@ -57,6 +94,7 @@ public class OafMapperUtils {
|
|||
.stream()
|
||||
.map(v -> field(v, info))
|
||||
.filter(Objects::nonNull)
|
||||
.filter(distinctByKey(f -> f.getValue()))
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
|
||||
|
@ -89,7 +127,9 @@ public class OafMapperUtils {
|
|||
}
|
||||
|
||||
public static StructuredProperty structuredProperty(
|
||||
final String value, final Qualifier qualifier, final DataInfo dataInfo) {
|
||||
final String value,
|
||||
final Qualifier qualifier,
|
||||
final DataInfo dataInfo) {
|
||||
if (value == null) {
|
||||
return null;
|
||||
}
|
||||
|
@ -137,6 +177,27 @@ public class OafMapperUtils {
|
|||
return p;
|
||||
}
|
||||
|
||||
/**
 * Shorthand that builds a Journal from its name and ISSNs only: it delegates to the twelve-argument
 * {@code journal} overload, passing {@code null} for the seven arguments that sit between
 * {@code issnLinking} and {@code dataInfo}.
 *
 * @param name the journal name
 * @param issnPrinted the printed ISSN
 * @param issnOnline the online ISSN
 * @param issnLinking the linking ISSN
 * @param dataInfo the data info forwarded as last argument
 * @return whatever the delegate overload returns
 */
public static Journal journal(
|
||||
final String name,
|
||||
final String issnPrinted,
|
||||
final String issnOnline,
|
||||
final String issnLinking,
|
||||
final DataInfo dataInfo) {
|
||||
return journal(
|
||||
name,
|
||||
issnPrinted,
|
||||
issnOnline,
|
||||
issnLinking,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
dataInfo);
|
||||
}
|
||||
|
||||
public static Journal journal(
|
||||
final String name,
|
||||
final String issnPrinted,
|
||||
|
@ -192,8 +253,12 @@ public class OafMapperUtils {
|
|||
}
|
||||
|
||||
public static String createOpenaireId(
|
||||
final int prefix, final String originalId, final boolean to_md5) {
|
||||
if (to_md5) {
|
||||
final int prefix,
|
||||
final String originalId,
|
||||
final boolean to_md5) {
|
||||
if (StringUtils.isBlank(originalId)) {
|
||||
return null;
|
||||
} else if (to_md5) {
|
||||
final String nsPrefix = StringUtils.substringBefore(originalId, "::");
|
||||
final String rest = StringUtils.substringAfter(originalId, "::");
|
||||
return String.format("%s|%s::%s", prefix, nsPrefix, DHPUtils.md5(rest));
|
||||
|
@ -203,7 +268,9 @@ public class OafMapperUtils {
|
|||
}
|
||||
|
||||
public static String createOpenaireId(
|
||||
final String type, final String originalId, final boolean to_md5) {
|
||||
final String type,
|
||||
final String originalId,
|
||||
final boolean to_md5) {
|
||||
switch (type) {
|
||||
case "datasource":
|
||||
return createOpenaireId(10, originalId, to_md5);
|
||||
|
@ -221,4 +288,10 @@ public class OafMapperUtils {
|
|||
public static String asString(final Object o) {
|
||||
return o == null ? "" : o.toString();
|
||||
}
|
||||
|
||||
public static <T> Predicate<T> distinctByKey(
|
||||
final Function<? super T, ?> keyExtractor) {
|
||||
final Map<Object, Boolean> seen = new ConcurrentHashMap<>();
|
||||
return t -> seen.putIfAbsent(keyExtractor.apply(t), Boolean.TRUE) == null;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,49 @@
|
|||
|
||||
package eu.dnetlib.dhp.schema.oaf;
|
||||
|
||||
import java.util.Comparator;
|
||||
|
||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||
|
||||
public class ResultTypeComparator implements Comparator<Result> {
|
||||
|
||||
@Override
|
||||
public int compare(Result left, Result right) {
|
||||
|
||||
if (left == null && right == null)
|
||||
return 0;
|
||||
if (left == null)
|
||||
return 1;
|
||||
if (right == null)
|
||||
return -1;
|
||||
|
||||
String lClass = left.getResulttype().getClassid();
|
||||
String rClass = right.getResulttype().getClassid();
|
||||
|
||||
if (lClass.equals(rClass))
|
||||
return 0;
|
||||
|
||||
if (lClass.equals(ModelConstants.PUBLICATION_RESULTTYPE_CLASSID))
|
||||
return -1;
|
||||
if (rClass.equals(ModelConstants.PUBLICATION_RESULTTYPE_CLASSID))
|
||||
return 1;
|
||||
|
||||
if (lClass.equals(ModelConstants.DATASET_RESULTTYPE_CLASSID))
|
||||
return -1;
|
||||
if (rClass.equals(ModelConstants.DATASET_RESULTTYPE_CLASSID))
|
||||
return 1;
|
||||
|
||||
if (lClass.equals(ModelConstants.SOFTWARE_RESULTTYPE_CLASSID))
|
||||
return -1;
|
||||
if (rClass.equals(ModelConstants.SOFTWARE_RESULTTYPE_CLASSID))
|
||||
return 1;
|
||||
|
||||
if (lClass.equals(ModelConstants.ORP_RESULTTYPE_CLASSID))
|
||||
return -1;
|
||||
if (rClass.equals(ModelConstants.ORP_RESULTTYPE_CLASSID))
|
||||
return 1;
|
||||
|
||||
// Else (but unlikely), lexicographical ordering will do.
|
||||
return lClass.compareTo(rClass);
|
||||
}
|
||||
}
|
|
@ -5,6 +5,7 @@ import java.io.ByteArrayInputStream;
|
|||
import java.io.ByteArrayOutputStream;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.security.MessageDigest;
|
||||
import java.util.List;
|
||||
import java.util.zip.GZIPInputStream;
|
||||
import java.util.zip.GZIPOutputStream;
|
||||
|
||||
|
@ -15,9 +16,15 @@ import org.apache.commons.codec.binary.Hex;
|
|||
import com.jayway.jsonpath.JsonPath;
|
||||
|
||||
import net.minidev.json.JSONArray;
|
||||
import scala.collection.JavaConverters;
|
||||
import scala.collection.Seq;
|
||||
|
||||
public class DHPUtils {
|
||||
|
||||
/**
 * Converts a Java list of strings into a Scala {@code Seq}: the iterator of the list is wrapped with
 * {@code JavaConverters} and then turned into a sequence with {@code toSeq}.
 *
 * @param list the list to convert; must not be null, since its iterator is requested
 * @return the Scala sequence built from the elements of the list
 */
public static Seq<String> toSeq(List<String> list) {
|
||||
return JavaConverters.asScalaIteratorConverter(list.iterator()).asScala().toSeq();
|
||||
}
|
||||
|
||||
public static String md5(final String s) {
|
||||
try {
|
||||
final MessageDigest md = MessageDigest.getInstance("MD5");
|
||||
|
|
|
@ -1,15 +1,22 @@
|
|||
|
||||
package eu.dnetlib.dhp.utils;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import java.util.Map;
|
||||
|
||||
import javax.xml.ws.BindingProvider;
|
||||
|
||||
import org.apache.cxf.jaxws.JaxWsProxyFactoryBean;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
||||
|
||||
public class ISLookupClientFactory {
|
||||
|
||||
private static final Log log = LogFactory.getLog(ISLookupClientFactory.class);
|
||||
private static final Logger log = LoggerFactory.getLogger(ISLookupClientFactory.class);
|
||||
|
||||
private static int requestTimeout = 60000 * 10;
|
||||
private static int connectTimeout = 60000 * 10;
|
||||
|
||||
public static ISLookUpService getLookUpService(final String isLookupUrl) {
|
||||
return getServiceStub(ISLookUpService.class, isLookupUrl);
|
||||
|
@ -21,6 +28,25 @@ public class ISLookupClientFactory {
|
|||
final JaxWsProxyFactoryBean jaxWsProxyFactory = new JaxWsProxyFactoryBean();
|
||||
jaxWsProxyFactory.setServiceClass(clazz);
|
||||
jaxWsProxyFactory.setAddress(endpoint);
|
||||
return (T) jaxWsProxyFactory.create();
|
||||
|
||||
final T service = (T) jaxWsProxyFactory.create();
|
||||
|
||||
if (service instanceof BindingProvider) {
|
||||
log
|
||||
.info(
|
||||
"setting timeouts for {} to requestTimeout: {}, connectTimeout: {}",
|
||||
BindingProvider.class.getName(), requestTimeout, connectTimeout);
|
||||
|
||||
Map<String, Object> requestContext = ((BindingProvider) service).getRequestContext();
|
||||
|
||||
requestContext.put("com.sun.xml.internal.ws.request.timeout", requestTimeout);
|
||||
requestContext.put("com.sun.xml.internal.ws.connect.timeout", connectTimeout);
|
||||
requestContext.put("com.sun.xml.ws.request.timeout", requestTimeout);
|
||||
requestContext.put("com.sun.xml.ws.connect.timeout", connectTimeout);
|
||||
requestContext.put("javax.xml.ws.client.receiveTimeout", requestTimeout);
|
||||
requestContext.put("javax.xml.ws.client.connectionTimeout", connectTimeout);
|
||||
}
|
||||
|
||||
return service;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -5,6 +5,8 @@ import java.text.ParseException;
|
|||
import java.text.SimpleDateFormat;
|
||||
import java.util.Date;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
|
||||
import net.sf.saxon.expr.XPathContext;
|
||||
import net.sf.saxon.om.Sequence;
|
||||
import net.sf.saxon.trans.XPathException;
|
||||
|
@ -19,6 +21,8 @@ public class NormalizeDate extends AbstractExtensionFunction {
|
|||
|
||||
// Output pattern for normalized dates. 'HH' is the 24-hour field (0-23); the previous 'hh' is the
// 12-hour clock field (1-12), which is ambiguous without an am/pm marker.
// NOTE(review): the trailing 'Z' is a quoted literal, so no time-zone conversion is implied by this pattern.
private static final String normalizeOutFormat = "yyyy-MM-dd'T'HH:mm:ss'Z'";
|
||||
|
||||
public static final String BLANK = "";
|
||||
|
||||
@Override
|
||||
public String getName() {
|
||||
return "normalizeDate";
|
||||
|
@ -27,10 +31,10 @@ public class NormalizeDate extends AbstractExtensionFunction {
|
|||
@Override
|
||||
public Sequence doCall(XPathContext context, Sequence[] arguments) throws XPathException {
|
||||
if (arguments == null | arguments.length == 0) {
|
||||
return new StringValue("");
|
||||
return new StringValue(BLANK);
|
||||
}
|
||||
String s = arguments[0].head().getStringValue();
|
||||
return new StringValue(_year(s));
|
||||
return new StringValue(_normalizeDate(s));
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -55,8 +59,8 @@ public class NormalizeDate extends AbstractExtensionFunction {
|
|||
return SequenceType.SINGLE_STRING;
|
||||
}
|
||||
|
||||
private String _year(String s) {
|
||||
final String date = s != null ? s.trim() : "";
|
||||
private String _normalizeDate(String s) {
|
||||
final String date = StringUtils.isNotBlank(s) ? s.trim() : BLANK;
|
||||
|
||||
for (String format : normalizeDateFormats) {
|
||||
try {
|
||||
|
@ -66,6 +70,6 @@ public class NormalizeDate extends AbstractExtensionFunction {
|
|||
} catch (ParseException e) {
|
||||
}
|
||||
}
|
||||
return "";
|
||||
return BLANK;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,109 @@
|
|||
|
||||
package eu.dnetlib.dhp.common.api;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.junit.jupiter.api.Assertions;
|
||||
import org.junit.jupiter.api.Disabled;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
@Disabled
|
||||
public class ZenodoAPIClientTest {
|
||||
|
||||
private final String URL_STRING = "https://sandbox.zenodo.org/api/deposit/depositions";
|
||||
private final String ACCESS_TOKEN = "";
|
||||
|
||||
private final String CONCEPT_REC_ID = "657113";
|
||||
|
||||
private final String depositionId = "674915";
|
||||
|
||||
@Test
|
||||
public void testUploadOldDeposition() throws IOException, MissingConceptDoiException {
|
||||
ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING,
|
||||
ACCESS_TOKEN);
|
||||
Assertions.assertEquals(200, client.uploadOpenDeposition(depositionId));
|
||||
|
||||
File file = new File(getClass()
|
||||
.getResource("/eu/dnetlib/dhp/common/api/COVID-19.json.gz")
|
||||
.getPath());
|
||||
|
||||
InputStream is = new FileInputStream(file);
|
||||
|
||||
Assertions.assertEquals(200, client.uploadIS(is, "COVID-19.json.gz", file.length()));
|
||||
|
||||
String metadata = IOUtils.toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/common/api/metadata.json"));
|
||||
|
||||
Assertions.assertEquals(200, client.sendMretadata(metadata));
|
||||
|
||||
Assertions.assertEquals(202, client.publish());
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNewDeposition() throws IOException {
|
||||
|
||||
ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING,
|
||||
ACCESS_TOKEN);
|
||||
Assertions.assertEquals(201, client.newDeposition());
|
||||
|
||||
File file = new File(getClass()
|
||||
.getResource("/eu/dnetlib/dhp/common/api/COVID-19.json.gz")
|
||||
.getPath());
|
||||
|
||||
InputStream is = new FileInputStream(file);
|
||||
|
||||
Assertions.assertEquals(200, client.uploadIS(is, "COVID-19.json.gz", file.length()));
|
||||
|
||||
String metadata = IOUtils.toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/common/api/metadata.json"));
|
||||
|
||||
Assertions.assertEquals(200, client.sendMretadata(metadata));
|
||||
|
||||
Assertions.assertEquals(202, client.publish());
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNewVersionNewName() throws IOException, MissingConceptDoiException {
|
||||
|
||||
ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING,
|
||||
ACCESS_TOKEN);
|
||||
|
||||
Assertions.assertEquals(201, client.newVersion(CONCEPT_REC_ID));
|
||||
|
||||
File file = new File(getClass()
|
||||
.getResource("/eu/dnetlib/dhp/common/api/newVersion")
|
||||
.getPath());
|
||||
|
||||
InputStream is = new FileInputStream(file);
|
||||
|
||||
Assertions.assertEquals(200, client.uploadIS(is, "newVersion_deposition", file.length()));
|
||||
|
||||
Assertions.assertEquals(202, client.publish());
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNewVersionOldName() throws IOException, MissingConceptDoiException {
|
||||
|
||||
ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING,
|
||||
ACCESS_TOKEN);
|
||||
|
||||
Assertions.assertEquals(201, client.newVersion(CONCEPT_REC_ID));
|
||||
|
||||
File file = new File(getClass()
|
||||
.getResource("/eu/dnetlib/dhp/common/api/newVersion2")
|
||||
.getPath());
|
||||
|
||||
InputStream is = new FileInputStream(file);
|
||||
|
||||
Assertions.assertEquals(200, client.uploadIS(is, "newVersion_deposition", file.length()));
|
||||
|
||||
Assertions.assertEquals(202, client.publish());
|
||||
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1 @@
|
|||
{"metadata":{"access_right":"open","communities":[{"identifier":"openaire-research-graph"}],"creators":[{"affiliation":"ISTI - CNR","name":"Bardi, Alessia","orcid":"0000-0002-1112-1292"},{"affiliation":"eifl", "name":"Kuchma, Iryna"},{"affiliation":"BIH", "name":"Brobov, Evgeny"},{"affiliation":"GIDIF RBM", "name":"Truccolo, Ivana"},{"affiliation":"unesp", "name":"Monteiro, Elizabete"},{"affiliation":"und", "name":"Casalegno, Carlotta"},{"affiliation":"CARL ABRC", "name":"Clary, Erin"},{"affiliation":"The University of Edimburgh", "name":"Romanowski, Andrew"},{"affiliation":"ISTI - CNR", "name":"Pavone, Gina"},{"affiliation":"ISTI - CNR", "name":"Artini, Michele"},{"affiliation":"ISTI - CNR","name":"Atzori, Claudio","orcid":"0000-0001-9613-6639"},{"affiliation":"University of Bielefeld","name":"Bäcker, Amelie","orcid":"0000-0001-6015-2063"},{"affiliation":"ISTI - CNR","name":"Baglioni, Miriam","orcid":"0000-0002-2273-9004"},{"affiliation":"University of Bielefeld","name":"Czerniak, Andreas","orcid":"0000-0003-3883-4169"},{"affiliation":"ISTI - CNR","name":"De Bonis, Michele"},{"affiliation":"Athena Research and Innovation Centre","name":"Dimitropoulos, Harry"},{"affiliation":"Athena Research and Innovation Centre","name":"Foufoulas, Ioannis"},{"affiliation":"University of Warsaw","name":"Horst, Marek"},{"affiliation":"Athena Research and Innovation Centre","name":"Iatropoulou, Katerina"},{"affiliation":"University of Warsaw","name":"Jacewicz, Przemyslaw"},{"affiliation":"Athena Research and Innovation Centre","name":"Kokogiannaki, Argiro", "orcid":"0000-0002-3880-0244"},{"affiliation":"ISTI - CNR","name":"La Bruzzo, Sandro","orcid":"0000-0003-2855-1245"},{"affiliation":"ISTI - CNR","name":"Lazzeri, Emma"},{"affiliation":"University of Bielefeld","name":"Löhden, Aenne"},{"affiliation":"ISTI - CNR","name":"Manghi, Paolo","orcid":"0000-0001-7291-3210"},{"affiliation":"ISTI - CNR","name":"Mannocci, Andrea","orcid":"0000-0002-5193-7851"},{"affiliation":"Athena Research 
and Innovation Center","name":"Manola, Natalia"},{"affiliation":"ISTI - CNR","name":"Ottonello, Enrico"},{"affiliation":"University of Bielefeld","name":"Shirrwagen, Jochen"}],"description":"\\u003cp\\u003eThis dump provides access to the metadata records of publications, research data, software and projects that may be relevant to the Corona Virus Disease (COVID-19) fight. The dump contains records of the OpenAIRE COVID-19 Gateway (https://covid-19.openaire.eu/), identified via full-text mining and inference techniques applied to the OpenAIRE Research Graph (https://explore.openaire.eu/). The Graph is one of the largest Open Access collections of metadata records and links between publications, datasets, software, projects, funders, and organizations, aggregating 12,000+ scientific data sources world-wide, among which the Covid-19 data sources Zenodo COVID-19 Community, WHO (World Health Organization), BIP! FInder for COVID-19, Protein Data Bank, Dimensions, scienceOpen, and RSNA. \\u003cp\\u003eThe dump consists of a gzip file containing one json per line. Each json is compliant to the schema available at https://doi.org/10.5281/zenodo.3974226\\u003c/p\\u003e ","title":"OpenAIRE Covid-19 publications, datasets, software and projects metadata.","upload_type":"dataset","version":"1.0"}}
|
|
@ -0,0 +1 @@
|
|||
This is a test for a new deposition
|
|
@ -0,0 +1 @@
|
|||
This is a test for a new version of an old deposition
|
|
@ -0,0 +1,2 @@
|
|||
This is a test for a new version of an old deposition. This should replace the other new version. I expect to have only two
|
||||
files in the deposition
|
|
@ -14,6 +14,37 @@
|
|||
|
||||
<description>This module contains common schema classes meant to be used across the dnet-hadoop submodules</description>
|
||||
|
||||
<build>
|
||||
<plugins>
|
||||
<plugin>
|
||||
<groupId>net.alchim31.maven</groupId>
|
||||
<artifactId>scala-maven-plugin</artifactId>
|
||||
<version>4.0.1</version>
|
||||
<executions>
|
||||
<execution>
|
||||
<id>scala-compile-first</id>
|
||||
<phase>initialize</phase>
|
||||
<goals>
|
||||
<goal>add-source</goal>
|
||||
<goal>compile</goal>
|
||||
</goals>
|
||||
</execution>
|
||||
<execution>
|
||||
<id>scala-test-compile</id>
|
||||
<phase>process-test-resources</phase>
|
||||
<goals>
|
||||
<goal>testCompile</goal>
|
||||
</goals>
|
||||
</execution>
|
||||
</executions>
|
||||
<configuration>
|
||||
<scalaVersion>${scala.version}</scalaVersion>
|
||||
</configuration>
|
||||
</plugin>
|
||||
|
||||
</plugins>
|
||||
</build>
|
||||
|
||||
<dependencies>
|
||||
|
||||
<dependency>
|
||||
|
|
|
@ -1,14 +1,17 @@
|
|||
|
||||
package eu.dnetlib.dhp.schema.common;
|
||||
|
||||
import java.security.Key;
|
||||
|
||||
import eu.dnetlib.dhp.schema.oaf.DataInfo;
|
||||
import eu.dnetlib.dhp.schema.oaf.KeyValue;
|
||||
import eu.dnetlib.dhp.schema.oaf.Qualifier;
|
||||
|
||||
public class ModelConstants {
|
||||
|
||||
public static final String ORCID = "orcid";
|
||||
public static final String ORCID_PENDING = "orcid_pending";
|
||||
public static final String ORCID_CLASSNAME = "Open Researcher and Contributor ID";
|
||||
|
||||
public static final String DNET_SUBJECT_TYPOLOGIES = "dnet:subject_classification_typologies";
|
||||
public static final String DNET_RESULT_TYPOLOGIES = "dnet:result_typologies";
|
||||
public static final String DNET_PUBLICATION_RESOURCE = "dnet:publication_resource";
|
||||
public static final String DNET_ACCESS_MODES = "dnet:access_modes";
|
||||
|
@ -40,15 +43,15 @@ public class ModelConstants {
|
|||
public static final String IS_SUPPLEMENT_TO = "isSupplementTo";
|
||||
public static final String IS_SUPPLEMENTED_BY = "isSupplementedBy";
|
||||
public static final String PART = "part";
|
||||
public static final String IS_PART_OF = "IsPartOf";
|
||||
public static final String HAS_PARTS = "HasParts";
|
||||
public static final String IS_PART_OF = "isPartOf";
|
||||
public static final String HAS_PARTS = "hasParts";
|
||||
public static final String RELATIONSHIP = "relationship";
|
||||
public static final String CITATION = "citation";
|
||||
public static final String CITES = "cites";
|
||||
public static final String IS_CITED_BY = "IsCitedBy";
|
||||
public static final String IS_CITED_BY = "isCitedBy";
|
||||
public static final String REVIEW = "review";
|
||||
public static final String REVIEWS = "reviews";
|
||||
public static final String IS_REVIEWED_BY = "IsReviewedBy";
|
||||
public static final String IS_REVIEWED_BY = "isReviewedBy";
|
||||
|
||||
public static final String RESULT_PROJECT = "resultProject";
|
||||
public static final String OUTCOME = "outcome";
|
||||
|
|
|
@ -79,6 +79,15 @@ public class ModelSupport {
|
|||
entityIdPrefix.put("result", "50");
|
||||
}
|
||||
|
||||
public static final Map<String, String> idPrefixEntity = Maps.newHashMap();
|
||||
|
||||
static {
|
||||
idPrefixEntity.put("10", "datasource");
|
||||
idPrefixEntity.put("20", "organization");
|
||||
idPrefixEntity.put("40", "project");
|
||||
idPrefixEntity.put("50", "result");
|
||||
}
|
||||
|
||||
public static final Map<String, RelationInverse> relationInverseMap = Maps.newHashMap();
|
||||
|
||||
static {
|
||||
|
|
|
@ -3,6 +3,10 @@ package eu.dnetlib.dhp.schema.dump.oaf;
|
|||
|
||||
import java.io.Serializable;
|
||||
|
||||
/**
|
||||
* Used to refer to the Article Processing Charge information. Not dumped in this release. It contains two parameters: -
|
||||
* currency of type String to store the currency of the APC - amount of type String to store the charged amount
|
||||
*/
|
||||
public class APC implements Serializable {
|
||||
private String currency;
|
||||
private String amount;
|
||||
|
|
|
@ -1,6 +1,14 @@
|
|||
|
||||
package eu.dnetlib.dhp.schema.dump.oaf;
|
||||
|
||||
/**
|
||||
* AccessRight. Used to represent the result access rights. It extends the eu.dnetlib.dhp.schema.dump.oaf.Qualifier
|
||||
* element with a parameter scheme of type String to store the scheme. Values for this element are found against the
|
||||
* COAR access right scheme. The classid of the element accessright in eu.dnetlib.dhp.schema.oaf.Result is used to get
|
||||
* the COAR corresponding code whose value will be used to set the code parameter. The COAR label corresponding to the
|
||||
* COAR code will be used to set the label parameter. The scheme value will always be the one referring to the COAR
|
||||
* access right scheme
|
||||
*/
|
||||
public class AccessRight extends Qualifier {
|
||||
|
||||
private String scheme;
|
||||
|
|
|
@ -3,8 +3,21 @@ package eu.dnetlib.dhp.schema.dump.oaf;
|
|||
|
||||
import java.io.Serializable;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
|
||||
/**
|
||||
* Used to represent the generic author of the result. It has six parameters: - name of type String to store the given
|
||||
* name of the author. The value for this parameter corresponds to eu.dnetlib.dhp.schema.oaf.Author name - surname of
|
||||
* type String to store the family name of the author. The value for this parameter corresponds to
|
||||
* eu.dnetlib.dhp.schema.oaf.Author surname - fullname of type String to store the fullname of the author. The value for
|
||||
* this parameter corresponds to eu.dnetlib.dhp.schema.oaf.Author fullname - rank of type Integer to store the rank of
|
||||
* the author in the result's authors list. The value for this parameter corresponds to eu.dnetlib.dhp.schema.oaf.Author
|
||||
* rank - pid of type eu.dnetlib.dhp.schema.dump.oaf.Pid to store the persistent identifier for the author. For the
|
||||
* moment only ORCID identifiers will be dumped. - The id element is instantiated by using the following values in the
|
||||
* eu.dnetlib.dhp.schema.oaf.Result pid: * Qualifier.classid for scheme * value for value - The provenance element is
|
||||
* instantiated only if the dataInfo is set for the pid in the result to be dumped. The provenance element is
|
||||
* instantiated by using the following values in the eu.dnetlib.dhp.schema.oaf.Result pid: *
|
||||
* dataInfo.provenanceaction.classname for provenance * dataInfo.trust for trust
|
||||
*/
|
||||
public class Author implements Serializable {
|
||||
|
||||
private String fullname;
|
||||
|
@ -15,9 +28,7 @@ public class Author implements Serializable {
|
|||
|
||||
private Integer rank;
|
||||
|
||||
private List<ControlledField> pid;
|
||||
|
||||
private List<String> affiliation;
|
||||
private Pid pid;
|
||||
|
||||
public String getFullname() {
|
||||
return fullname;
|
||||
|
@ -51,20 +62,12 @@ public class Author implements Serializable {
|
|||
this.rank = rank;
|
||||
}
|
||||
|
||||
public List<ControlledField> getPid() {
|
||||
public Pid getPid() {
|
||||
return pid;
|
||||
}
|
||||
|
||||
public void setPid(List<ControlledField> pid) {
|
||||
public void setPid(Pid pid) {
|
||||
this.pid = pid;
|
||||
}
|
||||
|
||||
public List<String> getAffiliation() {
|
||||
return affiliation;
|
||||
}
|
||||
|
||||
public void setAffiliation(List<String> affiliation) {
|
||||
this.affiliation = affiliation;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -4,6 +4,23 @@ package eu.dnetlib.dhp.schema.dump.oaf;
|
|||
import java.io.Serializable;
|
||||
import java.util.Objects;
|
||||
|
||||
/**
|
||||
* To store information about the conference or journal where the result has been presented or published. It contains
|
||||
* eleven parameters: - name of type String to store the name of the journal or conference. It corresponds to the
|
||||
* parameter name of eu.dnetlib.dhp.schema.oaf.Journal - issnPrinted of type String to store the journal printed issn.
|
||||
* It corresponds to the parameter issnPrinted of eu.dnetlib.dhp.schema.oaf.Journal - issnOnline of type String to store
|
||||
* the journal online issn. It corresponds to the parameter issnOnline of eu.dnetlib.dhp.schema.oaf.Journal -
|
||||
* issnLinking of type String to store the journal linking issn. It corresponds to the parameter issnLinking of
|
||||
* eu.dnetlib.dhp.schema.oaf.Journal - ep of type String to store the end page. It corresponds to the parameter ep of
|
||||
* eu.dnetlib.dhp.schema.oaf.Journal - iss of type String to store the journal issue. It corresponds to the parameter
|
||||
* iss of eu.dnetlib.dhp.schema.oaf.Journal - sp of type String to store the start page. It corresponds to the parameter
|
||||
* sp of eu.dnetlib.dhp.schema.oaf.Journal - vol of type String to store the Volume. It corresponds to the parameter vol
|
||||
* of eu.dnetlib.dhp.schema.oaf.Journal - edition of type String to store the edition of the journal or conference
|
||||
* proceeding. It corresponds to the parameter edition of eu.dnetlib.dhp.schema.oaf.Journal - conferenceplace of type
|
||||
* String to store the place of the conference. It corresponds to the parameter conferenceplace of
|
||||
* eu.dnetlib.dhp.schema.oaf.Journal - conferencedate of type String to store the date of the conference. It corresponds
|
||||
* to the parameter conferencedate of eu.dnetlib.dhp.schema.oaf.Journal
|
||||
*/
|
||||
public class Container implements Serializable {
|
||||
|
||||
private String name;
|
||||
|
|
|
@ -1,16 +0,0 @@
|
|||
|
||||
package eu.dnetlib.dhp.schema.dump.oaf;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
public class Context extends Qualifier {
|
||||
private List<String> provenance;
|
||||
|
||||
public List<String> getProvenance() {
|
||||
return provenance;
|
||||
}
|
||||
|
||||
public void setProvenance(List<String> provenance) {
|
||||
this.provenance = provenance;
|
||||
}
|
||||
}
|
|
@ -3,8 +3,10 @@ package eu.dnetlib.dhp.schema.dump.oaf;
|
|||
|
||||
import java.io.Serializable;
|
||||
|
||||
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
||||
|
||||
/**
|
||||
* To represent the information described by a scheme and a value in that scheme (i.e. pid). It has two parameters: -
|
||||
* scheme of type String to store the scheme - value of type String to store the value in that scheme
|
||||
*/
|
||||
public class ControlledField implements Serializable {
|
||||
private String scheme;
|
||||
private String value;
|
||||
|
|
|
@ -1,19 +1,28 @@
|
|||
|
||||
package eu.dnetlib.dhp.schema.dump.oaf;
|
||||
|
||||
/**
|
||||
* Represents the country associated to this result. It extends eu.dnetlib.dhp.schema.dump.oaf.Qualifier with a
|
||||
* provenance parameter of type eu.dnetlib.dhp.schema.dump.oaf.Provenance. The country is not mapped if its value in the
|
||||
* result represented in the internal format is Unknown. The values for this element correspond to: - code corresponds
|
||||
* to the classid of eu.dnetlib.dhp.schema.oaf.Country - label corresponds to the classname of
|
||||
* eu.dnetlib.dhp.schema.oaf.Country - provenance set only if the dataInfo associated to the Country of the result to be
|
||||
* dumped is not null. In this case : - provenance corresponds to dataInfo.provenanceaction.classid (to be modified with
|
||||
* datainfo.provenanceaction.classname) - trust corresponds to dataInfo.trust
|
||||
*/
|
||||
public class Country extends Qualifier {
|
||||
|
||||
private String provenance;
|
||||
private Provenance provenance;
|
||||
|
||||
public String getProvenance() {
|
||||
public Provenance getProvenance() {
|
||||
return provenance;
|
||||
}
|
||||
|
||||
public void setProvenance(String provenance) {
|
||||
public void setProvenance(Provenance provenance) {
|
||||
this.provenance = provenance;
|
||||
}
|
||||
|
||||
public static Country newInstance(String code, String label, String provenance) {
|
||||
public static Country newInstance(String code, String label, Provenance provenance) {
|
||||
Country c = new Country();
|
||||
c.setProvenance(provenance);
|
||||
c.setCode(code);
|
||||
|
@ -21,4 +30,8 @@ public class Country extends Qualifier {
|
|||
return c;
|
||||
}
|
||||
|
||||
public static Country newInstance(String code, String label, String provenance, String trust) {
|
||||
return newInstance(code, label, Provenance.newInstance(provenance, trust));
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -1,72 +0,0 @@
|
|||
|
||||
package eu.dnetlib.dhp.schema.dump.oaf;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.Objects;
|
||||
|
||||
import eu.dnetlib.dhp.schema.oaf.ExtraInfo;
|
||||
|
||||
//ExtraInfo
|
||||
public class ExternalReference implements Serializable {
|
||||
private String name;
|
||||
|
||||
private String typology;
|
||||
|
||||
private String provenance;
|
||||
|
||||
private String trust;
|
||||
|
||||
// json containing a Citation or Statistics
|
||||
private String value;
|
||||
|
||||
public String getName() {
|
||||
return name;
|
||||
}
|
||||
|
||||
public void setName(String name) {
|
||||
this.name = name;
|
||||
}
|
||||
|
||||
public String getTypology() {
|
||||
return typology;
|
||||
}
|
||||
|
||||
public void setTypology(String typology) {
|
||||
this.typology = typology;
|
||||
}
|
||||
|
||||
public String getProvenance() {
|
||||
return provenance;
|
||||
}
|
||||
|
||||
public void setProvenance(String provenance) {
|
||||
this.provenance = provenance;
|
||||
}
|
||||
|
||||
public String getTrust() {
|
||||
return trust;
|
||||
}
|
||||
|
||||
public void setTrust(String trust) {
|
||||
this.trust = trust;
|
||||
}
|
||||
|
||||
public String getValue() {
|
||||
return value;
|
||||
}
|
||||
|
||||
public void setValue(String value) {
|
||||
this.value = value;
|
||||
}
|
||||
|
||||
public static ExternalReference newInstance(ExtraInfo ei) {
|
||||
ExternalReference er = new ExternalReference();
|
||||
|
||||
er.name = ei.getName();
|
||||
er.typology = ei.getTypology();
|
||||
er.provenance = ei.getProvenance();
|
||||
er.trust = ei.getTrust();
|
||||
er.value = ei.getValue();
|
||||
return er;
|
||||
}
|
||||
}
|
|
@ -8,8 +8,6 @@ public class Funder implements Serializable {
|
|||
|
||||
private String name;
|
||||
|
||||
private String fundingStream;
|
||||
|
||||
private String jurisdiction;
|
||||
|
||||
public String getJurisdiction() {
|
||||
|
@ -35,12 +33,4 @@ public class Funder implements Serializable {
|
|||
public void setName(String name) {
|
||||
this.name = name;
|
||||
}
|
||||
|
||||
public String getFundingStream() {
|
||||
return fundingStream;
|
||||
}
|
||||
|
||||
public void setFundingStream(String fundingStream) {
|
||||
this.fundingStream = fundingStream;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -7,6 +7,12 @@ import org.apache.commons.lang3.StringUtils;
|
|||
|
||||
import com.fasterxml.jackson.annotation.JsonIgnore;
|
||||
|
||||
/**
|
||||
* Represents the geolocation information. It has three parameters: - point of type String to store the point
|
||||
* information. It corresponds to eu.dnetlib.dhp.schema.oaf.GeoLocation point - box of type String to store the box
|
||||
* information. It corresponds to eu.dnetlib.dhp.schema.oaf.GeoLocation box - place of type String to store the place
|
||||
* information. It corresponds to eu.dnetlib.dhp.schema.oaf.GeoLocation place
|
||||
*/
|
||||
public class GeoLocation implements Serializable {
|
||||
|
||||
private String point;
|
||||
|
|
|
@ -4,6 +4,18 @@ package eu.dnetlib.dhp.schema.dump.oaf;
|
|||
import java.io.Serializable;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Represents the manifestations (i.e. different versions) of the result. For example: the pre-print and the published
|
||||
* versions are two manifestations of the same research result. It has the following parameters: - license of type
|
||||
* String to store the license applied to the instance. It corresponds to the value of the licence in the instance to be
|
||||
* dumped - accessright of type eu.dnetlib.dhp.schema.dump.oaf.AccessRight to store the accessright of the instance. -
|
||||
* type of type String to store the type of the instance as defined in the corresponding dnet vocabulary
|
||||
* (dnet:publication_resource). It corresponds to the instancetype.classname of the instance to be mapped - url of type
|
||||
* List<String> list of locations where the instance is accessible. It corresponds to url of the instance to be dumped -
|
||||
* publicationdate of type String to store the publication date of the instance ;// dateofacceptance; - refereed of type
|
||||
* String to store information about the review status of the instance. Possible values are 'Unknown',
|
||||
* 'nonPeerReviewed', 'peerReviewed'. It corresponds to refereed.classname of the instance to be dumped
|
||||
*/
|
||||
public class Instance implements Serializable {
|
||||
|
||||
private String license;
|
||||
|
@ -12,22 +24,10 @@ public class Instance implements Serializable {
|
|||
|
||||
private String type;
|
||||
|
||||
private KeyValue hostedby;
|
||||
|
||||
private List<String> url;
|
||||
|
||||
private KeyValue collectedfrom;
|
||||
|
||||
private String publicationdate;// dateofacceptance;
|
||||
|
||||
// ( article | book ) processing charges. Defined here to cope with possible wrongly typed
|
||||
// results
|
||||
// private Field<String> processingchargeamount;
|
||||
//
|
||||
// // currency - alphabetic code describe in ISO-4217. Defined here to cope with possible wrongly
|
||||
// // typed results
|
||||
// private Field<String> processingchargecurrency;
|
||||
|
||||
private String refereed; // peer-review status
|
||||
|
||||
public String getLicense() {
|
||||
|
@ -54,14 +54,6 @@ public class Instance implements Serializable {
|
|||
this.type = type;
|
||||
}
|
||||
|
||||
public KeyValue getHostedby() {
|
||||
return hostedby;
|
||||
}
|
||||
|
||||
public void setHostedby(KeyValue hostedby) {
|
||||
this.hostedby = hostedby;
|
||||
}
|
||||
|
||||
public List<String> getUrl() {
|
||||
return url;
|
||||
}
|
||||
|
@ -70,14 +62,6 @@ public class Instance implements Serializable {
|
|||
this.url = url;
|
||||
}
|
||||
|
||||
public KeyValue getCollectedfrom() {
|
||||
return collectedfrom;
|
||||
}
|
||||
|
||||
public void setCollectedfrom(KeyValue collectedfrom) {
|
||||
this.collectedfrom = collectedfrom;
|
||||
}
|
||||
|
||||
public String getPublicationdate() {
|
||||
return publicationdate;
|
||||
}
|
||||
|
|
|
@ -7,6 +7,10 @@ import org.apache.commons.lang3.StringUtils;
|
|||
|
||||
import com.fasterxml.jackson.annotation.JsonIgnore;
|
||||
|
||||
/**
|
||||
* To represent the information described by a key and a value. It has two parameters: - key to store the key (generally
|
||||
* the OpenAIRE id for some entity) - value to store the value (generally the OpenAIRE name for the key)
|
||||
*/
|
||||
public class KeyValue implements Serializable {
|
||||
|
||||
private String key;
|
||||
|
|
|
@ -1,41 +0,0 @@
|
|||
|
||||
package eu.dnetlib.dhp.schema.dump.oaf;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
public abstract class Oaf implements Serializable {
|
||||
|
||||
/**
|
||||
* The list of datasource id/name pairs providing this relationship.
|
||||
*/
|
||||
protected List<KeyValue> collectedfrom;
|
||||
|
||||
private Long lastupdatetimestamp;
|
||||
|
||||
public List<KeyValue> getCollectedfrom() {
|
||||
return collectedfrom;
|
||||
}
|
||||
|
||||
public void setCollectedfrom(List<KeyValue> collectedfrom) {
|
||||
this.collectedfrom = collectedfrom;
|
||||
}
|
||||
|
||||
public Long getLastupdatetimestamp() {
|
||||
return lastupdatetimestamp;
|
||||
}
|
||||
|
||||
public void setLastupdatetimestamp(Long lastupdatetimestamp) {
|
||||
this.lastupdatetimestamp = lastupdatetimestamp;
|
||||
}
|
||||
|
||||
// public void setAllowedValues(eu.dnetlib.dhp.schema.oaf.Oaf o){
|
||||
// collectedfrom = o.getCollectedfrom().stream().map(cf -> KeyValue.newInstance(cf)).collect(Collectors.toList());
|
||||
//
|
||||
// lastupdatetimestamp = o.getLastupdatetimestamp();
|
||||
//
|
||||
// }
|
||||
|
||||
}
|
|
@ -1,59 +0,0 @@
|
|||
|
||||
package eu.dnetlib.dhp.schema.dump.oaf;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.List;
|
||||
|
||||
public abstract class OafEntity extends Oaf implements Serializable {
|
||||
|
||||
private String id;
|
||||
|
||||
private List<String> originalId;
|
||||
|
||||
private List<ControlledField> pid;
|
||||
|
||||
private String dateofcollection;
|
||||
|
||||
private List<Projects> projects;
|
||||
|
||||
public String getId() {
|
||||
return id;
|
||||
}
|
||||
|
||||
public void setId(String id) {
|
||||
this.id = id;
|
||||
}
|
||||
|
||||
public List<String> getOriginalId() {
|
||||
return originalId;
|
||||
}
|
||||
|
||||
public void setOriginalId(List<String> originalId) {
|
||||
this.originalId = originalId;
|
||||
}
|
||||
|
||||
public List<ControlledField> getPid() {
|
||||
return pid;
|
||||
}
|
||||
|
||||
public void setPid(List<ControlledField> pid) {
|
||||
this.pid = pid;
|
||||
}
|
||||
|
||||
public String getDateofcollection() {
|
||||
return dateofcollection;
|
||||
}
|
||||
|
||||
public void setDateofcollection(String dateofcollection) {
|
||||
this.dateofcollection = dateofcollection;
|
||||
}
|
||||
|
||||
public List<Projects> getProjects() {
|
||||
return projects;
|
||||
}
|
||||
|
||||
public void setProjects(List<Projects> projects) {
|
||||
this.projects = projects;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,45 @@
|
|||
|
||||
package eu.dnetlib.dhp.schema.dump.oaf;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
/**
|
||||
* To represent the generic persistent identifier. It has two parameters: - id of type
|
||||
* eu.dnetlib.dhp.schema.dump.oaf.ControlledField to store the scheme and value of the Persistent Identifier. -
|
||||
* provenance of type eu.dnetlib.dhp.schema.dump.oaf.Provenance to store the provenance and trust of the information
|
||||
*/
|
||||
public class Pid implements Serializable {
|
||||
private ControlledField id;
|
||||
private Provenance provenance;
|
||||
|
||||
public ControlledField getId() {
|
||||
return id;
|
||||
}
|
||||
|
||||
public void setId(ControlledField pid) {
|
||||
this.id = pid;
|
||||
}
|
||||
|
||||
public Provenance getProvenance() {
|
||||
return provenance;
|
||||
}
|
||||
|
||||
public void setProvenance(Provenance provenance) {
|
||||
this.provenance = provenance;
|
||||
}
|
||||
|
||||
public static Pid newInstance(ControlledField pid, Provenance provenance) {
|
||||
Pid p = new Pid();
|
||||
p.id = pid;
|
||||
p.provenance = provenance;
|
||||
|
||||
return p;
|
||||
}
|
||||
|
||||
public static Pid newInstance(ControlledField pid) {
|
||||
Pid p = new Pid();
|
||||
p.id = pid;
|
||||
|
||||
return p;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,51 @@
|
|||
|
||||
package eu.dnetlib.dhp.schema.dump.oaf;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
/**
 * Common information about a project, dumped both for the communities and for the whole graph:
 * <ul>
 * <li>id, of type String, to store the id of the project (OpenAIRE id);</li>
 * <li>code, of type String, to store the grant agreement of the project;</li>
 * <li>acronym, of type String, to store the acronym of the project;</li>
 * <li>title, of type String, to store the title of the project.</li>
 * </ul>
 */
public class Project implements Serializable {

    /** OpenAIRE id of the project. */
    protected String id;

    protected String code;

    protected String acronym;

    protected String title;

    /** Returns the value of the {@code id} field. */
    public String getId() {
        return this.id;
    }

    /** Replaces the value of the {@code id} field. */
    public void setId(String id) {
        this.id = id;
    }

    /** Returns the value of the {@code code} field. */
    public String getCode() {
        return this.code;
    }

    /** Replaces the value of the {@code code} field. */
    public void setCode(String code) {
        this.code = code;
    }

    /** Returns the value of the {@code acronym} field. */
    public String getAcronym() {
        return this.acronym;
    }

    /** Replaces the value of the {@code acronym} field. */
    public void setAcronym(String acronym) {
        this.acronym = acronym;
    }

    /** Returns the value of the {@code title} field. */
    public String getTitle() {
        return this.title;
    }

    /** Replaces the value of the {@code title} field. */
    public void setTitle(String title) {
        this.title = title;
    }
}
|
|
@ -1,68 +0,0 @@
|
|||
|
||||
package eu.dnetlib.dhp.schema.dump.oaf;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import eu.dnetlib.dhp.schema.oaf.Project;
|
||||
|
||||
public class Projects {
|
||||
|
||||
private String id;// OpenAIRE id
|
||||
private String code;
|
||||
|
||||
private String acronym;
|
||||
|
||||
private String title;
|
||||
|
||||
private Funder funder;
|
||||
|
||||
public String getId() {
|
||||
return id;
|
||||
}
|
||||
|
||||
public void setId(String id) {
|
||||
this.id = id;
|
||||
}
|
||||
|
||||
public String getCode() {
|
||||
return code;
|
||||
}
|
||||
|
||||
public void setCode(String code) {
|
||||
this.code = code;
|
||||
}
|
||||
|
||||
public String getAcronym() {
|
||||
return acronym;
|
||||
}
|
||||
|
||||
public void setAcronym(String acronym) {
|
||||
this.acronym = acronym;
|
||||
}
|
||||
|
||||
public String getTitle() {
|
||||
return title;
|
||||
}
|
||||
|
||||
public void setTitle(String title) {
|
||||
this.title = title;
|
||||
}
|
||||
|
||||
public Funder getFunder() {
|
||||
return funder;
|
||||
}
|
||||
|
||||
public void setFunder(Funder funders) {
|
||||
this.funder = funders;
|
||||
}
|
||||
|
||||
public static Projects newInstance(String id, String code, String acronym, String title, Funder funder) {
|
||||
Projects projects = new Projects();
|
||||
projects.setAcronym(acronym);
|
||||
projects.setCode(code);
|
||||
projects.setFunder(funder);
|
||||
projects.setId(id);
|
||||
projects.setTitle(title);
|
||||
return projects;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,41 @@
|
|||
|
||||
package eu.dnetlib.dhp.schema.dump.oaf;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
/**
 * Indicates the process that produced (or provided) the information, and the trust associated to the information. It
 * has two parameters:
 * <ul>
 * <li>provenance, of type String, to store the provenance of the information;</li>
 * <li>trust, of type String, to store the trust associated to the information.</li>
 * </ul>
 */
public class Provenance implements Serializable {

    private String provenance;

    private String trust;

    /**
     * Creates a Provenance with both fields set.
     *
     * @param provenance the provenance of the information
     * @param trust      the trust associated to the information
     * @return the newly created instance
     */
    public static Provenance newInstance(String provenance, String trust) {
        Provenance p = new Provenance();
        p.provenance = provenance;
        p.trust = trust;
        return p;
    }

    /** Returns the value of the {@code provenance} field. */
    public String getProvenance() {
        return provenance;
    }

    /** Replaces the value of the {@code provenance} field. */
    public void setProvenance(String provenance) {
        this.provenance = provenance;
    }

    /** Returns the value of the {@code trust} field. */
    public String getTrust() {
        return trust;
    }

    /** Replaces the value of the {@code trust} field. */
    public void setTrust(String trust) {
        this.trust = trust;
    }

    /**
     * Returns the provenance immediately followed by the trust, with no separator. A field that is null is rendered
     * as the text "null" by the string concatenation.
     */
    @Override
    public String toString() {
        return provenance + trust;
    }
}
|
|
@ -7,6 +7,11 @@ import org.apache.commons.lang3.StringUtils;
|
|||
|
||||
import com.fasterxml.jackson.annotation.JsonIgnore;
|
||||
|
||||
/**
|
||||
* To represent the information described by a code and a value. It has two parameters: - code to store the code
|
||||
* (generally the classid of the eu.dnetlib.dhp.schema.oaf.Qualifier element) - label to store the label (generally the
|
||||
* classname of the eu.dnetlib.dhp.schema.oaf.Qualifier element)
|
||||
*/
|
||||
public class Qualifier implements Serializable {
|
||||
|
||||
private String code; // the classid in the Qualifier
|
||||
|
|
|
@ -4,7 +4,66 @@ package eu.dnetlib.dhp.schema.dump.oaf;
|
|||
import java.io.Serializable;
|
||||
import java.util.List;
|
||||
|
||||
public class Result extends OafEntity implements Serializable {
|
||||
import eu.dnetlib.dhp.schema.dump.oaf.community.Project;
|
||||
|
||||
/**
|
||||
* To represent the dumped result. It will be extended in the dump for Research Communities - Research
|
||||
* Initiative/Infrastructures. It has the following parameters: - author of type
|
||||
* List<eu.dnetlib.dhp.schema.dump.oaf.Author> to describe the authors of a result. For each author in the result
|
||||
* represented in the internal model one author in the external model is produced. - type of type String to represent
|
||||
* the category of the result. Possible values are publication, dataset, software, other. It corresponds to
|
||||
* resulttype.classname of the dumped result - language of type eu.dnetlib.dhp.schema.dump.oaf.Qualifier to store
|
||||
* information about the language of the result. It is dumped as - code corresponds to language.classid - value
|
||||
* corresponds to language.classname - country of type List<eu.dnetlib.dhp.schema.dump.oaf.Country> to store the country
|
||||
* list to which the result is associated. For each country in the result represented in the internal model one country
|
||||
* in the external model is produced - subjects of type List<eu.dnetlib.dhp.schema.dump.oaf.Subject> to store the subjects for
|
||||
* the result. For each subject in the result represented in the internal model one subject in the external model is
|
||||
* produced - maintitle of type String to store the main title of the result. It corresponds to the value of the first
|
||||
* title in the result to be dumped having classid equal to "main title" - subtitle of type String to store the subtitle
|
||||
* of the result. It corresponds to the value of the first title in the result to be dumped having classid equal to
|
||||
* "subtitle" - description of type List<String> to store the description of the result. It corresponds to the list of
|
||||
* description.value in the result represented in the internal model - publicationdate of type String to store the
|
||||
* publication date. It corresponds to dateofacceptance.value in the result represented in the internal model -
|
||||
* publisher of type String to store information about the publisher. It corresponds to publisher.value of the result
|
||||
* represented in the internal model - embargoenddate of type String to store the embargo end date. It corresponds to
|
||||
* embargoenddate.value of the result represented in the internal model - source of type List<String> See definition of
|
||||
* Dublin Core field dc:source. It corresponds to the list of source.value in the result represented in the internal
|
||||
* model - format of type List<String> It corresponds to the list of format.value in the result represented in the
|
||||
* internal model - contributor of type List<String> to represent contributors for this result. It corresponds to the
|
||||
* list of contributor.value in the result represented in the internal model - coverage of type String. It corresponds
|
||||
* to the list of coverage.value in the result represented in the internal model - bestaccessright of type
|
||||
* eu.dnetlib.dhp.schema.dump.oaf.AccessRight to store information about the most open access right associated to the
|
||||
* manifestations of this research results. It corresponds to the same parameter in the result represented in the
|
||||
* internal model - container of type eu.dnetlib.dhp.schema.dump.oaf.Container (only for result of type publication). It
|
||||
* corresponds to the parameter journal of the result represented in the internal model - documentationUrl of type
|
||||
* List<String> (only for results of type software) to store the URLs to the software documentation. It corresponds to
|
||||
* the list of documentationUrl.value of the result represented in the internal model - codeRepositoryUrl of type String
|
||||
* (only for results of type software) to store the URL to the repository with the source code. It corresponds to
|
||||
* codeRepositoryUrl.value of the result represented in the internal model - programmingLanguage of type String (only
|
||||
* for results of type software) to store the programming language. It corresponds to programmingLanguage.classid of the
|
||||
* result represented in the internal model - contactperson of type List<String> (only for results of type other) to
|
||||
* store the contact person for this result. It corresponds to the list of contactperson.value of the result represented
|
||||
* in the internal model - contactgroup of type List<String> (only for results of type other) to store the information
|
||||
* for the contact group. It corresponds to the list of contactgroup.value of the result represented in the internal
|
||||
* model - tool of type List<String> (only for results of type other) to store information about tool useful for the
|
||||
* interpretation and/or re-use of the research product. It corresponds to the list of tool.value in the result
|
||||
* represented in the internal model - size of type String (only for results of type dataset) to store the size of the
|
||||
* dataset. It corresponds to size.value in the result represented in the internal model - version of type String (only
|
||||
* for results of type dataset) to store the version. It corresponds to version.value of the result represented in the
|
||||
* internal model - geolocation of type List<eu.dnetlib.dhp.schema.dump.oaf.GeoLocation> (only for results of type
|
||||
* dataset) to store geolocation information. For each geolocation element in the result represented in the internal
|
||||
* model a GeoLocation in the external model is produced - id of type String to store the OpenAIRE id of the result. It
|
||||
* corresponds to the id of the result represented in the internal model - originalId of type List<String> to store the
|
||||
* original ids of the result. It corresponds to the originalId of the result represented in the internal model - pid of
|
||||
* type List<eu.dnetlib.dhp.schema.dump.oaf.ControlledField> to store the persistent identifiers for the result. For
|
||||
* each pid in the results represented in the internal model one pid in the external model is produced. The value
|
||||
* correspondence is: - scheme corresponds to pid.qualifier.classid of the result represented in the internal model -
|
||||
* value corresponds to the pid.value of the result represented in the internal model - dateofcollection of type String
|
||||
* to store information about the time OpenAIRE collected the record. It corresponds to dateofcollection of the result
|
||||
* represented in the internal model - lastupdatetimestamp of type Long to store the timestamp of the last update of
|
||||
* the record. It corresponds to lastupdatetimestamp of the record represented in the internal model
|
||||
*/
|
||||
public class Result implements Serializable {
|
||||
|
||||
private List<Author> author;
|
||||
|
||||
|
@ -16,7 +75,7 @@ public class Result extends OafEntity implements Serializable {
|
|||
|
||||
private List<Country> country;
|
||||
|
||||
private List<ControlledField> subject;
|
||||
private List<Subject> subjects;
|
||||
|
||||
private String maintitle;
|
||||
|
||||
|
@ -40,12 +99,6 @@ public class Result extends OafEntity implements Serializable {
|
|||
|
||||
private AccessRight bestaccessright;
|
||||
|
||||
private List<Context> context;
|
||||
|
||||
private List<ExternalReference> externalReference;
|
||||
|
||||
private List<Instance> instance;
|
||||
|
||||
private Container container;// Journal
|
||||
|
||||
private List<String> documentationUrl; // software
|
||||
|
@ -66,6 +119,56 @@ public class Result extends OafEntity implements Serializable {
|
|||
|
||||
private List<GeoLocation> geolocation; // dataset
|
||||
|
||||
private String id;
|
||||
|
||||
private List<String> originalId;
|
||||
|
||||
private List<ControlledField> pid;
|
||||
|
||||
private String dateofcollection;
|
||||
|
||||
private Long lastupdatetimestamp;
|
||||
|
||||
public Long getLastupdatetimestamp() {
|
||||
return lastupdatetimestamp;
|
||||
}
|
||||
|
||||
public void setLastupdatetimestamp(Long lastupdatetimestamp) {
|
||||
this.lastupdatetimestamp = lastupdatetimestamp;
|
||||
}
|
||||
|
||||
public String getId() {
|
||||
return id;
|
||||
}
|
||||
|
||||
public void setId(String id) {
|
||||
this.id = id;
|
||||
}
|
||||
|
||||
public List<String> getOriginalId() {
|
||||
return originalId;
|
||||
}
|
||||
|
||||
public void setOriginalId(List<String> originalId) {
|
||||
this.originalId = originalId;
|
||||
}
|
||||
|
||||
public List<ControlledField> getPid() {
|
||||
return pid;
|
||||
}
|
||||
|
||||
public void setPid(List<ControlledField> pid) {
|
||||
this.pid = pid;
|
||||
}
|
||||
|
||||
public String getDateofcollection() {
|
||||
return dateofcollection;
|
||||
}
|
||||
|
||||
public void setDateofcollection(String dateofcollection) {
|
||||
this.dateofcollection = dateofcollection;
|
||||
}
|
||||
|
||||
public List<Author> getAuthor() {
|
||||
return author;
|
||||
}
|
||||
|
@ -106,12 +209,12 @@ public class Result extends OafEntity implements Serializable {
|
|||
this.country = country;
|
||||
}
|
||||
|
||||
public List<ControlledField> getSubject() {
|
||||
return subject;
|
||||
public List<Subject> getSubjects() {
|
||||
return subjects;
|
||||
}
|
||||
|
||||
public void setSubject(List<ControlledField> subject) {
|
||||
this.subject = subject;
|
||||
public void setSubjects(List<Subject> subjects) {
|
||||
this.subjects = subjects;
|
||||
}
|
||||
|
||||
public String getMaintitle() {
|
||||
|
@ -202,30 +305,6 @@ public class Result extends OafEntity implements Serializable {
|
|||
this.bestaccessright = bestaccessright;
|
||||
}
|
||||
|
||||
public List<Context> getContext() {
|
||||
return context;
|
||||
}
|
||||
|
||||
public void setContext(List<Context> context) {
|
||||
this.context = context;
|
||||
}
|
||||
|
||||
public List<ExternalReference> getExternalReference() {
|
||||
return externalReference;
|
||||
}
|
||||
|
||||
public void setExternalReference(List<ExternalReference> externalReference) {
|
||||
this.externalReference = externalReference;
|
||||
}
|
||||
|
||||
public List<Instance> getInstance() {
|
||||
return instance;
|
||||
}
|
||||
|
||||
public void setInstance(List<Instance> instance) {
|
||||
this.instance = instance;
|
||||
}
|
||||
|
||||
public List<String> getDocumentationUrl() {
|
||||
return documentationUrl;
|
||||
}
|
||||
|
|
|
@ -0,0 +1,34 @@
|
|||
|
||||
package eu.dnetlib.dhp.schema.dump.oaf;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
/**
|
||||
* To represent keywords associated to the result. It has two parameters: - subject of type
|
||||
* eu.dnetlib.dhp.schema.dump.oaf.ControlledField to describe the subject. It mapped as: - schema it corresponds to
|
||||
* qualifier.classid of the dumped subject - value it corresponds to the subject value - provenance of type
|
||||
* eu.dnetlib.dhp.schema.dump.oaf.Provenance to represent the provenance of the subject. It is dumped only if dataInfo
|
||||
* is not null. In this case: - provenance corresponds to dataInfo.provenanceaction.classname - trust corresponds to
|
||||
* dataInfo.trust
|
||||
*/
|
||||
public class Subject implements Serializable {
|
||||
private ControlledField subject;
|
||||
private Provenance provenance;
|
||||
|
||||
public ControlledField getSubject() {
|
||||
return subject;
|
||||
}
|
||||
|
||||
public void setSubject(ControlledField subject) {
|
||||
this.subject = subject;
|
||||
}
|
||||
|
||||
public Provenance getProvenance() {
|
||||
return provenance;
|
||||
}
|
||||
|
||||
public void setProvenance(Provenance provenance) {
|
||||
this.provenance = provenance;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,36 @@
|
|||
|
||||
package eu.dnetlib.dhp.schema.dump.oaf.community;
|
||||
|
||||
import eu.dnetlib.dhp.schema.dump.oaf.Instance;
|
||||
import eu.dnetlib.dhp.schema.dump.oaf.KeyValue;
|
||||
|
||||
/**
|
||||
* It extends eu.dnetlib.dhp.dump.oaf.Instance with values related to the community dump. In the Result dump this
|
||||
* information is not present because it is dumped as a set of relations between the result and the datasource. -
|
||||
* hostedby of type eu.dnetlib.dhp.schema.dump.oaf.KeyValue to store the information about the source from which the
|
||||
* instance can be viewed or downloaded. It is mapped against the hostedby parameter of the instance to be dumped and -
|
||||
* key corresponds to hostedby.key - value corresponds to hostedby.value - collectedfrom of type
|
||||
* eu.dnetlib.dhp.schema.dump.oaf.KeyValue to store the information about the source from which the instance has been
|
||||
* collected. It is mapped against the collectedfrom parameter of the instance to be dumped and - key corresponds to
|
||||
* collectedfrom.key - value corresponds to collectedfrom.value
|
||||
*/
|
||||
public class CommunityInstance extends Instance {
|
||||
private KeyValue hostedby;
|
||||
private KeyValue collectedfrom;
|
||||
|
||||
public KeyValue getHostedby() {
|
||||
return hostedby;
|
||||
}
|
||||
|
||||
public void setHostedby(KeyValue hostedby) {
|
||||
this.hostedby = hostedby;
|
||||
}
|
||||
|
||||
public KeyValue getCollectedfrom() {
|
||||
return collectedfrom;
|
||||
}
|
||||
|
||||
public void setCollectedfrom(KeyValue collectedfrom) {
|
||||
this.collectedfrom = collectedfrom;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,63 @@
|
|||
|
||||
package eu.dnetlib.dhp.schema.dump.oaf.community;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import eu.dnetlib.dhp.schema.dump.oaf.KeyValue;
|
||||
import eu.dnetlib.dhp.schema.dump.oaf.Result;
|
||||
|
||||
/**
|
||||
* extends eu.dnetlib.dhp.schema.dump.oaf.Result with the following parameters: - projects of type
|
||||
* List<eu.dnetlib.dhp.schema.dump.oaf.community.Project> to store the list of projects related to the result. The
|
||||
* information is added after the result is mapped to the external model - context of type
|
||||
* List<eu.dnetlib.dhp.schema.dump.oaf.community.Context> to store information about the RC RI related to the result.
|
||||
* For each context in the result represented in the internal model one context in the external model is produced -
|
||||
* collectedfrom of type List<eu.dnetliv.dhp.schema.dump.oaf.KeyValue> to store information about the sources from which
|
||||
* the record has been collected. For each collectedfrom in the result represented in the internal model one
|
||||
* collectedfrom in the external model is produced - instance of type
|
||||
* List<eu.dnetlib.dhp.schema.dump.oaf.community.CommunityInstance> to store all the instances associated to the result.
|
||||
* It corresponds to the same parameter in the result represented in the internal model
|
||||
*/
|
||||
public class CommunityResult extends Result {
|
||||
|
||||
private List<Project> projects;
|
||||
|
||||
private List<Context> context;
|
||||
|
||||
protected List<KeyValue> collectedfrom;
|
||||
|
||||
private List<CommunityInstance> instance;
|
||||
|
||||
public List<CommunityInstance> getInstance() {
|
||||
return instance;
|
||||
}
|
||||
|
||||
public void setInstance(List<CommunityInstance> instance) {
|
||||
this.instance = instance;
|
||||
}
|
||||
|
||||
public List<KeyValue> getCollectedfrom() {
|
||||
return collectedfrom;
|
||||
}
|
||||
|
||||
public void setCollectedfrom(List<KeyValue> collectedfrom) {
|
||||
this.collectedfrom = collectedfrom;
|
||||
}
|
||||
|
||||
public List<Project> getProjects() {
|
||||
return projects;
|
||||
}
|
||||
|
||||
public void setProjects(List<Project> projects) {
|
||||
this.projects = projects;
|
||||
}
|
||||
|
||||
public List<Context> getContext() {
|
||||
return context;
|
||||
}
|
||||
|
||||
public void setContext(List<Context> context) {
|
||||
this.context = context;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,40 @@
|
|||
|
||||
package eu.dnetlib.dhp.schema.dump.oaf.community;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
|
||||
import eu.dnetlib.dhp.schema.dump.oaf.Provenance;
|
||||
import eu.dnetlib.dhp.schema.dump.oaf.Qualifier;
|
||||
|
||||
/**
|
||||
* Reference to a relevant research infrastructure, initiative or community (RI/RC) among those collaborating with
|
||||
* OpenAIRE. It extend eu.dnetlib.dhp.shema.dump.oaf.Qualifier with a parameter provenance of type
|
||||
* List<eu.dnetlib.dhp.schema.dump.oaf.Provenance> to store the provenances of the association between the result and
|
||||
* the RC/RI. The values for this element correspond to: - code: it corresponds to the id of the context in the result
|
||||
* to be mapped. If the context id refers to a RC/RI and contains '::' only the part of the id before the first "::"
|
||||
* will be used as value for code - label it corresponds to the label associated to the id. The information id taken
|
||||
* from the profile of the RC/RI - provenance it is set only if the dataInfo associated to the contenxt element of the
|
||||
* result to be dumped is not null. For each dataInfo one instance of type eu.dnetlib.dhp.schema.dump.oaf.Provenance is
|
||||
* instantiated if the element datainfo.provenanceaction is not null. In this case - provenance corresponds to
|
||||
* dataInfo.provenanceaction.classname - trust corresponds to dataInfo.trust
|
||||
*/
|
||||
public class Context extends Qualifier {
|
||||
private List<Provenance> provenance;
|
||||
|
||||
public List<Provenance> getProvenance() {
|
||||
return provenance;
|
||||
}
|
||||
|
||||
public void setProvenance(List<Provenance> provenance) {
|
||||
this.provenance = provenance;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
String provenance = new String();
|
||||
this.provenance.forEach(p -> provenance.concat(p.toString()));
|
||||
return Objects.hash(getCode(), getLabel(), provenance);
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,23 @@
|
|||
|
||||
package eu.dnetlib.dhp.schema.dump.oaf.community;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
/**
|
||||
* To store information about the funder funding the project related to the result. It has the following parameters: -
|
||||
* shortName of type String to store the funder short name (e.c. AKA). - name of type String to store the funder name
|
||||
* (e.c. Akademy of Finland) - fundingStream of type String to store the funding stream - jurisdiction of type String to
|
||||
* store the jurisdiction of the funder
|
||||
*/
|
||||
public class Funder extends eu.dnetlib.dhp.schema.dump.oaf.Funder {
|
||||
|
||||
private String fundingStream;
|
||||
|
||||
public String getFundingStream() {
|
||||
return fundingStream;
|
||||
}
|
||||
|
||||
public void setFundingStream(String fundingStream) {
|
||||
this.fundingStream = fundingStream;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,47 @@
|
|||
|
||||
package eu.dnetlib.dhp.schema.dump.oaf.community;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
import eu.dnetlib.dhp.schema.dump.oaf.Provenance;
|
||||
|
||||
/**
|
||||
* To store information about the project related to the result. This information is not directly mapped from the result
|
||||
* represented in the internal model because it is not there. The mapped result will be enriched with project
|
||||
* information derived by relation between results and projects. Project extends eu.dnetlib.dhp.schema.dump.oaf.Project
|
||||
* with the following parameters: - funder of type eu.dnetlib.dhp.schema.dump.oaf.community.Funder to store information
|
||||
* about the funder funding the project - provenance of type eu.dnetlib.dhp.schema.dump.oaf.Provenance to store
|
||||
* information about the. provenance of the association between the result and the project
|
||||
*/
|
||||
public class Project extends eu.dnetlib.dhp.schema.dump.oaf.Project {
|
||||
|
||||
private Funder funder;
|
||||
|
||||
private Provenance provenance;
|
||||
|
||||
public Provenance getProvenance() {
|
||||
return provenance;
|
||||
}
|
||||
|
||||
public void setProvenance(Provenance provenance) {
|
||||
this.provenance = provenance;
|
||||
}
|
||||
|
||||
public Funder getFunder() {
|
||||
return funder;
|
||||
}
|
||||
|
||||
public void setFunder(Funder funders) {
|
||||
this.funder = funders;
|
||||
}
|
||||
|
||||
public static Project newInstance(String id, String code, String acronym, String title, Funder funder) {
|
||||
Project project = new Project();
|
||||
project.setAcronym(acronym);
|
||||
project.setCode(code);
|
||||
project.setFunder(funder);
|
||||
project.setId(id);
|
||||
project.setTitle(title);
|
||||
return project;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,21 @@
|
|||
|
||||
package eu.dnetlib.dhp.schema.dump.oaf.graph;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
/**
 * String constants shared by the graph dump: relation names, entity type names and context identifiers.
 */
public class Constants implements Serializable {

    // Relation names.
    // collectedFrom goes together with isProvidedBy -> taken from ModelSupport
    public static final String HOSTED_BY = "isHostedBy";
    public static final String HOSTS = "hosts";

    // Entity type names.
    // for the community result, isRelatedTo is used
    public static final String RESULT_ENTITY = "result";
    public static final String DATASOURCE_ENTITY = "datasource";
    public static final String CONTEXT_ENTITY = "context";

    // Context identifiers.
    // NOTE(review): "60" and "context____" look like the id prefix and namespace prefix for contexts - confirm
    public static final String CONTEXT_ID = "60";
    public static final String CONTEXT_NS_PREFIX = "context____";

}
|
|
@ -0,0 +1,316 @@
|
|||
|
||||
package eu.dnetlib.dhp.schema.dump.oaf.graph;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.List;
|
||||
|
||||
import eu.dnetlib.dhp.schema.dump.oaf.Container;
|
||||
import eu.dnetlib.dhp.schema.dump.oaf.ControlledField;
|
||||
import eu.dnetlib.dhp.schema.dump.oaf.KeyValue;
|
||||
|
||||
/**
|
||||
* To store information about the datasource OpenAIRE collects information from. It contains the following parameters: -
|
||||
* id of type String to store the OpenAIRE id for the datasource. It corresponds to the parameter id of the datasource
|
||||
* represented in the internal model - originalId of type List<String> to store the list of original ids associated to
|
||||
* the datasource. It corresponds to the parameter originalId of the datasource represented in the internal model. The
|
||||
* null values are filtered out - pid of type List<eu.dnetlib.dhp.schema.dump.oaf.ControlledField> to store the
|
||||
* persistent identifiers for the datasource. For each pid in the datasource represented in the internal model one pid
|
||||
* in the external model is produced as : - schema corresponds to pid.qualifier.classid of the datasource represented in
|
||||
* the internal model - value corresponds to pid.value of the datasource represented in the internal model -
|
||||
* datasourceType of type eu.dnetlib.dhp.schema.dump.oaf.ControlledField to store the datasource type (e.g.
|
||||
* pubsrepository::institutional, Institutional Repository) as in the dnet vocabulary dnet:datasource_typologies. It
|
||||
* corresponds to datasourcetype of the datasource represented in the internal model and : - code corresponds to
|
||||
* datasourcetype.classid - value corresponds to datasourcetype.classname - openairecompatibility of type String to
|
||||
* store information about the OpenAIRE compatibility of the ingested results (which guidelines they are compliant to).
|
||||
* It corresponds to openairecompatibility.classname of the datasource represented in the internal model - officialname
|
||||
* of type String to store the official name of the datasource. It corresponds to officialname.value of the datasource
|
||||
* represented in the internal model - englishname of type String to store the English name of the datasource. It
|
||||
* corresponds to englishname.value of the datasource represented in the internal model - websiteurl of type String to
|
||||
* store the URL of the website of the datasource. It corresponds to websiteurl.value of the datasource represented in
|
||||
* the internal model - logourl of type String to store the URL of the logo for the datasource. It corresponds to
|
||||
* logourl.value of the datasource represented in the internal model - dateofvalidation of type String to store the date
|
||||
* of validation against the guidelines for the datasource records. It corresponds to dateofvalidation.value of the
|
||||
* datasource represented in the internal model - description of type String to store the description for the
|
||||
* datasource. It corresponds to description.value of the datasource represented in the internal model
|
||||
*/
|
||||
public class Datasource implements Serializable {
|
||||
|
||||
private String id; // string
|
||||
|
||||
private List<String> originalId; // list string
|
||||
|
||||
private List<ControlledField> pid; // list<String>
|
||||
|
||||
private ControlledField datasourcetype; // value
|
||||
|
||||
private String openairecompatibility; // value
|
||||
|
||||
private String officialname; // string
|
||||
|
||||
private String englishname; // string
|
||||
|
||||
private String websiteurl; // string
|
||||
|
||||
private String logourl; // string
|
||||
|
||||
private String dateofvalidation; // string
|
||||
|
||||
private String description; // description
|
||||
|
||||
private List<String> subjects; // List<String>
|
||||
|
||||
// opendoar specific fields (od*)
|
||||
|
||||
private List<String> languages; // odlanguages List<String>
|
||||
|
||||
private List<String> contenttypes; // odcontent types List<String>
|
||||
|
||||
// re3data fields
|
||||
private String releasestartdate; // string
|
||||
|
||||
private String releaseenddate; // string
|
||||
|
||||
private String missionstatementurl; // string
|
||||
|
||||
// {open, restricted or closed}
|
||||
private String accessrights; // databaseaccesstype string
|
||||
|
||||
// {open, restricted or closed}
|
||||
private String uploadrights; // datauploadtype string
|
||||
|
||||
// {feeRequired, registration, other}
|
||||
private String databaseaccessrestriction; // string
|
||||
|
||||
// {feeRequired, registration, other}
|
||||
private String datauploadrestriction; // string
|
||||
|
||||
private Boolean versioning; // boolean
|
||||
|
||||
private String citationguidelineurl; // string
|
||||
|
||||
// {yes, no, unknown}
|
||||
|
||||
private String pidsystems; // string
|
||||
|
||||
private String certificates; // string
|
||||
|
||||
private List<Object> policies; //
|
||||
|
||||
private Container journal; // issn etc del Journal
|
||||
|
||||
public String getId() {
|
||||
return id;
|
||||
}
|
||||
|
||||
public void setId(String id) {
|
||||
this.id = id;
|
||||
}
|
||||
|
||||
public List<String> getOriginalId() {
|
||||
return originalId;
|
||||
}
|
||||
|
||||
public void setOriginalId(List<String> originalId) {
|
||||
this.originalId = originalId;
|
||||
}
|
||||
|
||||
public List<ControlledField> getPid() {
|
||||
return pid;
|
||||
}
|
||||
|
||||
public void setPid(List<ControlledField> pid) {
|
||||
this.pid = pid;
|
||||
}
|
||||
|
||||
public ControlledField getDatasourcetype() {
|
||||
return datasourcetype;
|
||||
}
|
||||
|
||||
public void setDatasourcetype(ControlledField datasourcetype) {
|
||||
this.datasourcetype = datasourcetype;
|
||||
}
|
||||
|
||||
public String getOpenairecompatibility() {
|
||||
return openairecompatibility;
|
||||
}
|
||||
|
||||
public void setOpenairecompatibility(String openairecompatibility) {
|
||||
this.openairecompatibility = openairecompatibility;
|
||||
}
|
||||
|
||||
public String getOfficialname() {
|
||||
return officialname;
|
||||
}
|
||||
|
||||
public void setOfficialname(String officialname) {
|
||||
this.officialname = officialname;
|
||||
}
|
||||
|
||||
public String getEnglishname() {
|
||||
return englishname;
|
||||
}
|
||||
|
||||
public void setEnglishname(String englishname) {
|
||||
this.englishname = englishname;
|
||||
}
|
||||
|
||||
public String getWebsiteurl() {
|
||||
return websiteurl;
|
||||
}
|
||||
|
||||
public void setWebsiteurl(String websiteurl) {
|
||||
this.websiteurl = websiteurl;
|
||||
}
|
||||
|
||||
public String getLogourl() {
|
||||
return logourl;
|
||||
}
|
||||
|
||||
public void setLogourl(String logourl) {
|
||||
this.logourl = logourl;
|
||||
}
|
||||
|
||||
public String getDateofvalidation() {
|
||||
return dateofvalidation;
|
||||
}
|
||||
|
||||
public void setDateofvalidation(String dateofvalidation) {
|
||||
this.dateofvalidation = dateofvalidation;
|
||||
}
|
||||
|
||||
public String getDescription() {
|
||||
return description;
|
||||
}
|
||||
|
||||
public void setDescription(String description) {
|
||||
this.description = description;
|
||||
}
|
||||
|
||||
public List<String> getSubjects() {
|
||||
return subjects;
|
||||
}
|
||||
|
||||
public void setSubjects(List<String> subjects) {
|
||||
this.subjects = subjects;
|
||||
}
|
||||
|
||||
public List<String> getLanguages() {
|
||||
return languages;
|
||||
}
|
||||
|
||||
public void setLanguages(List<String> languages) {
|
||||
this.languages = languages;
|
||||
}
|
||||
|
||||
public List<String> getContenttypes() {
|
||||
return contenttypes;
|
||||
}
|
||||
|
||||
public void setContenttypes(List<String> contenttypes) {
|
||||
this.contenttypes = contenttypes;
|
||||
}
|
||||
|
||||
public String getReleasestartdate() {
|
||||
return releasestartdate;
|
||||
}
|
||||
|
||||
public void setReleasestartdate(String releasestartdate) {
|
||||
this.releasestartdate = releasestartdate;
|
||||
}
|
||||
|
||||
public String getReleaseenddate() {
|
||||
return releaseenddate;
|
||||
}
|
||||
|
||||
public void setReleaseenddate(String releaseenddate) {
|
||||
this.releaseenddate = releaseenddate;
|
||||
}
|
||||
|
||||
public String getMissionstatementurl() {
|
||||
return missionstatementurl;
|
||||
}
|
||||
|
||||
public void setMissionstatementurl(String missionstatementurl) {
|
||||
this.missionstatementurl = missionstatementurl;
|
||||
}
|
||||
|
||||
public String getAccessrights() {
|
||||
return accessrights;
|
||||
}
|
||||
|
||||
public void setAccessrights(String accessrights) {
|
||||
this.accessrights = accessrights;
|
||||
}
|
||||
|
||||
public String getUploadrights() {
|
||||
return uploadrights;
|
||||
}
|
||||
|
||||
public void setUploadrights(String uploadrights) {
|
||||
this.uploadrights = uploadrights;
|
||||
}
|
||||
|
||||
public String getDatabaseaccessrestriction() {
|
||||
return databaseaccessrestriction;
|
||||
}
|
||||
|
||||
public void setDatabaseaccessrestriction(String databaseaccessrestriction) {
|
||||
this.databaseaccessrestriction = databaseaccessrestriction;
|
||||
}
|
||||
|
||||
public String getDatauploadrestriction() {
|
||||
return datauploadrestriction;
|
||||
}
|
||||
|
||||
public void setDatauploadrestriction(String datauploadrestriction) {
|
||||
this.datauploadrestriction = datauploadrestriction;
|
||||
}
|
||||
|
||||
public Boolean getVersioning() {
|
||||
return versioning;
|
||||
}
|
||||
|
||||
public void setVersioning(Boolean versioning) {
|
||||
this.versioning = versioning;
|
||||
}
|
||||
|
||||
public String getCitationguidelineurl() {
|
||||
return citationguidelineurl;
|
||||
}
|
||||
|
||||
public void setCitationguidelineurl(String citationguidelineurl) {
|
||||
this.citationguidelineurl = citationguidelineurl;
|
||||
}
|
||||
|
||||
public String getPidsystems() {
|
||||
return pidsystems;
|
||||
}
|
||||
|
||||
public void setPidsystems(String pidsystems) {
|
||||
this.pidsystems = pidsystems;
|
||||
}
|
||||
|
||||
public String getCertificates() {
|
||||
return certificates;
|
||||
}
|
||||
|
||||
public void setCertificates(String certificates) {
|
||||
this.certificates = certificates;
|
||||
}
|
||||
|
||||
public List<Object> getPolicies() {
|
||||
return policies;
|
||||
}
|
||||
|
||||
public void setPolicies(List<Object> policiesr3) {
|
||||
this.policies = policiesr3;
|
||||
}
|
||||
|
||||
public Container getJournal() {
|
||||
return journal;
|
||||
}
|
||||
|
||||
public void setJournal(Container journal) {
|
||||
this.journal = journal;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,22 @@
|
|||
|
||||
package eu.dnetlib.dhp.schema.dump.oaf.graph;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
/**
|
||||
* To store information about the funder funding the project related to the result. It extends
|
||||
* eu.dnetlib.dhp.schema.dump.oaf.Funder with the following parameter: - - private
|
||||
* eu.dnetdlib.dhp.schema.dump.oaf.graph.Fundings funding_stream to store the fundingstream
|
||||
*/
|
||||
public class Funder extends eu.dnetlib.dhp.schema.dump.oaf.Funder {
|
||||
|
||||
private Fundings funding_stream;
|
||||
|
||||
public Fundings getFunding_stream() {
|
||||
return funding_stream;
|
||||
}
|
||||
|
||||
public void setFunding_stream(Fundings funding_stream) {
|
||||
this.funding_stream = funding_stream;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,35 @@
|
|||
|
||||
package eu.dnetlib.dhp.schema.dump.oaf.graph;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
/**
 * Information about a funding stream. It has two properties:
 * <ul>
 * <li>{@code id}: the id of the funding stream. The id is created by appending the shortname of the
 * funder to the name of each level in the xml representing the funding stream. For example, if the
 * funder is the European Commission, the funding level 0 name is FP7, the funding level 1 name is
 * SP3 and the funding level 2 name is PEOPLE, then the id will be {@code EC::FP7::SP3::PEOPLE}</li>
 * <li>{@code description}: describes the funding stream. It is created by concatenating the
 * description of each funding level, so for the example above the description would be:
 * SEVENTH FRAMEWORK PROGRAMME - SP3-People - Marie-Curie Actions</li>
 * </ul>
 */
public class Fundings implements Serializable {

    /** Identifier of the funding stream. */
    private String id;

    /** Human readable description of the funding stream. */
    private String description;

    /**
     * @return the id of the funding stream, or {@code null} when not set
     */
    public String getId() {
        return this.id;
    }

    /**
     * @param id the id of the funding stream
     */
    public void setId(String id) {
        this.id = id;
    }

    /**
     * @return the description of the funding stream, or {@code null} when not set
     */
    public String getDescription() {
        return this.description;
    }

    /**
     * @param description the description of the funding stream
     */
    public void setDescription(String description) {
        this.description = description;
    }
}
|
|
@ -0,0 +1,55 @@
|
|||
|
||||
package eu.dnetlib.dhp.schema.dump.oaf.graph;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.Optional;
|
||||
|
||||
/**
|
||||
* To describe the funded amount. It has the following parameters: - private String currency to store the currency of
|
||||
* the fund - private float totalcost to store the total cost of the project - private float fundedamount to store the
|
||||
* funded amount by the funder
|
||||
*/
|
||||
public class Granted implements Serializable {
|
||||
private String currency;
|
||||
private float totalcost;
|
||||
private float fundedamount;
|
||||
|
||||
public String getCurrency() {
|
||||
return currency;
|
||||
}
|
||||
|
||||
public void setCurrency(String currency) {
|
||||
this.currency = currency;
|
||||
}
|
||||
|
||||
public float getTotalcost() {
|
||||
return totalcost;
|
||||
}
|
||||
|
||||
public void setTotalcost(float totalcost) {
|
||||
this.totalcost = totalcost;
|
||||
}
|
||||
|
||||
public float getFundedamount() {
|
||||
return fundedamount;
|
||||
}
|
||||
|
||||
public void setFundedamount(float fundedamount) {
|
||||
this.fundedamount = fundedamount;
|
||||
}
|
||||
|
||||
public static Granted newInstance(String currency, float totalcost, float fundedamount) {
|
||||
Granted granted = new Granted();
|
||||
granted.currency = currency;
|
||||
granted.totalcost = totalcost;
|
||||
granted.fundedamount = fundedamount;
|
||||
return granted;
|
||||
}
|
||||
|
||||
public static Granted newInstance(String currency, float fundedamount) {
|
||||
Granted granted = new Granted();
|
||||
granted.currency = currency;
|
||||
granted.fundedamount = fundedamount;
|
||||
return granted;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,24 @@
|
|||
|
||||
package eu.dnetlib.dhp.schema.dump.oaf.graph;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import eu.dnetlib.dhp.schema.dump.oaf.Instance;
|
||||
import eu.dnetlib.dhp.schema.dump.oaf.Result;
|
||||
|
||||
/**
|
||||
* It extends the eu.dnetlib.dhp.schema.dump.oaf.Result with - instance of type
|
||||
* List<eu.dnetlib.dhp.schema.dump.oaf.Instance> to store all the instances associated to the result. It corresponds to
|
||||
* the same parameter in the result represented in the internal model
|
||||
*/
|
||||
public class GraphResult extends Result {
|
||||
private List<Instance> instance;
|
||||
|
||||
public List<Instance> getInstance() {
|
||||
return instance;
|
||||
}
|
||||
|
||||
public void setInstance(List<Instance> instance) {
|
||||
this.instance = instance;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,82 @@
|
|||
|
||||
package eu.dnetlib.dhp.schema.dump.oaf.graph;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
/**
|
||||
* To store information about the classification for the project. The classification depends on the programme. For example
|
||||
* H2020-EU.3.4.5.3 can be classified as
|
||||
* H2020-EU.3. => Societal Challenges (level1)
|
||||
* H2020-EU.3.4. => Transport (level2)
|
||||
* H2020-EU.3.4.5. => CLEANSKY2 (level3)
|
||||
* H2020-EU.3.4.5.3. => IADP Fast Rotorcraft (level4)
|
||||
*
|
||||
* We decided to explicitly represent up to three levels in the classification.
|
||||
*
|
||||
* H2020Classification has the following parameters:
|
||||
* - private Programme programme to store the information about the programme related to this classification
|
||||
* - private String level1 to store the information about the level 1 of the classification (Priority or Pillar of the EC)
|
||||
* - private String level2 to store the information about the level2 af the classification (Objectives (?))
|
||||
* - private String level3 to store the information about the level3 of the classification
|
||||
* - private String classification to store the entire classification related to the programme
|
||||
*/
|
||||
public class H2020Classification implements Serializable {
|
||||
private Programme programme;
|
||||
|
||||
private String level1;
|
||||
private String level2;
|
||||
private String level3;
|
||||
|
||||
private String classification;
|
||||
|
||||
public Programme getProgramme() {
|
||||
return programme;
|
||||
}
|
||||
|
||||
public void setProgramme(Programme programme) {
|
||||
this.programme = programme;
|
||||
}
|
||||
|
||||
public String getLevel1() {
|
||||
return level1;
|
||||
}
|
||||
|
||||
public void setLevel1(String level1) {
|
||||
this.level1 = level1;
|
||||
}
|
||||
|
||||
public String getLevel2() {
|
||||
return level2;
|
||||
}
|
||||
|
||||
public void setLevel2(String level2) {
|
||||
this.level2 = level2;
|
||||
}
|
||||
|
||||
public String getLevel3() {
|
||||
return level3;
|
||||
}
|
||||
|
||||
public void setLevel3(String level3) {
|
||||
this.level3 = level3;
|
||||
}
|
||||
|
||||
public String getClassification() {
|
||||
return classification;
|
||||
}
|
||||
|
||||
public void setClassification(String classification) {
|
||||
this.classification = classification;
|
||||
}
|
||||
|
||||
public static H2020Classification newInstance(String programme_code, String programme_description, String level1,
|
||||
String level2, String level3, String classification) {
|
||||
H2020Classification h2020classification = new H2020Classification();
|
||||
h2020classification.programme = Programme.newInstance(programme_code, programme_description);
|
||||
h2020classification.level1 = level1;
|
||||
h2020classification.level2 = level2;
|
||||
h2020classification.level3 = level3;
|
||||
h2020classification.classification = classification;
|
||||
return h2020classification;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,38 @@
|
|||
|
||||
package eu.dnetlib.dhp.schema.dump.oaf.graph;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
/**
 * Represents the generic node in a relation. It has the following properties:
 * <ul>
 * <li>{@code id}: the openaire id of the entity in the relation</li>
 * <li>{@code type}: the type of the entity in the relation</li>
 * </ul>
 * Consider the generic relation between a Result R and a Project P: the node representing R will
 * have as id the id of R and as type result, while the node representing the project will have as
 * id the id of the project and as type project.
 */
public class Node implements Serializable {

    /** Openaire id of the entity in the relation. */
    private String id;

    /** Type of the entity in the relation. */
    private String type;

    /**
     * @return the id of the entity, or {@code null} when not set
     */
    public String getId() {
        return this.id;
    }

    /**
     * @param id the id of the entity
     */
    public void setId(String id) {
        this.id = id;
    }

    /**
     * @return the type of the entity, or {@code null} when not set
     */
    public String getType() {
        return this.type;
    }

    /**
     * @param type the type of the entity
     */
    public void setType(String type) {
        this.type = type;
    }

    /**
     * Creates a populated node.
     *
     * @param id the id of the entity
     * @param type the type of the entity
     * @return a new {@code Node} carrying the given id and type
     */
    public static Node newInstance(String id, String type) {
        final Node created = new Node();
        created.setId(id);
        created.setType(type);
        return created;
    }
}
|
|
@ -0,0 +1,86 @@
|
|||
|
||||
package eu.dnetlib.dhp.schema.dump.oaf.graph;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.List;
|
||||
|
||||
import eu.dnetlib.dhp.schema.dump.oaf.ControlledField;
|
||||
import eu.dnetlib.dhp.schema.dump.oaf.Country;
|
||||
import eu.dnetlib.dhp.schema.dump.oaf.KeyValue;
|
||||
import eu.dnetlib.dhp.schema.dump.oaf.Qualifier;
|
||||
import eu.dnetlib.dhp.schema.dump.oaf.community.Project;
|
||||
|
||||
/**
|
||||
* To represent the generic organizaiton. It has the following parameters: - private String legalshortname to store the
|
||||
* legalshortname of the organizaiton - private String legalname to store the legal name of the organization - private
|
||||
* String websiteurl to store the websiteurl of the organization - private List<String> alternativenames to store the
|
||||
* alternative names of the organization - private Qualifier country to store the country of the organization - private
|
||||
* String id to store the id of the organization - private List<ControlledField> pid to store the list of pids for the
|
||||
* organization
|
||||
*/
|
||||
public class Organization implements Serializable {
|
||||
private String legalshortname;
|
||||
private String legalname;
|
||||
private String websiteurl;
|
||||
private List<String> alternativenames;
|
||||
private Qualifier country;
|
||||
private String id;
|
||||
private List<ControlledField> pid;
|
||||
|
||||
public String getLegalshortname() {
|
||||
return legalshortname;
|
||||
}
|
||||
|
||||
public void setLegalshortname(String legalshortname) {
|
||||
this.legalshortname = legalshortname;
|
||||
}
|
||||
|
||||
public String getLegalname() {
|
||||
return legalname;
|
||||
}
|
||||
|
||||
public void setLegalname(String legalname) {
|
||||
this.legalname = legalname;
|
||||
}
|
||||
|
||||
public String getWebsiteurl() {
|
||||
return websiteurl;
|
||||
}
|
||||
|
||||
public void setWebsiteurl(String websiteurl) {
|
||||
this.websiteurl = websiteurl;
|
||||
}
|
||||
|
||||
public List<String> getAlternativenames() {
|
||||
return alternativenames;
|
||||
}
|
||||
|
||||
public void setAlternativenames(List<String> alternativenames) {
|
||||
this.alternativenames = alternativenames;
|
||||
}
|
||||
|
||||
public Qualifier getCountry() {
|
||||
return country;
|
||||
}
|
||||
|
||||
public void setCountry(Qualifier country) {
|
||||
this.country = country;
|
||||
}
|
||||
|
||||
public String getId() {
|
||||
return id;
|
||||
}
|
||||
|
||||
public void setId(String id) {
|
||||
this.id = id;
|
||||
}
|
||||
|
||||
public List<ControlledField> getPid() {
|
||||
return pid;
|
||||
}
|
||||
|
||||
public void setPid(List<ControlledField> pid) {
|
||||
this.pid = pid;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,36 @@
|
|||
|
||||
package eu.dnetlib.dhp.schema.dump.oaf.graph;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
/**
 * Stores information about the ec programme for the project. It has the following properties:
 * <ul>
 * <li>{@code code}: the code of the programme</li>
 * <li>{@code description}: the description of the programme</li>
 * </ul>
 */
public class Programme implements Serializable {

    /** Code of the programme. */
    private String code;

    /** Description of the programme. */
    private String description;

    /**
     * @return the code of the programme, or {@code null} when not set
     */
    public String getCode() {
        return this.code;
    }

    /**
     * @param code the code of the programme
     */
    public void setCode(String code) {
        this.code = code;
    }

    /**
     * @return the description of the programme, or {@code null} when not set
     */
    public String getDescription() {
        return this.description;
    }

    /**
     * @param description the description of the programme
     */
    public void setDescription(String description) {
        this.description = description;
    }

    /**
     * Creates a populated programme.
     *
     * @param code the code of the programme
     * @param description the description of the programme
     * @return a new {@code Programme} carrying the given code and description
     */
    public static Programme newInstance(String code, String description) {
        final Programme created = new Programme();
        created.setCode(code);
        created.setDescription(description);
        return created;
    }
}
|
|
@ -0,0 +1,192 @@
|
|||
|
||||
package eu.dnetlib.dhp.schema.dump.oaf.graph;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* This is the class representing the Project in the model used for the dumps of the whole graph. At the moment the dump
|
||||
* of the Projects differs from the other dumps because we do not create relations between Funders (Organization) and
|
||||
* Projects but we put the information about the Funder within the Project representation. We also removed the
|
||||
* collected from element from the Project. No relation between the Project and the Datasource entity from which it is
|
||||
* collected will be created. We will never create relations between Project and Datasource. In case some relation will
|
||||
* be extracted from the Project they will refer the Funder and will be of type ( organization -> funds -> project,
|
||||
* project -> isFundedBy -> organization) We also removed the duration parameter because the most of times it is set to
|
||||
* 0. It has the following parameters:
|
||||
* - private String id to store the id of the project (OpenAIRE id)
|
||||
* - private String websiteurl to store the websiteurl of the project
|
||||
* - private String code to store the grant agreement of the project
|
||||
* - private String acronym to store the acronym of the project
|
||||
* - private String title to store the tile of the project
|
||||
* - private String startdate to store the start date
|
||||
* - private String enddate to store the end date
|
||||
* - private String callidentifier to store the call indentifier
|
||||
* - private String keywords to store the keywords
|
||||
* - private boolean openaccessmandateforpublications to store if the project must accomplish to the open access mandate
|
||||
* for publications. This value will be set to true if one of the field in the project represented in the internal model
|
||||
* is set to true
|
||||
* - private boolean openaccessmandatefordataset to store if the project must accomplish to the open access mandate for
|
||||
* dataset. It is set to the value in the corresponding filed of the project represented in the internal model
|
||||
* - private List<String> subject to store the list of subjects of the project
|
||||
* - private List<Funder> funding to store the list of funder of the project
|
||||
* - private String summary to store the summary of the project
|
||||
* - private Granted granted to store the granted amount
|
||||
* - private List<Programme> h2020programme to store the list of programmes the project is related to
|
||||
*/
|
||||
|
||||
public class Project implements Serializable {
|
||||
private String id;
|
||||
|
||||
private String websiteurl;
|
||||
private String code;
|
||||
private String acronym;
|
||||
private String title;
|
||||
private String startdate;
|
||||
|
||||
private String enddate;
|
||||
|
||||
private String callidentifier;
|
||||
|
||||
private String keywords;
|
||||
|
||||
private boolean openaccessmandateforpublications;
|
||||
|
||||
private boolean openaccessmandatefordataset;
|
||||
private List<String> subject;
|
||||
|
||||
private List<Funder> funding;
|
||||
|
||||
private String summary;
|
||||
|
||||
private Granted granted;
|
||||
|
||||
private List<Programme> h2020programme;
|
||||
|
||||
public String getId() {
|
||||
return id;
|
||||
}
|
||||
|
||||
public void setId(String id) {
|
||||
this.id = id;
|
||||
}
|
||||
|
||||
public String getWebsiteurl() {
|
||||
return websiteurl;
|
||||
}
|
||||
|
||||
public void setWebsiteurl(String websiteurl) {
|
||||
this.websiteurl = websiteurl;
|
||||
}
|
||||
|
||||
public String getCode() {
|
||||
return code;
|
||||
}
|
||||
|
||||
public void setCode(String code) {
|
||||
this.code = code;
|
||||
}
|
||||
|
||||
public String getAcronym() {
|
||||
return acronym;
|
||||
}
|
||||
|
||||
public void setAcronym(String acronym) {
|
||||
this.acronym = acronym;
|
||||
}
|
||||
|
||||
public String getTitle() {
|
||||
return title;
|
||||
}
|
||||
|
||||
public void setTitle(String title) {
|
||||
this.title = title;
|
||||
}
|
||||
|
||||
public String getStartdate() {
|
||||
return startdate;
|
||||
}
|
||||
|
||||
public void setStartdate(String startdate) {
|
||||
this.startdate = startdate;
|
||||
}
|
||||
|
||||
public String getEnddate() {
|
||||
return enddate;
|
||||
}
|
||||
|
||||
public void setEnddate(String enddate) {
|
||||
this.enddate = enddate;
|
||||
}
|
||||
|
||||
public String getCallidentifier() {
|
||||
return callidentifier;
|
||||
}
|
||||
|
||||
public void setCallidentifier(String callidentifier) {
|
||||
this.callidentifier = callidentifier;
|
||||
}
|
||||
|
||||
public String getKeywords() {
|
||||
return keywords;
|
||||
}
|
||||
|
||||
public void setKeywords(String keywords) {
|
||||
this.keywords = keywords;
|
||||
}
|
||||
|
||||
public boolean isOpenaccessmandateforpublications() {
|
||||
return openaccessmandateforpublications;
|
||||
}
|
||||
|
||||
public void setOpenaccessmandateforpublications(boolean openaccessmandateforpublications) {
|
||||
this.openaccessmandateforpublications = openaccessmandateforpublications;
|
||||
}
|
||||
|
||||
public boolean isOpenaccessmandatefordataset() {
|
||||
return openaccessmandatefordataset;
|
||||
}
|
||||
|
||||
public void setOpenaccessmandatefordataset(boolean openaccessmandatefordataset) {
|
||||
this.openaccessmandatefordataset = openaccessmandatefordataset;
|
||||
}
|
||||
|
||||
public List<String> getSubject() {
|
||||
return subject;
|
||||
}
|
||||
|
||||
public void setSubject(List<String> subject) {
|
||||
this.subject = subject;
|
||||
}
|
||||
|
||||
public List<Funder> getFunding() {
|
||||
return funding;
|
||||
}
|
||||
|
||||
public void setFunding(List<Funder> funding) {
|
||||
this.funding = funding;
|
||||
}
|
||||
|
||||
public String getSummary() {
|
||||
return summary;
|
||||
}
|
||||
|
||||
public void setSummary(String summary) {
|
||||
this.summary = summary;
|
||||
}
|
||||
|
||||
public Granted getGranted() {
|
||||
return granted;
|
||||
}
|
||||
|
||||
public void setGranted(Granted granted) {
|
||||
this.granted = granted;
|
||||
}
|
||||
|
||||
public List<Programme> getH2020programme() {
|
||||
return h2020programme;
|
||||
}
|
||||
|
||||
public void setH2020programme(List<Programme> h2020programme) {
|
||||
this.h2020programme = h2020programme;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,39 @@
|
|||
|
||||
package eu.dnetlib.dhp.schema.dump.oaf.graph;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
/**
 * Represents the semantics of the generic relation between two entities. It has the following
 * properties:
 * <ul>
 * <li>{@code name}: the semantics of the relation (i.e. isAuthorInstitutionOf). It corresponds to
 * the relclass parameter in the relation represented in the internal model</li>
 * <li>{@code type}: the type of the relation (i.e. affiliation). It corresponds to the subreltype
 * parameter of the relation represented in the internal model</li>
 * </ul>
 */
public class RelType implements Serializable {

    /** Semantics of the relation (relclass). */
    private String name;

    /** Type of the relation (subreltype). */
    private String type;

    /**
     * @return the semantics of the relation, or {@code null} when not set
     */
    public String getName() {
        return this.name;
    }

    /**
     * @param name the semantics of the relation
     */
    public void setName(String name) {
        this.name = name;
    }

    /**
     * @return the type of the relation, or {@code null} when not set
     */
    public String getType() {
        return this.type;
    }

    /**
     * @param type the type of the relation
     */
    public void setType(String type) {
        this.type = type;
    }

    /**
     * Creates a populated relation type.
     *
     * @param name the semantics of the relation
     * @param type the type of the relation
     * @return a new {@code RelType} carrying the given name and type
     */
    public static RelType newInstance(String name, String type) {
        final RelType created = new RelType();
        created.setName(name);
        created.setType(type);
        return created;
    }
}
|
|
@ -0,0 +1,67 @@
|
|||
|
||||
package eu.dnetlib.dhp.schema.dump.oaf.graph;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.Objects;
|
||||
|
||||
import eu.dnetlib.dhp.schema.dump.oaf.Provenance;
|
||||
|
||||
/**
|
||||
* To represent the gereric relation between two entities. It has the following parameters: - private Node source to
|
||||
* represent the entity source of the relation - private Node target to represent the entity target of the relation -
|
||||
* private RelType reltype to represent the semantics of the relation - private Provenance provenance to represent the
|
||||
* provenance of the relation
|
||||
*/
|
||||
public class Relation implements Serializable {
|
||||
private Node source;
|
||||
private Node target;
|
||||
private RelType reltype;
|
||||
private Provenance provenance;
|
||||
|
||||
public Node getSource() {
|
||||
return source;
|
||||
}
|
||||
|
||||
public void setSource(Node source) {
|
||||
this.source = source;
|
||||
}
|
||||
|
||||
public Node getTarget() {
|
||||
return target;
|
||||
}
|
||||
|
||||
public void setTarget(Node target) {
|
||||
this.target = target;
|
||||
}
|
||||
|
||||
public RelType getReltype() {
|
||||
return reltype;
|
||||
}
|
||||
|
||||
public void setReltype(RelType reltype) {
|
||||
this.reltype = reltype;
|
||||
}
|
||||
|
||||
public Provenance getProvenance() {
|
||||
return provenance;
|
||||
}
|
||||
|
||||
public void setProvenance(Provenance provenance) {
|
||||
this.provenance = provenance;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
|
||||
return Objects.hash(source.getId(), target.getId(), reltype.getType() + ":" + reltype.getName());
|
||||
}
|
||||
|
||||
public static Relation newInstance(Node source, Node target, RelType reltype, Provenance provenance) {
|
||||
Relation relation = new Relation();
|
||||
relation.source = source;
|
||||
relation.target = target;
|
||||
relation.reltype = reltype;
|
||||
relation.provenance = provenance;
|
||||
return relation;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,20 @@
|
|||
|
||||
package eu.dnetlib.dhp.schema.dump.oaf.graph;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* To represent RC entities. It extends eu.dnetlib.dhp.dump.oaf.grap.ResearchInitiative by adding the parameter subject
|
||||
* to store the list of subjects related to the community
|
||||
*/
|
||||
public class ResearchCommunity extends ResearchInitiative {
|
||||
private List<String> subject;
|
||||
|
||||
public List<String> getSubject() {
|
||||
return subject;
|
||||
}
|
||||
|
||||
public void setSubject(List<String> subject) {
|
||||
this.subject = subject;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,75 @@
|
|||
|
||||
package eu.dnetlib.dhp.schema.dump.oaf.graph;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
/**
 * Represents an entity of type RC/RI. It has the following properties, which are mostly derived
 * from the profile:
 * <ul>
 * <li>{@code id}: the openaire id for the entity. It has 00 as code and will be created as
 * {@code 00|context_____::md5(originalId)}</li>
 * <li>{@code originalId}: the id of the context as provided in the profile (i.e. mes)</li>
 * <li>{@code name}: the name of the context (taken from the label attribute in the context
 * definition)</li>
 * <li>{@code type}: the type of the context (i.e.: research initiative or research community)</li>
 * <li>{@code description}: the description of the context as given in the profile</li>
 * <li>{@code zenodo_community}: the zenodo community associated to the context (main zenodo
 * community)</li>
 * </ul>
 */
public class ResearchInitiative implements Serializable {

    /** Openaire id of the entity. */
    private String id;

    /** Id of the context as provided in the profile. */
    private String originalId;

    /** Name of the context. */
    private String name;

    /** Type of the context: research initiative or research community. */
    private String type;

    /** Description of the context. */
    private String description;

    /** Zenodo community associated to the context. */
    private String zenodo_community;

    /**
     * @return the zenodo community, or {@code null} when not set
     */
    public String getZenodo_community() {
        return this.zenodo_community;
    }

    /**
     * @param zenodo_community the zenodo community associated to the context
     */
    public void setZenodo_community(String zenodo_community) {
        this.zenodo_community = zenodo_community;
    }

    /**
     * @return the type of the context, or {@code null} when not set
     */
    public String getType() {
        return this.type;
    }

    /**
     * @param type the type of the context
     */
    public void setType(String type) {
        this.type = type;
    }

    /**
     * @return the openaire id, or {@code null} when not set
     */
    public String getId() {
        return this.id;
    }

    /**
     * @param id the openaire id of the entity
     */
    public void setId(String id) {
        this.id = id;
    }

    /**
     * @return the name of the context, or {@code null} when not set
     */
    public String getName() {
        return this.name;
    }

    /**
     * @param label the value stored as the name of the context
     */
    public void setName(String label) {
        this.name = label;
    }

    /**
     * @return the id of the context, or {@code null} when not set
     */
    public String getOriginalId() {
        return this.originalId;
    }

    /**
     * @param originalId the id of the context as provided in the profile
     */
    public void setOriginalId(String originalId) {
        this.originalId = originalId;
    }

    /**
     * @return the description of the context, or {@code null} when not set
     */
    public String getDescription() {
        return this.description;
    }

    /**
     * @param description the description of the context
     */
    public void setDescription(String description) {
        this.description = description;
    }
}
|
|
@ -0,0 +1,88 @@
|
|||
|
||||
package eu.dnetlib.dhp.schema.oaf;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.Objects;
|
||||
|
||||
/**
|
||||
* To store information about the classification for the project. The classification depends on the programme. For example
|
||||
* H2020-EU.3.4.5.3 can be classified as
|
||||
* H2020-EU.3. => Societal Challenges (level1)
|
||||
* H2020-EU.3.4. => Transport (level2)
|
||||
* H2020-EU.3.4.5. => CLEANSKY2 (level3)
|
||||
* H2020-EU.3.4.5.3. => IADP Fast Rotorcraft (level4)
|
||||
*
|
||||
* We decided to explicitly represent up to three levels in the classification.
|
||||
*
|
||||
* H2020Classification has the following parameters:
|
||||
* - private Programme programme to store the information about the programme related to this classification
|
||||
* - private String level1 to store the information about the level 1 of the classification (Priority or Pillar of the EC)
|
||||
* - private String level2 to store the information about the level2 af the classification (Objectives (?))
|
||||
* - private String level3 to store the information about the level3 of the classification
|
||||
* - private String classification to store the entire classification related to the programme
|
||||
*/
|
||||
|
||||
public class H2020Classification implements Serializable {
|
||||
private H2020Programme h2020Programme;
|
||||
private String level1;
|
||||
private String level2;
|
||||
private String level3;
|
||||
|
||||
private String classification;
|
||||
|
||||
public H2020Programme getH2020Programme() {
|
||||
return h2020Programme;
|
||||
}
|
||||
|
||||
public void setH2020Programme(H2020Programme h2020Programme) {
|
||||
this.h2020Programme = h2020Programme;
|
||||
}
|
||||
|
||||
public String getLevel1() {
|
||||
return level1;
|
||||
}
|
||||
|
||||
public void setLevel1(String level1) {
|
||||
this.level1 = level1;
|
||||
}
|
||||
|
||||
public String getLevel2() {
|
||||
return level2;
|
||||
}
|
||||
|
||||
public void setLevel2(String level2) {
|
||||
this.level2 = level2;
|
||||
}
|
||||
|
||||
public String getLevel3() {
|
||||
return level3;
|
||||
}
|
||||
|
||||
public void setLevel3(String level3) {
|
||||
this.level3 = level3;
|
||||
}
|
||||
|
||||
public String getClassification() {
|
||||
return classification;
|
||||
}
|
||||
|
||||
public void setClassification(String classification) {
|
||||
this.classification = classification;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
if (this == o)
|
||||
return true;
|
||||
if (o == null || getClass() != o.getClass())
|
||||
return false;
|
||||
|
||||
H2020Classification h2020classification = (H2020Classification) o;
|
||||
|
||||
return Objects.equals(level1, h2020classification.level1) &&
|
||||
Objects.equals(level2, h2020classification.level2) &&
|
||||
Objects.equals(level3, h2020classification.level3) &&
|
||||
Objects.equals(classification, h2020classification.classification) &&
|
||||
h2020Programme.equals(h2020classification.h2020Programme);
|
||||
}
|
||||
}
|
|
@ -4,7 +4,13 @@ package eu.dnetlib.dhp.schema.oaf;
|
|||
import java.io.Serializable;
|
||||
import java.util.Objects;
|
||||
|
||||
public class Programme implements Serializable {
|
||||
/**
|
||||
* To store information about the ec programme for the project. It has the following parameters:
|
||||
* - private String code to store the code of the programme
|
||||
* - private String description to store the description of the programme
|
||||
*/
|
||||
|
||||
public class H2020Programme implements Serializable {
|
||||
private String code;
|
||||
private String description;
|
||||
|
||||
|
@ -31,8 +37,8 @@ public class Programme implements Serializable {
|
|||
if (o == null || getClass() != o.getClass())
|
||||
return false;
|
||||
|
||||
Programme programme = (Programme) o;
|
||||
return Objects.equals(code, programme.code);
|
||||
H2020Programme h2020Programme = (H2020Programme) o;
|
||||
return Objects.equals(code, h2020Programme.code);
|
||||
}
|
||||
|
||||
}
|
|
@ -58,7 +58,35 @@ public class Project extends OafEntity implements Serializable {
|
|||
|
||||
private Float fundedamount;
|
||||
|
||||
private List<Programme> programme;
|
||||
private String h2020topiccode;
|
||||
|
||||
private String h2020topicdescription;
|
||||
|
||||
private List<H2020Classification> h2020classification;
|
||||
|
||||
/** Returns the current value of the {@code h2020topicdescription} field. */
public String getH2020topicdescription() {
|
||||
return h2020topicdescription;
|
||||
}
|
||||
|
||||
/** Stores the given value in the {@code h2020topicdescription} field. */
public void setH2020topicdescription(String h2020topicdescription) {
|
||||
this.h2020topicdescription = h2020topicdescription;
|
||||
}
|
||||
|
||||
/** Returns the current value of the {@code h2020topiccode} field. */
public String getH2020topiccode() {
|
||||
return h2020topiccode;
|
||||
}
|
||||
|
||||
/** Stores the given value in the {@code h2020topiccode} field. */
public void setH2020topiccode(String h2020topiccode) {
|
||||
this.h2020topiccode = h2020topiccode;
|
||||
}
|
||||
|
||||
/** Returns the list held in the {@code h2020classification} field (the same instance, not a copy). */
public List<H2020Classification> getH2020classification() {
|
||||
return h2020classification;
|
||||
}
|
||||
|
||||
/** Stores the given list reference in the {@code h2020classification} field (no defensive copy). */
public void setH2020classification(List<H2020Classification> h2020classification) {
|
||||
this.h2020classification = h2020classification;
|
||||
}
|
||||
|
||||
public Field<String> getWebsiteurl() {
|
||||
return websiteurl;
|
||||
|
@ -268,14 +296,6 @@ public class Project extends OafEntity implements Serializable {
|
|||
this.fundedamount = fundedamount;
|
||||
}
|
||||
|
||||
public List<Programme> getProgramme() {
|
||||
return programme;
|
||||
}
|
||||
|
||||
public void setProgramme(List<Programme> programme) {
|
||||
this.programme = programme;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void mergeFrom(OafEntity e) {
|
||||
super.mergeFrom(e);
|
||||
|
@ -331,7 +351,9 @@ public class Project extends OafEntity implements Serializable {
|
|||
? p.getFundedamount()
|
||||
: fundedamount;
|
||||
|
||||
programme = mergeLists(programme, p.getProgramme());
|
||||
// programme = mergeLists(programme, p.getProgramme());
|
||||
|
||||
h2020classification = mergeLists(h2020classification, p.getH2020classification());
|
||||
|
||||
mergeOAFDataInfo(e);
|
||||
}
|
||||
|
|
|
@ -7,6 +7,8 @@ import java.util.Comparator;
|
|||
import java.util.List;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import eu.dnetlib.dhp.schema.common.LicenseComparator;
|
||||
|
||||
public class Result extends OafEntity implements Serializable {
|
||||
|
||||
private List<Measure> measures;
|
||||
|
@ -245,7 +247,8 @@ public class Result extends OafEntity implements Serializable {
|
|||
|
||||
instance = mergeLists(instance, r.getInstance());
|
||||
|
||||
if (r.getBestaccessright() != null && compareTrust(this, r) < 0)
|
||||
if (r.getBestaccessright() != null
|
||||
&& new LicenseComparator().compare(r.getBestaccessright(), bestaccessright) < 0)
|
||||
bestaccessright = r.getBestaccessright();
|
||||
|
||||
if (r.getResulttype() != null && compareTrust(this, r) < 0)
|
||||
|
|
|
@ -1,7 +1,14 @@
|
|||
|
||||
package eu.dnetlib.doiboost.orcid.model;
|
||||
package eu.dnetlib.dhp.schema.orcid;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.List;
|
||||
|
||||
import com.google.common.collect.Lists;
|
||||
|
||||
/**
|
||||
* This class models the data that are retrieved from orcid publication
|
||||
*/
|
||||
|
||||
public class AuthorData implements Serializable {
|
||||
|
||||
|
@ -10,6 +17,7 @@ public class AuthorData implements Serializable {
|
|||
private String surname;
|
||||
private String creditName;
|
||||
private String errorCode;
|
||||
private List<String> otherNames;
|
||||
|
||||
public String getErrorCode() {
|
||||
return errorCode;
|
||||
|
@ -50,4 +58,15 @@ public class AuthorData implements Serializable {
|
|||
public void setOid(String oid) {
|
||||
this.oid = oid;
|
||||
}
|
||||
|
||||
/** Returns the list held in the {@code otherNames} field (the same instance, not a copy). */
public List<String> getOtherNames() {
|
||||
return otherNames;
|
||||
}
|
||||
|
||||
public void setOtherNames(List<String> otherNames) {
|
||||
if (this.otherNames == null) {
|
||||
this.otherNames = Lists.newArrayList();
|
||||
}
|
||||
this.otherNames = otherNames;
|
||||
}
|
||||
}
|
|
@ -1,30 +0,0 @@
|
|||
|
||||
package eu.dnetlib.dhp.schema.scholexplorer;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import eu.dnetlib.dhp.schema.oaf.KeyValue;
|
||||
import eu.dnetlib.dhp.schema.oaf.Relation;
|
||||
|
||||
/** Relation subclass that additionally carries a collection date and a list of collectedFrom key/values. */
public class DLIRelation extends Relation {
|
||||
|
||||
// Date of collection, kept as a plain String.
private String dateOfCollection;
|
||||
|
||||
// KeyValue entries stored under the name collectedFrom.
private List<KeyValue> collectedFrom;
|
||||
|
||||
/** Returns the list held in the {@code collectedFrom} field (the same instance, not a copy). */
public List<KeyValue> getCollectedFrom() {
|
||||
return collectedFrom;
|
||||
}
|
||||
|
||||
/** Stores the given list reference in the {@code collectedFrom} field. */
public void setCollectedFrom(List<KeyValue> collectedFrom) {
|
||||
this.collectedFrom = collectedFrom;
|
||||
}
|
||||
|
||||
/** Returns the current value of the {@code dateOfCollection} field. */
public String getDateOfCollection() {
|
||||
return dateOfCollection;
|
||||
}
|
||||
|
||||
/** Stores the given value in the {@code dateOfCollection} field. */
public void setDateOfCollection(String dateOfCollection) {
|
||||
this.dateOfCollection = dateOfCollection;
|
||||
}
|
||||
}
|
|
@ -2,10 +2,8 @@
|
|||
package eu.dnetlib.dhp.schema.scholexplorer;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.*;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
|
||||
|
@ -78,6 +76,25 @@ public class DLIUnknown extends Oaf implements Serializable {
|
|||
if ("complete".equalsIgnoreCase(p.completionStatus))
|
||||
completionStatus = "complete";
|
||||
dlicollectedfrom = mergeProvenance(dlicollectedfrom, p.getDlicollectedfrom());
|
||||
if (StringUtils.isEmpty(id) && StringUtils.isNoneEmpty(p.getId()))
|
||||
id = p.getId();
|
||||
if (StringUtils.isEmpty(dateofcollection) && StringUtils.isNoneEmpty(p.getDateofcollection()))
|
||||
dateofcollection = p.getDateofcollection();
|
||||
|
||||
// Fix: a missing transformation date must be copied into dateoftransformation. The previous code
// assigned it to dateofcollection, overwriting the collection date and leaving this field empty.
if (StringUtils.isEmpty(dateoftransformation) && StringUtils.isNoneEmpty(p.getDateoftransformation()))
	dateoftransformation = p.getDateoftransformation();
|
||||
pid = mergeLists(pid, p.getPid());
|
||||
}
|
||||
|
||||
protected <T> List<T> mergeLists(final List<T>... lists) {
|
||||
|
||||
return Arrays
|
||||
.stream(lists)
|
||||
.filter(Objects::nonNull)
|
||||
.flatMap(List::stream)
|
||||
.filter(Objects::nonNull)
|
||||
.distinct()
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
|
||||
private List<ProvenaceInfo> mergeProvenance(
|
||||
|
|
|
@ -0,0 +1,90 @@
|
|||
package eu.dnetlib.dhp.schema.scholexplorer
|
||||
|
||||
import eu.dnetlib.dhp.schema.oaf.{DataInfo, Field, KeyValue, Qualifier, StructuredProperty}
|
||||
|
||||
/** Small factory helpers that build pre-populated OAF schema objects (KeyValue, DataInfo, Qualifier, Field, StructuredProperty). */
object OafUtils {

  /** Builds a KeyValue with the given key and value, carrying a DataInfo created with trust "0.9". */
  def generateKeyValue(key: String, value: String): KeyValue = {
    val kv: KeyValue = new KeyValue()
    kv.setKey(key)
    kv.setValue(value)
    kv.setDataInfo(generateDataInfo("0.9"))
    kv
  }

  /**
   * Builds a DataInfo that is neither inferred nor deleted by inference, with the
   * "sysimport:actionset" provenance action.
   *
   * @param trust      trust value to set (defaults to "0.9")
   * @param invisibile value for the invisible flag (defaults to false); the parameter name is
   *                   kept as-is so that callers using named arguments keep compiling
   */
  def generateDataInfo(trust: String = "0.9", invisibile: Boolean = false): DataInfo = {
    val di = new DataInfo
    di.setDeletedbyinference(false)
    di.setInferred(false)
    // Fix: honour the caller-supplied flag; it used to be ignored and hard-coded to false.
    di.setInvisible(invisibile)
    di.setTrust(trust)
    di.setProvenanceaction(createQualifier("sysimport:actionset", "dnet:provenanceActions"))
    di
  }

  /** Builds a Qualifier where the class id doubles as class name and the scheme id as scheme name. */
  def createQualifier(cls: String, sch: String): Qualifier = {
    createQualifier(cls, cls, sch, sch)
  }

  /** Builds a Qualifier from explicit class id/name and scheme id/name. */
  def createQualifier(classId: String, className: String, schemeId: String, schemeName: String): Qualifier = {
    val q: Qualifier = new Qualifier
    q.setClassid(classId)
    q.setClassname(className)
    q.setSchemeid(schemeId)
    q.setSchemename(schemeName)
    q
  }

  /** Wraps a value into a Field; no DataInfo is set on it. */
  def asField[T](value: T): Field[T] = {
    val tmp = new Field[T]
    tmp.setValue(value)
    tmp
  }

  /** Builds a StructuredProperty with the given value and a fully specified qualifier. */
  def createSP(value: String, classId: String, className: String, schemeId: String, schemeName: String): StructuredProperty = {
    val sp = new StructuredProperty
    sp.setQualifier(createQualifier(classId, className, schemeId, schemeName))
    sp.setValue(value)
    sp
  }

  /** Same as the five-argument overload, additionally attaching the given DataInfo. */
  def createSP(value: String, classId: String, className: String, schemeId: String, schemeName: String, dataInfo: DataInfo): StructuredProperty = {
    val sp = createSP(value, classId, className, schemeId, schemeName)
    sp.setDataInfo(dataInfo)
    sp
  }

  /** Builds a StructuredProperty whose qualifier reuses the class id as name and the scheme id as scheme name. */
  def createSP(value: String, classId: String, schemeId: String): StructuredProperty = {
    val sp = new StructuredProperty
    sp.setQualifier(createQualifier(classId, schemeId))
    sp.setValue(value)
    sp
  }

  /** Same as the three-argument overload, additionally attaching the given DataInfo. */
  def createSP(value: String, classId: String, schemeId: String, dataInfo: DataInfo): StructuredProperty = {
    val sp = createSP(value, classId, schemeId)
    sp.setDataInfo(dataInfo)
    sp
  }

}
|
|
@ -66,6 +66,19 @@
|
|||
</dependency>
|
||||
|
||||
|
||||
<!-- https://mvnrepository.com/artifact/org.apache.poi/poi-ooxml -->
|
||||
<dependency>
|
||||
<groupId>org.apache.poi</groupId>
|
||||
<artifactId>poi-ooxml</artifactId>
|
||||
</dependency>
|
||||
|
||||
<!-- https://mvnrepository.com/artifact/org.apache.commons/commons-compress -->
|
||||
<dependency>
|
||||
<groupId>org.apache.commons</groupId>
|
||||
<artifactId>commons-compress</artifactId>
|
||||
</dependency>
|
||||
|
||||
|
||||
|
||||
</dependencies>
|
||||
|
||||
|
|
|
@ -4,11 +4,15 @@ package eu.dnetlib.dhp.actionmanager.project;
|
|||
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Optional;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.apache.spark.SparkConf;
|
||||
import org.apache.spark.api.java.JavaRDD;
|
||||
import org.apache.spark.api.java.JavaSparkContext;
|
||||
import org.apache.spark.api.java.function.MapFunction;
|
||||
import org.apache.spark.sql.*;
|
||||
import org.slf4j.Logger;
|
||||
|
@ -16,11 +20,79 @@ import org.slf4j.LoggerFactory;
|
|||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
|
||||
import eu.dnetlib.dhp.actionmanager.project.csvutils.CSVProgramme;
|
||||
import eu.dnetlib.dhp.actionmanager.project.utils.CSVProgramme;
|
||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||
import eu.dnetlib.dhp.common.HdfsSupport;
|
||||
import scala.Tuple2;
|
||||
|
||||
/**
|
||||
* Among all the programmes provided in the csv file, selects those in H2020 framework that have an english title.
|
||||
*
|
||||
* The title is then handled to get the programme description at a certain level. The set of programme titles will then
|
||||
* be used to associate a classification for the programme.
|
||||
*
|
||||
* The programme code describes a hierarchy that can be exploited to provide the classification. To determine the hierarchy
|
||||
* the code can be split by '.'. If the length of the split code is less than or equal to 2 it can be directly used
|
||||
* as the classification: H2020-EU -> Horizon 2020 Framework Programme (It will never be repeated),
|
||||
* H2020-EU.1. -> Excellent science, H2020-EU.2. -> Industrial leadership etc.
|
||||
*
|
||||
* The codes are ordered and for all of them the concatenation of all the titles (from the element in position 1 of
|
||||
* the split code) handled as below is used to create the classification. For example:
|
||||
*
|
||||
* H2020-EU.1.1 -> Excellent science | European Research Council (ERC)
|
||||
* from H2020-EU.1. -> Excellent science and H2020-EU.1.1. -> European Research Council (ERC)
|
||||
*
|
||||
* H2020-EU.3.1.3.1. -> Societal challenges | Health, demographic change and well-being | Treating and managing disease | Treating disease, including developing regenerative medicine
|
||||
* from H2020-EU.3. -> Societal challenges,
|
||||
* H2020-EU.3.1. -> Health, demographic change and well-being
|
||||
* H2020-EU.3.1.3 -> Treating and managing disease
|
||||
* H2020-EU.3.1.3.1. -> Treating disease, including developing regenerative medicine
|
||||
*
|
||||
* The classification up to level three, will be split in dedicated variables, while the complete classification will be stored
|
||||
* in a variable called classification and provided as shown above.
|
||||
*
|
||||
* The programme title is not given in a standardized way:
|
||||
*
|
||||
* - Sometimes associated to the higher level in the hierarchy we can find Priority in title other times it is not the
|
||||
* case. Since it is not uniform, we removed priority from the handled titles:
|
||||
*
|
||||
* H2020-EU.1. -> PRIORITY 'Excellent science'
|
||||
* H2020-EU.2. -> PRIORITY 'Industrial leadership'
|
||||
* H2020-EU.3. -> PRIORITY 'Societal challenges'
|
||||
*
|
||||
* will become
|
||||
*
|
||||
* H2020-EU.1. -> Excellent science
|
||||
* H2020-EU.2. -> Industrial leadership
|
||||
* H2020-EU.3. -> Societal challenges
|
||||
*
|
||||
* - Sometimes the title of the parent is repeated in the title for the code, but it is not always the case, so, titles
|
||||
* associated to previous levels in the hierarchy are removed from the code title.
|
||||
*
|
||||
* H2020-EU.1.2. -> EXCELLENT SCIENCE - Future and Emerging Technologies (FET)
|
||||
* H2020-EU.2.2. -> INDUSTRIAL LEADERSHIP - Access to risk finance
|
||||
* H2020-EU.3.4. -> SOCIETAL CHALLENGES - Smart, Green And Integrated Transport
|
||||
*
|
||||
* will become
|
||||
*
|
||||
* H2020-EU.1.2. -> Future and Emerging Technologies (FET)
|
||||
* H2020-EU.2.2. -> Access to risk finance
|
||||
* H2020-EU.3.4. -> Smart, Green And Integrated Transport
|
||||
*
|
||||
* This holds at all levels in the hierarchy. Hence
|
||||
*
|
||||
* H2020-EU.2.1.2. -> INDUSTRIAL LEADERSHIP - Leadership in enabling and industrial technologies – Nanotechnologies
|
||||
*
|
||||
* will become
|
||||
*
|
||||
* H2020-EU.2.1.2. -> Nanotechnologies
|
||||
*
|
||||
* - Euratom is not given in the way the other programmes are: H2020-EU. but H2020-Euratom- . So we need to write
|
||||
* specific code for it
|
||||
*
|
||||
*
|
||||
*
|
||||
*/
|
||||
public class PrepareProgramme {
|
||||
|
||||
private static final Logger log = LoggerFactory.getLogger(PrepareProgramme.class);
|
||||
|
@ -69,49 +141,149 @@ public class PrepareProgramme {
|
|||
private static void exec(SparkSession spark, String programmePath, String outputPath) {
|
||||
Dataset<CSVProgramme> programme = readPath(spark, programmePath, CSVProgramme.class);
|
||||
|
||||
programme
|
||||
JavaRDD<CSVProgramme> h2020Programmes = programme
|
||||
.toJavaRDD()
|
||||
.filter(p -> !p.getCode().contains("FP7"))
|
||||
.filter(p -> p.getFrameworkProgramme().trim().equalsIgnoreCase("H2020"))
|
||||
.mapToPair(csvProgramme -> new Tuple2<>(csvProgramme.getCode(), csvProgramme))
|
||||
.reduceByKey((a, b) -> {
|
||||
if (StringUtils.isEmpty(a.getShortTitle())) {
|
||||
if (StringUtils.isEmpty(b.getShortTitle())) {
|
||||
if (StringUtils.isEmpty(a.getTitle())) {
|
||||
if (StringUtils.isNotEmpty(b.getTitle())) {
|
||||
a.setShortTitle(b.getTitle());
|
||||
a.setLanguage(b.getLanguage());
|
||||
}
|
||||
} else {// notIsEmpty a.getTitle
|
||||
if (StringUtils.isEmpty(b.getTitle())) {
|
||||
a.setShortTitle(a.getTitle());
|
||||
} else {
|
||||
if (b.getLanguage().equalsIgnoreCase("en")) {
|
||||
a.setShortTitle(b.getTitle());
|
||||
a.setLanguage(b.getLanguage());
|
||||
} else {
|
||||
a.setShortTitle(a.getTitle());
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {// not isEmpty b.getShortTitle
|
||||
a.setShortTitle(b.getShortTitle());
|
||||
// a.setLanguage(b.getLanguage());
|
||||
if (!a.getLanguage().equals("en")) {
|
||||
if (b.getLanguage().equalsIgnoreCase("en")) {
|
||||
a.setTitle(b.getTitle());
|
||||
a.setLanguage(b.getLanguage());
|
||||
}
|
||||
}
|
||||
if (StringUtils.isEmpty(a.getShortTitle())) {
|
||||
if (!StringUtils.isEmpty(b.getShortTitle())) {
|
||||
a.setShortTitle(b.getShortTitle());
|
||||
}
|
||||
}
|
||||
|
||||
return a;
|
||||
|
||||
})
|
||||
.map(p -> {
|
||||
CSVProgramme csvProgramme = p._2();
|
||||
if (StringUtils.isEmpty(csvProgramme.getShortTitle())) {
|
||||
csvProgramme.setShortTitle(csvProgramme.getTitle());
|
||||
String programmeTitle = csvProgramme.getTitle().trim();
|
||||
if (programmeTitle.length() > 8 && programmeTitle.substring(0, 8).equalsIgnoreCase("PRIORITY")) {
|
||||
programmeTitle = programmeTitle.substring(9);
|
||||
if (programmeTitle.charAt(0) == '\'') {
|
||||
programmeTitle = programmeTitle.substring(1);
|
||||
}
|
||||
if (programmeTitle.charAt(programmeTitle.length() - 1) == '\'') {
|
||||
programmeTitle = programmeTitle.substring(0, programmeTitle.length() - 1);
|
||||
}
|
||||
csvProgramme.setTitle(programmeTitle);
|
||||
}
|
||||
return OBJECT_MAPPER.writeValueAsString(csvProgramme);
|
||||
return csvProgramme;
|
||||
});
|
||||
|
||||
// prepareClassification(h2020Programmes);
|
||||
|
||||
JavaSparkContext jsc = new JavaSparkContext(spark.sparkContext());
|
||||
|
||||
JavaRDD<CSVProgramme> rdd = jsc.parallelize(prepareClassification(h2020Programmes), 1);
|
||||
rdd
|
||||
.map(csvProgramme -> {
|
||||
String tmp = OBJECT_MAPPER.writeValueAsString(csvProgramme);
|
||||
return tmp;
|
||||
})
|
||||
.saveAsTextFile(outputPath);
|
||||
|
||||
}
|
||||
|
||||
private static List<CSVProgramme> prepareClassification(JavaRDD<CSVProgramme> h2020Programmes) {
|
||||
Object[] codedescription = h2020Programmes
|
||||
.map(
|
||||
value -> new Tuple2<>(value.getCode(),
|
||||
new Tuple2<String, String>(value.getTitle(), value.getShortTitle())))
|
||||
.collect()
|
||||
.toArray();
|
||||
|
||||
for (int i = 0; i < codedescription.length - 1; i++) {
|
||||
for (int j = i + 1; j < codedescription.length; j++) {
|
||||
Tuple2<String, Tuple2<String, String>> t2i = (Tuple2<String, Tuple2<String, String>>) codedescription[i];
|
||||
Tuple2<String, Tuple2<String, String>> t2j = (Tuple2<String, Tuple2<String, String>>) codedescription[j];
|
||||
if (t2i._1().compareTo(t2j._1()) > 0) {
|
||||
Tuple2<String, Tuple2<String, String>> temp = t2i;
|
||||
codedescription[i] = t2j;
|
||||
codedescription[j] = temp;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Map<String, Tuple2<String, String>> map = new HashMap<>();
|
||||
for (int j = 0; j < codedescription.length; j++) {
|
||||
Tuple2<String, Tuple2<String, String>> entry = (Tuple2<String, Tuple2<String, String>>) codedescription[j];
|
||||
String ent = entry._1();
|
||||
if (ent.contains("Euratom-")) {
|
||||
ent = ent.replace("-Euratom-", ".Euratom.");
|
||||
}
|
||||
String[] tmp = ent.split("\\.");
|
||||
if (tmp.length <= 2) {
|
||||
if (StringUtils.isEmpty(entry._2()._2())) {
|
||||
map.put(entry._1(), new Tuple2<String, String>(entry._2()._1(), entry._2()._1()));
|
||||
} else {
|
||||
map.put(entry._1(), entry._2());
|
||||
}
|
||||
} else {
|
||||
if (ent.endsWith(".")) {
|
||||
ent = ent.substring(0, ent.length() - 1);
|
||||
}
|
||||
String key = ent.substring(0, ent.lastIndexOf(".") + 1);
|
||||
if (key.contains("Euratom")) {
|
||||
key = key.replace(".Euratom.", "-Euratom-");
|
||||
ent = ent.replace(".Euratom.", "-Euratom-");
|
||||
if (key.endsWith("-")) {
|
||||
key = key.substring(0, key.length() - 1);
|
||||
}
|
||||
}
|
||||
String current = entry._2()._1();
|
||||
if (!ent.contains("Euratom")) {
|
||||
|
||||
String parent;
|
||||
String tmp_key = tmp[0] + ".";
|
||||
for (int i = 1; i < tmp.length - 1; i++) {
|
||||
tmp_key += tmp[i] + ".";
|
||||
parent = map.get(tmp_key)._1().toLowerCase().trim();
|
||||
if (parent.contains("|")) {
|
||||
parent = parent.substring(parent.lastIndexOf("|") + 1).trim();
|
||||
}
|
||||
if (current.trim().length() > parent.length()
|
||||
&& current.toLowerCase().trim().substring(0, parent.length()).equals(parent)) {
|
||||
current = current.substring(parent.length() + 1);
|
||||
if (current.trim().charAt(0) == '-' || current.trim().charAt(0) == '–') {
|
||||
current = current.trim().substring(1).trim();
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
String shortTitle = entry._2()._2();
|
||||
if (StringUtils.isEmpty(shortTitle)) {
|
||||
shortTitle = current;
|
||||
}
|
||||
Tuple2<String, String> newEntry = new Tuple2<>(map.get(key)._1() + " | " + current,
|
||||
map.get(key)._2() + " | " + shortTitle);
|
||||
map.put(ent + ".", newEntry);
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
return h2020Programmes.map(csvProgramme -> {
|
||||
|
||||
String code = csvProgramme.getCode();
|
||||
if (!code.endsWith(".") && !code.contains("Euratom")
|
||||
&& !code.equals("H2020-EC"))
|
||||
code += ".";
|
||||
|
||||
csvProgramme.setClassification(map.get(code)._1());
|
||||
csvProgramme.setClassification_short(map.get(code)._2());
|
||||
|
||||
return csvProgramme;
|
||||
}).collect();
|
||||
}
|
||||
|
||||
public static <R> Dataset<R> readPath(
|
||||
SparkSession spark, String inputPath, Class<R> clazz) {
|
||||
return spark
|
||||
|
|
|
@ -6,9 +6,7 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
|||
import java.util.*;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.apache.spark.SparkConf;
|
||||
import org.apache.spark.api.java.JavaRDD;
|
||||
import org.apache.spark.api.java.function.FlatMapFunction;
|
||||
import org.apache.spark.api.java.function.MapFunction;
|
||||
import org.apache.spark.sql.Dataset;
|
||||
|
@ -20,12 +18,16 @@ import org.slf4j.LoggerFactory;
|
|||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
|
||||
import eu.dnetlib.dhp.actionmanager.project.csvutils.CSVProgramme;
|
||||
import eu.dnetlib.dhp.actionmanager.project.csvutils.CSVProject;
|
||||
import eu.dnetlib.dhp.actionmanager.project.utils.CSVProgramme;
|
||||
import eu.dnetlib.dhp.actionmanager.project.utils.CSVProject;
|
||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||
import eu.dnetlib.dhp.common.HdfsSupport;
|
||||
import scala.Tuple2;
|
||||
|
||||
/**
|
||||
* Selects only the relevant information collected with the projects: project grant agreement, project programme code and
|
||||
* project topic code for the projects that are also collected from OpenAIRE.
|
||||
*/
|
||||
public class PrepareProjects {
|
||||
|
||||
// Logger bound to this class (it was created for PrepareProgramme, so messages were attributed to the wrong class).
private static final Logger log = LoggerFactory.getLogger(PrepareProjects.class);
|
||||
|
@ -97,10 +99,14 @@ public class PrepareProjects {
|
|||
if (csvProject.isPresent()) {
|
||||
|
||||
String[] programme = csvProject.get().getProgramme().split(";");
|
||||
String topic = csvProject.get().getTopics();
|
||||
|
||||
Arrays
|
||||
.stream(programme)
|
||||
.forEach(p -> {
|
||||
CSVProject proj = new CSVProject();
|
||||
proj.setTopics(topic);
|
||||
|
||||
proj.setProgramme(p);
|
||||
proj.setId(csvProject.get().getId());
|
||||
csvProjectList.add(proj);
|
||||
|
|
|
@ -3,6 +3,9 @@ package eu.dnetlib.dhp.actionmanager.project;
|
|||
|
||||
import java.io.Serializable;
|
||||
|
||||
/**
|
||||
* Class to store the grant agreement (code) of the collected projects
|
||||
*/
|
||||
public class ProjectSubset implements Serializable {
|
||||
|
||||
private String code;
|
||||
|
@ -14,4 +17,5 @@ public class ProjectSubset implements Serializable {
|
|||
/** Stores the given value in the {@code code} field. */
public void setCode(String code) {
|
||||
this.code = code;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -25,6 +25,10 @@ import com.fasterxml.jackson.databind.ObjectMapper;
|
|||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||
import eu.dnetlib.dhp.common.DbClient;
|
||||
|
||||
/**
|
||||
* Queries the OpenAIRE database to get the grant agreement of projects collected from corda__h2020. The codes collected
|
||||
* are written on hdfs using the ProjectSubset model
|
||||
*/
|
||||
public class ReadProjectsFromDB implements Closeable {
|
||||
|
||||
private final DbClient dbClient;
|
||||
|
@ -33,7 +37,7 @@ public class ReadProjectsFromDB implements Closeable {
|
|||
private final BufferedWriter writer;
|
||||
private final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
||||
|
||||
private final static String query = "SELECT code " +
|
||||
private final static String query = "SELECT code " +
|
||||
"from projects where id like 'corda__h2020%' ";
|
||||
|
||||
public static void main(final String[] args) throws Exception {
|
||||
|
@ -72,7 +76,6 @@ public class ReadProjectsFromDB implements Closeable {
|
|||
try {
|
||||
ProjectSubset p = new ProjectSubset();
|
||||
p.setCode(rs.getString("code"));
|
||||
|
||||
return Arrays.asList(p);
|
||||
|
||||
} catch (final Exception e) {
|
||||
|
|
|
@ -3,47 +3,53 @@ package eu.dnetlib.dhp.actionmanager.project;
|
|||
|
||||
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
import java.util.Objects;
|
||||
import java.util.Optional;
|
||||
import java.util.function.Consumer;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.io.IntWritable;
|
||||
import org.apache.hadoop.io.SequenceFile;
|
||||
import org.apache.hadoop.io.Text;
|
||||
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
|
||||
import org.apache.hadoop.mapred.TextOutputFormat;
|
||||
import org.apache.spark.SparkConf;
|
||||
import org.apache.spark.api.java.function.MapFunction;
|
||||
import org.apache.spark.api.java.function.MapGroupsFunction;
|
||||
import org.apache.spark.rdd.SequenceFileRDDFunctions;
|
||||
import org.apache.spark.sql.Dataset;
|
||||
import org.apache.spark.sql.Encoders;
|
||||
import org.apache.spark.sql.SaveMode;
|
||||
import org.apache.spark.sql.SparkSession;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
|
||||
import eu.dnetlib.dhp.actionmanager.project.csvutils.CSVProgramme;
|
||||
import eu.dnetlib.dhp.actionmanager.project.csvutils.CSVProject;
|
||||
import eu.dnetlib.dhp.actionmanager.project.utils.CSVProgramme;
|
||||
import eu.dnetlib.dhp.actionmanager.project.utils.CSVProject;
|
||||
import eu.dnetlib.dhp.actionmanager.project.utils.EXCELTopic;
|
||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||
import eu.dnetlib.dhp.common.HdfsSupport;
|
||||
import eu.dnetlib.dhp.schema.action.AtomicAction;
|
||||
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
||||
import eu.dnetlib.dhp.schema.oaf.Programme;
|
||||
import eu.dnetlib.dhp.schema.oaf.H2020Classification;
|
||||
import eu.dnetlib.dhp.schema.oaf.H2020Programme;
|
||||
import eu.dnetlib.dhp.schema.oaf.Project;
|
||||
import eu.dnetlib.dhp.utils.DHPUtils;
|
||||
import scala.Function1;
|
||||
import scala.Tuple2;
|
||||
import scala.runtime.BoxedUnit;
|
||||
|
||||
/**
|
||||
* Class that makes the ActionSet. To prepare the AS two joins are needed
|
||||
*
|
||||
* 1. join between the collected project subset and the programme extended with the classification on the grant agreement.
|
||||
* For each entry a
|
||||
* eu.dnetlib.dhp.Project entity is created and the information about H2020Classification is set together with the
|
||||
* h2020topiccode variable
|
||||
* 2. join between the output of the previous step and the topic information on the topic code. Each time a match is
|
||||
* found the h2020topicdescription variable is set.
|
||||
*
|
||||
* To produce one single entry for each project code a grouping step is needed: each project can be associated to more
|
||||
* than one programme.
|
||||
*
|
||||
*
|
||||
*/
|
||||
public class SparkAtomicActionJob {
|
||||
private static final Logger log = LoggerFactory.getLogger(SparkAtomicActionJob.class);
|
||||
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
||||
|
@ -77,6 +83,9 @@ public class SparkAtomicActionJob {
|
|||
final String programmePath = parser.get("programmePath");
|
||||
log.info("programmePath {}: ", programmePath);
|
||||
|
||||
final String topicPath = parser.get("topicPath");
|
||||
log.info("topic path {}: ", topicPath);
|
||||
|
||||
SparkConf conf = new SparkConf();
|
||||
|
||||
runWithSparkSession(
|
||||
|
@ -88,6 +97,7 @@ public class SparkAtomicActionJob {
|
|||
spark,
|
||||
projectPath,
|
||||
programmePath,
|
||||
topicPath,
|
||||
outputPath);
|
||||
});
|
||||
}
|
||||
|
@ -98,31 +108,54 @@ public class SparkAtomicActionJob {
|
|||
|
||||
private static void getAtomicActions(SparkSession spark, String projectPatH,
|
||||
String programmePath,
|
||||
String topicPath,
|
||||
String outputPath) {
|
||||
|
||||
Dataset<CSVProject> project = readPath(spark, projectPatH, CSVProject.class);
|
||||
Dataset<CSVProgramme> programme = readPath(spark, programmePath, CSVProgramme.class);
|
||||
Dataset<EXCELTopic> topic = readPath(spark, topicPath, EXCELTopic.class);
|
||||
|
||||
project
|
||||
Dataset<Project> aaproject = project
|
||||
.joinWith(programme, project.col("programme").equalTo(programme.col("code")), "left")
|
||||
.map(c -> {
|
||||
CSVProject csvProject = c._1();
|
||||
Optional<CSVProgramme> csvProgramme = Optional.ofNullable(c._2());
|
||||
if (csvProgramme.isPresent()) {
|
||||
Project p = new Project();
|
||||
p
|
||||
.setId(
|
||||
createOpenaireId(
|
||||
ModelSupport.entityIdPrefix.get("project"),
|
||||
"corda__h2020", csvProject.getId()));
|
||||
Programme pm = new Programme();
|
||||
pm.setCode(csvProject.getProgramme());
|
||||
pm.setDescription(csvProgramme.get().getShortTitle());
|
||||
p.setProgramme(Arrays.asList(pm));
|
||||
return p;
|
||||
}
|
||||
.map((MapFunction<Tuple2<CSVProject, CSVProgramme>, Project>) c -> {
|
||||
|
||||
return null;
|
||||
CSVProject csvProject = c._1();
|
||||
Optional<CSVProgramme> ocsvProgramme = Optional.ofNullable(c._2());
|
||||
|
||||
return Optional
|
||||
.ofNullable(c._2())
|
||||
.map(csvProgramme -> {
|
||||
Project pp = new Project();
|
||||
pp
|
||||
.setId(
|
||||
createOpenaireId(
|
||||
ModelSupport.entityIdPrefix.get("project"),
|
||||
"corda__h2020", csvProject.getId()));
|
||||
pp.setH2020topiccode(csvProject.getTopics());
|
||||
H2020Programme pm = new H2020Programme();
|
||||
H2020Classification h2020classification = new H2020Classification();
|
||||
pm.setCode(csvProject.getProgramme());
|
||||
h2020classification.setClassification(ocsvProgramme.get().getClassification());
|
||||
h2020classification.setH2020Programme(pm);
|
||||
setLevelsandProgramme(h2020classification, ocsvProgramme.get().getClassification_short());
|
||||
// setProgramme(h2020classification, ocsvProgramme.get().getClassification());
|
||||
pp.setH2020classification(Arrays.asList(h2020classification));
|
||||
|
||||
return pp;
|
||||
})
|
||||
.orElse(null);
|
||||
|
||||
}, Encoders.bean(Project.class));
|
||||
|
||||
aaproject
|
||||
.joinWith(topic, aaproject.col("h2020topiccode").equalTo(topic.col("code")))
|
||||
.map((MapFunction<Tuple2<Project, EXCELTopic>, Project>) p -> {
|
||||
Optional<EXCELTopic> op = Optional.ofNullable(p._2());
|
||||
Project rp = p._1();
|
||||
if (op.isPresent()) {
|
||||
rp.setH2020topicdescription(op.get().getTitle());
|
||||
}
|
||||
return rp;
|
||||
}, Encoders.bean(Project.class))
|
||||
.filter(Objects::nonNull)
|
||||
.groupByKey(
|
||||
|
@ -144,6 +177,24 @@ public class SparkAtomicActionJob {
|
|||
|
||||
}
|
||||
|
||||
private static void setLevelsandProgramme(H2020Classification h2020Classification, String classification_short) {
|
||||
String[] tmp = classification_short.split(" \\| ");
|
||||
h2020Classification.setLevel1(tmp[0]);
|
||||
if (tmp.length > 1) {
|
||||
h2020Classification.setLevel2(tmp[1]);
|
||||
}
|
||||
if (tmp.length > 2) {
|
||||
h2020Classification.setLevel3(tmp[2]);
|
||||
}
|
||||
h2020Classification.getH2020Programme().setDescription(tmp[tmp.length - 1]);
|
||||
}
|
||||
|
||||
// private static void setProgramme(H2020Classification h2020Classification, String classification) {
|
||||
// String[] tmp = classification.split(" \\| ");
|
||||
//
|
||||
// h2020Classification.getH2020Programme().setDescription(tmp[tmp.length - 1]);
|
||||
// }
|
||||
|
||||
public static <R> Dataset<R> readPath(
|
||||
SparkSession spark, String inputPath, Class<R> clazz) {
|
||||
return spark
|
||||
|
|
|
@ -1,52 +0,0 @@
|
|||
|
||||
package eu.dnetlib.dhp.actionmanager.project.csvutils;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
public class CSVProgramme implements Serializable {
|
||||
private String rcn;
|
||||
private String code;
|
||||
private String title;
|
||||
private String shortTitle;
|
||||
private String language;
|
||||
|
||||
public String getRcn() {
|
||||
return rcn;
|
||||
}
|
||||
|
||||
public void setRcn(String rcn) {
|
||||
this.rcn = rcn;
|
||||
}
|
||||
|
||||
public String getCode() {
|
||||
return code;
|
||||
}
|
||||
|
||||
public void setCode(String code) {
|
||||
this.code = code;
|
||||
}
|
||||
|
||||
public String getTitle() {
|
||||
return title;
|
||||
}
|
||||
|
||||
public void setTitle(String title) {
|
||||
this.title = title;
|
||||
}
|
||||
|
||||
public String getShortTitle() {
|
||||
return shortTitle;
|
||||
}
|
||||
|
||||
public void setShortTitle(String shortTitle) {
|
||||
this.shortTitle = shortTitle;
|
||||
}
|
||||
|
||||
public String getLanguage() {
|
||||
return language;
|
||||
}
|
||||
|
||||
public void setLanguage(String language) {
|
||||
this.language = language;
|
||||
}
|
||||
}
|
|
@ -1,5 +1,5 @@
|
|||
|
||||
package eu.dnetlib.dhp.actionmanager.project.csvutils;
|
||||
package eu.dnetlib.dhp.actionmanager.project.utils;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
|
@ -10,6 +10,9 @@ import org.apache.commons.csv.CSVFormat;
|
|||
import org.apache.commons.csv.CSVRecord;
|
||||
import org.apache.commons.lang.reflect.FieldUtils;
|
||||
|
||||
/**
|
||||
* Reads a generic csv and maps it into classes that mirror its schema
|
||||
*/
|
||||
public class CSVParser {
|
||||
|
||||
public <R> List<R> parse(String csvFile, String classForName)
|
|
@ -0,0 +1,146 @@
|
|||
|
||||
package eu.dnetlib.dhp.actionmanager.project.utils;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
/**
 * The model for the programme csv file.
 *
 * Plain bean with one String property per field. The field names are kept as they are because other code
 * addresses them by name; accessors are listed in the same order as the fields.
 */
public class CSVProgramme implements Serializable {
	private String parentProgramme;
	private String frameworkProgramme;
	private String startDate;
	private String endDate;
	private String objective;
	private String subjects;
	private String legalBasis;
	private String call;
	private String rcn;
	private String code;

	private String title;
	private String shortTitle;
	private String language;
	private String classification;
	private String classification_short;

	public String getParentProgramme() {
		return this.parentProgramme;
	}

	public void setParentProgramme(String parentProgramme) {
		this.parentProgramme = parentProgramme;
	}

	public String getFrameworkProgramme() {
		return this.frameworkProgramme;
	}

	public void setFrameworkProgramme(String frameworkProgramme) {
		this.frameworkProgramme = frameworkProgramme;
	}

	public String getStartDate() {
		return this.startDate;
	}

	public void setStartDate(String startDate) {
		this.startDate = startDate;
	}

	public String getEndDate() {
		return this.endDate;
	}

	public void setEndDate(String endDate) {
		this.endDate = endDate;
	}

	public String getObjective() {
		return this.objective;
	}

	public void setObjective(String objective) {
		this.objective = objective;
	}

	public String getSubjects() {
		return this.subjects;
	}

	public void setSubjects(String subjects) {
		this.subjects = subjects;
	}

	public String getLegalBasis() {
		return this.legalBasis;
	}

	public void setLegalBasis(String legalBasis) {
		this.legalBasis = legalBasis;
	}

	public String getCall() {
		return this.call;
	}

	public void setCall(String call) {
		this.call = call;
	}

	public String getRcn() {
		return this.rcn;
	}

	public void setRcn(String rcn) {
		this.rcn = rcn;
	}

	public String getCode() {
		return this.code;
	}

	public void setCode(String code) {
		this.code = code;
	}

	public String getTitle() {
		return this.title;
	}

	public void setTitle(String title) {
		this.title = title;
	}

	public String getShortTitle() {
		return this.shortTitle;
	}

	public void setShortTitle(String shortTitle) {
		this.shortTitle = shortTitle;
	}

	public String getLanguage() {
		return this.language;
	}

	public void setLanguage(String language) {
		this.language = language;
	}

	public String getClassification() {
		return this.classification;
	}

	public void setClassification(String classification) {
		this.classification = classification;
	}

	public String getClassification_short() {
		return this.classification_short;
	}

	public void setClassification_short(String classification_short) {
		this.classification_short = classification_short;
	}
}
|
|
@ -1,8 +1,11 @@
|
|||
|
||||
package eu.dnetlib.dhp.actionmanager.project.csvutils;
|
||||
package eu.dnetlib.dhp.actionmanager.project.utils;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
/**
|
||||
* the model for the projects csv file
|
||||
*/
|
||||
public class CSVProject implements Serializable {
|
||||
private String rcn;
|
||||
private String id;
|
|
@ -0,0 +1,75 @@
|
|||
|
||||
package eu.dnetlib.dhp.actionmanager.project.utils;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.commons.lang.StringUtils;
|
||||
import org.apache.commons.lang.reflect.FieldUtils;
|
||||
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
|
||||
import org.apache.poi.openxml4j.opc.OPCPackage;
|
||||
import org.apache.poi.ss.usermodel.Cell;
|
||||
import org.apache.poi.ss.usermodel.DataFormatter;
|
||||
import org.apache.poi.ss.usermodel.Row;
|
||||
import org.apache.poi.xssf.usermodel.XSSFSheet;
|
||||
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
|
||||
|
||||
/**
|
||||
* Reads a generic excel file and maps it into classes that mirror its schema
|
||||
*/
|
||||
public class EXCELParser {
|
||||
|
||||
public <R> List<R> parse(InputStream file, String classForName)
|
||||
throws ClassNotFoundException, IOException, IllegalAccessException, InstantiationException,
|
||||
InvalidFormatException {
|
||||
|
||||
// OPCPackage pkg = OPCPackage.open(httpConnector.getInputSourceAsStream(URL));
|
||||
OPCPackage pkg = OPCPackage.open(file);
|
||||
XSSFWorkbook wb = new XSSFWorkbook(pkg);
|
||||
|
||||
XSSFSheet sheet = wb.getSheet("cordisref-H2020topics");
|
||||
|
||||
List<R> ret = new ArrayList<>();
|
||||
|
||||
DataFormatter dataFormatter = new DataFormatter();
|
||||
Iterator<Row> rowIterator = sheet.rowIterator();
|
||||
List<String> headers = new ArrayList<>();
|
||||
int count = 0;
|
||||
while (rowIterator.hasNext()) {
|
||||
Row row = rowIterator.next();
|
||||
|
||||
if (count == 0) {
|
||||
Iterator<Cell> cellIterator = row.cellIterator();
|
||||
|
||||
while (cellIterator.hasNext()) {
|
||||
Cell cell = cellIterator.next();
|
||||
headers.add(dataFormatter.formatCellValue(cell));
|
||||
}
|
||||
} else {
|
||||
Class<?> clazz = Class.forName("eu.dnetlib.dhp.actionmanager.project.utils.EXCELTopic");
|
||||
final Object cc = clazz.newInstance();
|
||||
|
||||
for (int i = 0; i < headers.size(); i++) {
|
||||
Cell cell = row.getCell(i);
|
||||
String value = dataFormatter.formatCellValue(cell);
|
||||
FieldUtils.writeField(cc, headers.get(i), dataFormatter.formatCellValue(cell), true);
|
||||
|
||||
}
|
||||
|
||||
EXCELTopic et = (EXCELTopic) cc;
|
||||
if (StringUtils.isNotBlank(et.getRcn())) {
|
||||
ret.add((R) cc);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
count += 1;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,127 @@
|
|||
|
||||
package eu.dnetlib.dhp.actionmanager.project.utils;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
/**
 * The model class for the topic excel file.
 *
 * Plain bean with one String property per field; the field names are kept as they are because other code
 * addresses them by name.
 */
public class EXCELTopic implements Serializable {
	private String rcn;
	private String language;
	private String code;
	private String parentProgramme;
	private String frameworkProgramme;
	private String startDate;
	private String endDate;
	private String title;
	private String shortTitle;
	private String objective;
	private String subjects;
	private String legalBasis;
	private String call;

	// --- identification ---

	public String getRcn() {
		return this.rcn;
	}

	public void setRcn(String rcn) {
		this.rcn = rcn;
	}

	public String getLanguage() {
		return this.language;
	}

	public void setLanguage(String language) {
		this.language = language;
	}

	public String getCode() {
		return this.code;
	}

	public void setCode(String code) {
		this.code = code;
	}

	// --- programme and validity ---

	public String getParentProgramme() {
		return this.parentProgramme;
	}

	public void setParentProgramme(String parentProgramme) {
		this.parentProgramme = parentProgramme;
	}

	public String getFrameworkProgramme() {
		return this.frameworkProgramme;
	}

	public void setFrameworkProgramme(String frameworkProgramme) {
		this.frameworkProgramme = frameworkProgramme;
	}

	public String getStartDate() {
		return this.startDate;
	}

	public void setStartDate(String startDate) {
		this.startDate = startDate;
	}

	public String getEndDate() {
		return this.endDate;
	}

	public void setEndDate(String endDate) {
		this.endDate = endDate;
	}

	// --- descriptive fields ---

	public String getTitle() {
		return this.title;
	}

	public void setTitle(String title) {
		this.title = title;
	}

	public String getShortTitle() {
		return this.shortTitle;
	}

	public void setShortTitle(String shortTitle) {
		this.shortTitle = shortTitle;
	}

	public String getObjective() {
		return this.objective;
	}

	public void setObjective(String objective) {
		this.objective = objective;
	}

	public String getSubjects() {
		return this.subjects;
	}

	public void setSubjects(String subjects) {
		this.subjects = subjects;
	}

	public String getLegalBasis() {
		return this.legalBasis;
	}

	public void setLegalBasis(String legalBasis) {
		this.legalBasis = legalBasis;
	}

	public String getCall() {
		return this.call;
	}

	public void setCall(String call) {
		this.call = call;
	}
}
|
|
@ -1,5 +1,5 @@
|
|||
|
||||
package eu.dnetlib.dhp.actionmanager.project.csvutils;
|
||||
package eu.dnetlib.dhp.actionmanager.project.utils;
|
||||
|
||||
import java.io.BufferedWriter;
|
||||
import java.io.Closeable;
|
||||
|
@ -20,6 +20,9 @@ import com.fasterxml.jackson.databind.ObjectMapper;
|
|||
import eu.dnetlib.dhp.actionmanager.project.httpconnector.HttpConnector;
|
||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||
|
||||
/**
|
||||
* Applies the parsing of a csv file and writes the Serialization of it in hdfs
|
||||
*/
|
||||
public class ReadCSV implements Closeable {
|
||||
private static final Log log = LogFactory.getLog(ReadCSV.class);
|
||||
private final Configuration conf;
|
|
@ -0,0 +1,98 @@
|
|||
|
||||
package eu.dnetlib.dhp.actionmanager.project.utils;
|
||||
|
||||
import java.io.*;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.FSDataOutputStream;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
|
||||
import eu.dnetlib.dhp.actionmanager.project.httpconnector.HttpConnector;
|
||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||
|
||||
/**
|
||||
* Applies the parsing of an excel file and writes the Serialization of it in hdfs
|
||||
*/
|
||||
|
||||
public class ReadExcel implements Closeable {
|
||||
private static final Log log = LogFactory.getLog(ReadCSV.class);
|
||||
private final Configuration conf;
|
||||
private final BufferedWriter writer;
|
||||
private final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
||||
private InputStream excelFile;
|
||||
|
||||
public static void main(final String[] args) throws Exception {
|
||||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
|
||||
IOUtils
|
||||
.toString(
|
||||
ReadCSV.class
|
||||
.getResourceAsStream(
|
||||
"/eu/dnetlib/dhp/actionmanager/project/parameters.json")));
|
||||
|
||||
parser.parseArgument(args);
|
||||
|
||||
final String fileURL = parser.get("fileURL");
|
||||
final String hdfsPath = parser.get("hdfsPath");
|
||||
final String hdfsNameNode = parser.get("hdfsNameNode");
|
||||
final String classForName = parser.get("classForName");
|
||||
|
||||
try (final ReadExcel readExcel = new ReadExcel(hdfsPath, hdfsNameNode, fileURL)) {
|
||||
|
||||
log.info("Getting Excel file...");
|
||||
readExcel.execute(classForName);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
public void execute(final String classForName) throws Exception {
|
||||
EXCELParser excelParser = new EXCELParser();
|
||||
excelParser
|
||||
.parse(excelFile, classForName)
|
||||
.stream()
|
||||
.forEach(p -> write(p));
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
writer.close();
|
||||
}
|
||||
|
||||
public ReadExcel(
|
||||
final String hdfsPath,
|
||||
final String hdfsNameNode,
|
||||
final String fileURL)
|
||||
throws Exception {
|
||||
this.conf = new Configuration();
|
||||
this.conf.set("fs.defaultFS", hdfsNameNode);
|
||||
HttpConnector httpConnector = new HttpConnector();
|
||||
FileSystem fileSystem = FileSystem.get(this.conf);
|
||||
Path hdfsWritePath = new Path(hdfsPath);
|
||||
FSDataOutputStream fsDataOutputStream = null;
|
||||
if (fileSystem.exists(hdfsWritePath)) {
|
||||
fileSystem.delete(hdfsWritePath, false);
|
||||
}
|
||||
fsDataOutputStream = fileSystem.create(hdfsWritePath);
|
||||
|
||||
this.writer = new BufferedWriter(new OutputStreamWriter(fsDataOutputStream, StandardCharsets.UTF_8));
|
||||
this.excelFile = httpConnector.getInputSourceAsStream(fileURL);
|
||||
;
|
||||
}
|
||||
|
||||
protected void write(final Object p) {
|
||||
try {
|
||||
writer.write(OBJECT_MAPPER.writeValueAsString(p));
|
||||
writer.newLine();
|
||||
} catch (final Exception e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
|
@ -17,6 +17,12 @@
|
|||
"paramDescription": "the URL from where to get the programme file",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "tp",
|
||||
"paramLongName": "topicPath",
|
||||
"paramDescription": "the URL from where to get the topic file",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "o",
|
||||
"paramLongName": "outputPath",
|
||||
|
|
|
@ -31,6 +31,10 @@
|
|||
<name>spark2SqlQueryExecutionListeners</name>
|
||||
<value>com.cloudera.spark.lineage.NavigatorQueryListener</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>oozie.launcher.mapreduce.user.classpath.first</name>
|
||||
<value>true</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>sparkExecutorNumber</name>
|
||||
<value>4</value>
|
||||
|
|
|
@ -10,6 +10,10 @@
|
|||
<description>the url where to get the programme file</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>topicFileURL</name>
|
||||
<description>the url where to get the topic file</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>outputPath</name>
|
||||
<description>path where to store the action set</description>
|
||||
|
@ -33,11 +37,11 @@
|
|||
|
||||
<action name="get_project_file">
|
||||
<java>
|
||||
<main-class>eu.dnetlib.dhp.actionmanager.project.csvutils.ReadCSV</main-class>
|
||||
<main-class>eu.dnetlib.dhp.actionmanager.project.utils.ReadCSV</main-class>
|
||||
<arg>--hdfsNameNode</arg><arg>${nameNode}</arg>
|
||||
<arg>--fileURL</arg><arg>${projectFileURL}</arg>
|
||||
<arg>--hdfsPath</arg><arg>${workingDir}/projects</arg>
|
||||
<arg>--classForName</arg><arg>eu.dnetlib.dhp.actionmanager.project.csvutils.CSVProject</arg>
|
||||
<arg>--classForName</arg><arg>eu.dnetlib.dhp.actionmanager.project.utils.CSVProject</arg>
|
||||
</java>
|
||||
<ok to="get_programme_file"/>
|
||||
<error to="Kill"/>
|
||||
|
@ -45,11 +49,23 @@
|
|||
|
||||
<action name="get_programme_file">
|
||||
<java>
|
||||
<main-class>eu.dnetlib.dhp.actionmanager.project.csvutils.ReadCSV</main-class>
|
||||
<main-class>eu.dnetlib.dhp.actionmanager.project.utils.ReadCSV</main-class>
|
||||
<arg>--hdfsNameNode</arg><arg>${nameNode}</arg>
|
||||
<arg>--fileURL</arg><arg>${programmeFileURL}</arg>
|
||||
<arg>--hdfsPath</arg><arg>${workingDir}/programme</arg>
|
||||
<arg>--classForName</arg><arg>eu.dnetlib.dhp.actionmanager.project.csvutils.CSVProgramme</arg>
|
||||
<arg>--classForName</arg><arg>eu.dnetlib.dhp.actionmanager.project.utils.CSVProgramme</arg>
|
||||
</java>
|
||||
<ok to="get_topic_file"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="get_topic_file">
|
||||
<java>
|
||||
<main-class>eu.dnetlib.dhp.actionmanager.project.utils.ReadExcel</main-class>
|
||||
<arg>--hdfsNameNode</arg><arg>${nameNode}</arg>
|
||||
<arg>--fileURL</arg><arg>${topicFileURL}</arg>
|
||||
<arg>--hdfsPath</arg><arg>${workingDir}/topic</arg>
|
||||
<arg>--classForName</arg><arg>eu.dnetlib.dhp.actionmanager.project.utils.EXCELTopic</arg>
|
||||
</java>
|
||||
<ok to="read_projects"/>
|
||||
<error to="Kill"/>
|
||||
|
@ -136,6 +152,7 @@
|
|||
</spark-opts>
|
||||
<arg>--projectPath</arg><arg>${workingDir}/preparedProjects</arg>
|
||||
<arg>--programmePath</arg><arg>${workingDir}/preparedProgramme</arg>
|
||||
<arg>--topicPath</arg><arg>${workingDir}/topic</arg>
|
||||
<arg>--outputPath</arg><arg>${outputPath}</arg>
|
||||
</spark>
|
||||
<ok to="End"/>
|
||||
|
|
|
@ -1,27 +1,16 @@
|
|||
|
||||
package eu.dnetlib.dhp.actionmanager.project;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.junit.jupiter.api.BeforeAll;
|
||||
import org.junit.jupiter.api.Assertions;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import eu.dnetlib.dhp.actionmanager.project.csvutils.CSVParser;
|
||||
import eu.dnetlib.dhp.actionmanager.project.utils.CSVParser;
|
||||
|
||||
public class CSVParserTest {
|
||||
|
||||
private static Path workingDir;
|
||||
|
||||
@BeforeAll
|
||||
public static void beforeAll() throws IOException {
|
||||
workingDir = Files.createTempDirectory(CSVParserTest.class.getSimpleName());
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void readProgrammeTest() throws Exception {
|
||||
|
||||
|
@ -33,9 +22,10 @@ public class CSVParserTest {
|
|||
|
||||
CSVParser csvParser = new CSVParser();
|
||||
|
||||
List<Object> pl = csvParser.parse(programmecsv, "eu.dnetlib.dhp.actionmanager.project.csvutils.CSVProgramme");
|
||||
List<Object> pl = csvParser.parse(programmecsv, "eu.dnetlib.dhp.actionmanager.project.utils.CSVProgramme");
|
||||
|
||||
System.out.println(pl.size());
|
||||
Assertions.assertEquals(24, pl.size());
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -0,0 +1,44 @@
|
|||
|
||||
package eu.dnetlib.dhp.actionmanager.project;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
|
||||
import org.junit.jupiter.api.Assertions;
|
||||
import org.junit.jupiter.api.BeforeAll;
|
||||
import org.junit.jupiter.api.Disabled;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import eu.dnetlib.dhp.actionmanager.project.httpconnector.CollectorServiceException;
|
||||
import eu.dnetlib.dhp.actionmanager.project.httpconnector.HttpConnector;
|
||||
import eu.dnetlib.dhp.actionmanager.project.utils.EXCELParser;
|
||||
|
||||
@Disabled
|
||||
public class EXCELParserTest {
|
||||
|
||||
private static Path workingDir;
|
||||
private HttpConnector httpConnector = new HttpConnector();
|
||||
private static final String URL = "http://cordis.europa.eu/data/reference/cordisref-H2020topics.xlsx";
|
||||
|
||||
@BeforeAll
|
||||
public static void beforeAll() throws IOException {
|
||||
workingDir = Files.createTempDirectory(CSVParserTest.class.getSimpleName());
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void test1() throws CollectorServiceException, IOException, InvalidFormatException, ClassNotFoundException,
|
||||
IllegalAccessException, InstantiationException {
|
||||
|
||||
EXCELParser excelParser = new EXCELParser();
|
||||
|
||||
List<Object> pl = excelParser
|
||||
.parse(httpConnector.getInputSourceAsStream(URL), "eu.dnetlib.dhp.actionmanager.project.utils.ExcelTopic");
|
||||
|
||||
Assertions.assertEquals(3837, pl.size());
|
||||
|
||||
}
|
||||
}
|
|
@ -21,29 +21,29 @@ import org.slf4j.LoggerFactory;
|
|||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
|
||||
import eu.dnetlib.dhp.actionmanager.project.csvutils.CSVProgramme;
|
||||
import eu.dnetlib.dhp.actionmanager.project.utils.CSVProgramme;
|
||||
|
||||
public class PrepareProgrammeTest {
|
||||
public class PrepareH2020ProgrammeTest {
|
||||
|
||||
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
||||
|
||||
private static final ClassLoader cl = eu.dnetlib.dhp.actionmanager.project.PrepareProgrammeTest.class
|
||||
private static final ClassLoader cl = PrepareH2020ProgrammeTest.class
|
||||
.getClassLoader();
|
||||
|
||||
private static SparkSession spark;
|
||||
|
||||
private static Path workingDir;
|
||||
private static final Logger log = LoggerFactory
|
||||
.getLogger(eu.dnetlib.dhp.actionmanager.project.PrepareProgrammeTest.class);
|
||||
.getLogger(PrepareH2020ProgrammeTest.class);
|
||||
|
||||
@BeforeAll
|
||||
public static void beforeAll() throws IOException {
|
||||
workingDir = Files
|
||||
.createTempDirectory(eu.dnetlib.dhp.actionmanager.project.PrepareProgrammeTest.class.getSimpleName());
|
||||
.createTempDirectory(PrepareH2020ProgrammeTest.class.getSimpleName());
|
||||
log.info("using work dir {}", workingDir);
|
||||
|
||||
SparkConf conf = new SparkConf();
|
||||
conf.setAppName(eu.dnetlib.dhp.actionmanager.project.PrepareProgrammeTest.class.getSimpleName());
|
||||
conf.setAppName(PrepareH2020ProgrammeTest.class.getSimpleName());
|
||||
|
||||
conf.setMaster("local[*]");
|
||||
conf.set("spark.driver.host", "localhost");
|
||||
|
@ -54,7 +54,7 @@ public class PrepareProgrammeTest {
|
|||
|
||||
spark = SparkSession
|
||||
.builder()
|
||||
.appName(PrepareProgrammeTest.class.getSimpleName())
|
||||
.appName(PrepareH2020ProgrammeTest.class.getSimpleName())
|
||||
.config(conf)
|
||||
.getOrCreate();
|
||||
}
|
||||
|
@ -88,7 +88,62 @@ public class PrepareProgrammeTest {
|
|||
|
||||
Dataset<CSVProgramme> verificationDataset = spark.createDataset(tmp.rdd(), Encoders.bean(CSVProgramme.class));
|
||||
|
||||
Assertions.assertEquals(0, verificationDataset.filter("shortTitle =''").count());
|
||||
Assertions.assertEquals(0, verificationDataset.filter("title =''").count());
|
||||
|
||||
Assertions.assertEquals(0, verificationDataset.filter("classification = ''").count());
|
||||
|
||||
// tmp.foreach(csvProgramme -> System.out.println(OBJECT_MAPPER.writeValueAsString(csvProgramme)));
|
||||
|
||||
Assertions
|
||||
.assertEquals(
|
||||
"Societal challenges | Smart, Green And Integrated Transport | CLEANSKY2 | IADP Fast Rotorcraft",
|
||||
verificationDataset
|
||||
.filter("code = 'H2020-EU.3.4.5.3.'")
|
||||
.select("classification")
|
||||
.collectAsList()
|
||||
.get(0)
|
||||
.getString(0));
|
||||
|
||||
Assertions
|
||||
.assertEquals(
|
||||
"Euratom | Indirect actions | European Fusion Development Agreement",
|
||||
verificationDataset
|
||||
.filter("code = 'H2020-Euratom-1.9.'")
|
||||
.select("classification")
|
||||
.collectAsList()
|
||||
.get(0)
|
||||
.getString(0));
|
||||
|
||||
Assertions
|
||||
.assertEquals(
|
||||
"Industrial leadership | Leadership in enabling and industrial technologies | Advanced manufacturing and processing | New sustainable business models",
|
||||
verificationDataset
|
||||
.filter("code = 'H2020-EU.2.1.5.4.'")
|
||||
.select("classification")
|
||||
.collectAsList()
|
||||
.get(0)
|
||||
.getString(0));
|
||||
|
||||
Assertions
|
||||
.assertEquals(
|
||||
"Excellent science | Future and Emerging Technologies (FET) | FET Open",
|
||||
verificationDataset
|
||||
.filter("code = 'H2020-EU.1.2.1.'")
|
||||
.select("classification")
|
||||
.collectAsList()
|
||||
.get(0)
|
||||
.getString(0));
|
||||
|
||||
Assertions
|
||||
.assertEquals(
|
||||
"Industrial leadership | Leadership in enabling and industrial technologies | Biotechnology",
|
||||
verificationDataset
|
||||
.filter("code = 'H2020-EU.2.1.4.'")
|
||||
.select("classification")
|
||||
.collectAsList()
|
||||
.get(0)
|
||||
.getString(0));
|
||||
|
||||
}
|
||||
|
||||
}
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue