forked from D-Net/dnet-hadoop
Compare commits
90 Commits
main
...
cleanup_ac
Author | SHA1 | Date |
---|---|---|
Claudio Atzori | bb12d0b4df | |
Claudio Atzori | 7d3292551b | |
Claudio Atzori | c7634c55c7 | |
Michele De Bonis | a10e8d9f05 | |
Claudio Atzori | 14539f9c8b | |
Claudio Atzori | 1bc8c5d173 | |
Claudio Atzori | 1ccf01cdb8 | |
Claudio Atzori | b79cb155ba | |
Claudio Atzori | 33a02c5b9e | |
Claudio Atzori | 1182bca9eb | |
Claudio Atzori | 1c30eacac2 | |
Claudio Atzori | 6055212f77 | |
Claudio Atzori | 0031cf849e | |
Serafeim Chatzopoulos | 9f6e16a03c | |
Lampros Smyrnaios | 66cd28f70a | |
Lampros Smyrnaios | c6b1ab2a18 | |
Miriam Baglioni | d35edac212 | |
Miriam Baglioni | 6421f8fece | |
Miriam Baglioni | ac270f795b | |
Lampros Smyrnaios | 236aed8954 | |
Claudio Atzori | dd541f8cf5 | |
Lampros Smyrnaios | ff335578ea | |
Lampros Smyrnaios | 285416c74e | |
Lampros Smyrnaios | 3095047e5e | |
Antonis Lempesis | 0456f1b788 | |
Antonis Lempesis | 38636942c7 | |
Lampros Smyrnaios | d942a1101b | |
Giambattista Bloisi | 9bf2bda1c6 | |
Giambattista Bloisi | d90cb099b8 | |
Giambattista Bloisi | 4f2a61e10f | |
Claudio Atzori | 11fe3a4fe0 | |
Claudio Atzori | a8d68c9d29 | |
Miriam Baglioni | 8fe934810f | |
Miriam Baglioni | 9da006e98c | |
Giambattista Bloisi | 85c1eae7e0 | |
Claudio Atzori | b0eba210c0 | |
Claudio Atzori | 3776327a8c | |
Claudio Atzori | 0139f23d66 | |
Michele Artini | c726572418 | |
Claudio Atzori | ec79405cc9 | |
Miriam Baglioni | 1477406ecc | |
Claudio Atzori | 92c3abd5a4 | |
Claudio Atzori | ce2364743a | |
Claudio Atzori | f70dc76b61 | |
Claudio Atzori | 73bd1938a5 | |
Claudio Atzori | da5c1e73a4 | |
Claudio Atzori | a02f3f0d2b | |
Alessia Bardi | eadfd8d71d | |
Alessia Bardi | 05ee783c07 | |
Alessia Bardi | fe9fb59c90 | |
Claudio Atzori | c272c4ad68 | |
Alessia Bardi | c5f4da16a4 | |
Alessia | 1b165a14a0 | |
Michele Artini | e996787be2 | |
Claudio Atzori | 62716141c5 | |
Miriam Baglioni | 5d85b70e1f | |
Giambattista Bloisi | 73316d8c83 | |
Miriam Baglioni | 75d5ddb999 | |
Miriam Baglioni | 87c9c61b41 | |
Miriam Baglioni | b55fed09f8 | |
Claudio Atzori | 107d958b89 | |
Claudio Atzori | 3a7a6ecc32 | |
Claudio Atzori | 1af4224d3d | |
Claudio Atzori | 0d5bdb2db0 | |
Claudio Atzori | 66548e6a83 | |
Giambattista Bloisi | 1b2357e10a | |
Sandro La Bruzzo | f1fe363b19 | |
Sandro La Bruzzo | 66c1ffc866 | |
Sandro La Bruzzo | e8a61d5dd5 | |
Sandro La Bruzzo | ca9414b737 | |
Sandro La Bruzzo | 103e2652b3 | |
Sandro La Bruzzo | a87f9ea643 | |
Sandro La Bruzzo | 6efab4d88e | |
Sandro La Bruzzo | db358ad0d2 | |
Sandro La Bruzzo | 26bf8e763a | |
Sandro La Bruzzo | a860c57bbc | |
Sandro La Bruzzo | 0646d0d064 | |
Sandro La Bruzzo | 133ead1e3e | |
Sandro La Bruzzo | 052c6aac9d | |
Sandro La Bruzzo | 9cd3bc0f10 | |
Sandro La Bruzzo | 0d628cd62b | |
Sandro La Bruzzo | 073f320c6a | |
Sandro La Bruzzo | b84ad0c06e | |
Sandro La Bruzzo | 8dd9cf84e2 | |
Sandro La Bruzzo | 342cb6189b | |
Giambattista Bloisi | 613ec5ffce | |
Sandro La Bruzzo | 52495f2cd2 | |
Sandro La Bruzzo | 8c3e9a09d3 | |
Giambattista Bloisi | 2fa78f6071 | |
Giambattista Bloisi | 326c9dc08c |
|
@ -27,3 +27,4 @@ spark-warehouse
|
||||||
/**/.factorypath
|
/**/.factorypath
|
||||||
/**/.scalafmt.conf
|
/**/.scalafmt.conf
|
||||||
/.java-version
|
/.java-version
|
||||||
|
/dhp-shade-package/dependency-reduced-pom.xml
|
||||||
|
|
|
@ -80,7 +80,15 @@ class WritePredefinedProjectPropertiesTest {
|
||||||
mojo.outputFile = testFolder;
|
mojo.outputFile = testFolder;
|
||||||
|
|
||||||
// execute
|
// execute
|
||||||
Assertions.assertThrows(MojoExecutionException.class, () -> mojo.execute());
|
try {
|
||||||
|
mojo.execute();
|
||||||
|
Assertions.assertTrue(false); // not reached
|
||||||
|
} catch (Exception e) {
|
||||||
|
Assertions
|
||||||
|
.assertTrue(
|
||||||
|
MojoExecutionException.class.isAssignableFrom(e.getClass()) ||
|
||||||
|
IllegalArgumentException.class.isAssignableFrom(e.getClass()));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
|
|
@ -70,10 +70,7 @@
|
||||||
<groupId>com.ibm.icu</groupId>
|
<groupId>com.ibm.icu</groupId>
|
||||||
<artifactId>icu4j</artifactId>
|
<artifactId>icu4j</artifactId>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
|
||||||
<groupId>org.apache.hadoop</groupId>
|
|
||||||
<artifactId>hadoop-common</artifactId>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>com.github.sisyphsu</groupId>
|
<groupId>com.github.sisyphsu</groupId>
|
||||||
<artifactId>dateparser</artifactId>
|
<artifactId>dateparser</artifactId>
|
||||||
|
@ -163,7 +160,7 @@
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>eu.dnetlib.dhp</groupId>
|
<groupId>eu.dnetlib.dhp</groupId>
|
||||||
<artifactId>${dhp-schemas.artifact}</artifactId>
|
<artifactId>dhp-schemas</artifactId>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
|
@ -172,4 +169,23 @@
|
||||||
</dependency>
|
</dependency>
|
||||||
</dependencies>
|
</dependencies>
|
||||||
|
|
||||||
|
<!-- dependencies required on JDK9+ because J2EE has been removed -->
|
||||||
|
<profiles>
|
||||||
|
<profile>
|
||||||
|
<id>spark-34</id>
|
||||||
|
<dependencies>
|
||||||
|
<dependency>
|
||||||
|
<groupId>javax.xml.bind</groupId>
|
||||||
|
<artifactId>jaxb-api</artifactId>
|
||||||
|
<version>2.2.11</version>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.sun.xml.ws</groupId>
|
||||||
|
<artifactId>jaxws-ri</artifactId>
|
||||||
|
<version>2.3.3</version>
|
||||||
|
<type>pom</type>
|
||||||
|
</dependency>
|
||||||
|
</dependencies>
|
||||||
|
</profile>
|
||||||
|
</profiles>
|
||||||
</project>
|
</project>
|
||||||
|
|
|
@ -7,12 +7,12 @@ import java.sql.*;
|
||||||
import java.util.function.Consumer;
|
import java.util.function.Consumer;
|
||||||
|
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
import org.slf4j.Logger;
|
import org.apache.commons.logging.Log;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.apache.commons.logging.LogFactory;
|
||||||
|
|
||||||
public class DbClient implements Closeable {
|
public class DbClient implements Closeable {
|
||||||
|
|
||||||
private static final Logger log = LoggerFactory.getLogger(DbClient.class);
|
private static final Log log = LogFactory.getLog(DbClient.class);
|
||||||
|
|
||||||
private final Connection connection;
|
private final Connection connection;
|
||||||
|
|
||||||
|
@ -37,8 +37,6 @@ public class DbClient implements Closeable {
|
||||||
try (final Statement stmt = connection.createStatement()) {
|
try (final Statement stmt = connection.createStatement()) {
|
||||||
stmt.setFetchSize(100);
|
stmt.setFetchSize(100);
|
||||||
|
|
||||||
log.info("running SQL:\n\n{}\n\n", sql);
|
|
||||||
|
|
||||||
try (final ResultSet rs = stmt.executeQuery(sql)) {
|
try (final ResultSet rs = stmt.executeQuery(sql)) {
|
||||||
while (rs.next()) {
|
while (rs.next()) {
|
||||||
consumer.accept(rs);
|
consumer.accept(rs);
|
||||||
|
|
|
@ -38,7 +38,7 @@ public class PacePerson {
|
||||||
PacePerson.class
|
PacePerson.class
|
||||||
.getResourceAsStream(
|
.getResourceAsStream(
|
||||||
"/eu/dnetlib/dhp/common/name_particles.txt")));
|
"/eu/dnetlib/dhp/common/name_particles.txt")));
|
||||||
} catch (IOException e) {
|
} catch (Exception e) {
|
||||||
throw new RuntimeException(e);
|
throw new RuntimeException(e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,53 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.common.api;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.InputStream;
|
||||||
|
|
||||||
|
import okhttp3.MediaType;
|
||||||
|
import okhttp3.RequestBody;
|
||||||
|
import okhttp3.internal.Util;
|
||||||
|
import okio.BufferedSink;
|
||||||
|
import okio.Okio;
|
||||||
|
import okio.Source;
|
||||||
|
|
||||||
|
public class InputStreamRequestBody extends RequestBody {
|
||||||
|
|
||||||
|
private final InputStream inputStream;
|
||||||
|
private final MediaType mediaType;
|
||||||
|
private final long lenght;
|
||||||
|
|
||||||
|
public static RequestBody create(final MediaType mediaType, final InputStream inputStream, final long len) {
|
||||||
|
|
||||||
|
return new InputStreamRequestBody(inputStream, mediaType, len);
|
||||||
|
}
|
||||||
|
|
||||||
|
private InputStreamRequestBody(InputStream inputStream, MediaType mediaType, long len) {
|
||||||
|
this.inputStream = inputStream;
|
||||||
|
this.mediaType = mediaType;
|
||||||
|
this.lenght = len;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public MediaType contentType() {
|
||||||
|
return mediaType;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public long contentLength() {
|
||||||
|
|
||||||
|
return lenght;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void writeTo(BufferedSink sink) throws IOException {
|
||||||
|
Source source = null;
|
||||||
|
try {
|
||||||
|
source = Okio.source(inputStream);
|
||||||
|
sink.writeAll(source);
|
||||||
|
} finally {
|
||||||
|
Util.closeQuietly(source);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,8 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.common.api;
|
||||||
|
|
||||||
|
public class MissingConceptDoiException extends Throwable {
|
||||||
|
public MissingConceptDoiException(String message) {
|
||||||
|
super(message);
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,363 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.common.api;
|
||||||
|
|
||||||
|
import java.io.*;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.net.HttpURLConnection;
|
||||||
|
import java.net.URL;
|
||||||
|
import java.util.concurrent.TimeUnit;
|
||||||
|
|
||||||
|
import org.apache.http.HttpHeaders;
|
||||||
|
import org.apache.http.entity.ContentType;
|
||||||
|
import org.jetbrains.annotations.NotNull;
|
||||||
|
|
||||||
|
import com.google.gson.Gson;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.common.api.zenodo.ZenodoModel;
|
||||||
|
import eu.dnetlib.dhp.common.api.zenodo.ZenodoModelList;
|
||||||
|
import okhttp3.*;
|
||||||
|
|
||||||
|
public class ZenodoAPIClient implements Serializable {
|
||||||
|
|
||||||
|
String urlString;
|
||||||
|
String bucket;
|
||||||
|
|
||||||
|
String deposition_id;
|
||||||
|
String access_token;
|
||||||
|
|
||||||
|
public static final MediaType MEDIA_TYPE_JSON = MediaType.parse("application/json; charset=utf-8");
|
||||||
|
|
||||||
|
private static final MediaType MEDIA_TYPE_ZIP = MediaType.parse("application/zip");
|
||||||
|
|
||||||
|
public String getUrlString() {
|
||||||
|
return urlString;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setUrlString(String urlString) {
|
||||||
|
this.urlString = urlString;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getBucket() {
|
||||||
|
return bucket;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setBucket(String bucket) {
|
||||||
|
this.bucket = bucket;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setDeposition_id(String deposition_id) {
|
||||||
|
this.deposition_id = deposition_id;
|
||||||
|
}
|
||||||
|
|
||||||
|
public ZenodoAPIClient(String urlString, String access_token) {
|
||||||
|
|
||||||
|
this.urlString = urlString;
|
||||||
|
this.access_token = access_token;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Brand new deposition in Zenodo. It sets the deposition_id and the bucket where to store the files to upload
|
||||||
|
*
|
||||||
|
* @return response code
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
public int newDeposition() throws IOException {
|
||||||
|
String json = "{}";
|
||||||
|
|
||||||
|
URL url = new URL(urlString);
|
||||||
|
HttpURLConnection conn = (HttpURLConnection) url.openConnection();
|
||||||
|
conn.setRequestProperty(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString());
|
||||||
|
conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token);
|
||||||
|
conn.setRequestMethod("POST");
|
||||||
|
conn.setDoOutput(true);
|
||||||
|
try (OutputStream os = conn.getOutputStream()) {
|
||||||
|
byte[] input = json.getBytes("utf-8");
|
||||||
|
os.write(input, 0, input.length);
|
||||||
|
}
|
||||||
|
|
||||||
|
String body = getBody(conn);
|
||||||
|
|
||||||
|
int responseCode = conn.getResponseCode();
|
||||||
|
conn.disconnect();
|
||||||
|
|
||||||
|
if (!checkOKStatus(responseCode))
|
||||||
|
throw new IOException("Unexpected code " + responseCode + body);
|
||||||
|
|
||||||
|
ZenodoModel newSubmission = new Gson().fromJson(body, ZenodoModel.class);
|
||||||
|
this.bucket = newSubmission.getLinks().getBucket();
|
||||||
|
this.deposition_id = newSubmission.getId();
|
||||||
|
|
||||||
|
return responseCode;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Upload files in Zenodo.
|
||||||
|
*
|
||||||
|
* @param is the inputStream for the file to upload
|
||||||
|
* @param file_name the name of the file as it will appear on Zenodo
|
||||||
|
* @return the response code
|
||||||
|
*/
|
||||||
|
public int uploadIS(InputStream is, String file_name) throws IOException {
|
||||||
|
|
||||||
|
URL url = new URL(bucket + "/" + file_name);
|
||||||
|
HttpURLConnection conn = (HttpURLConnection) url.openConnection();
|
||||||
|
conn.setRequestProperty(HttpHeaders.CONTENT_TYPE, "application/zip");
|
||||||
|
conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token);
|
||||||
|
conn.setDoOutput(true);
|
||||||
|
conn.setRequestMethod("PUT");
|
||||||
|
|
||||||
|
byte[] buf = new byte[8192];
|
||||||
|
int length;
|
||||||
|
try (OutputStream os = conn.getOutputStream()) {
|
||||||
|
while ((length = is.read(buf)) != -1) {
|
||||||
|
os.write(buf, 0, length);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
int responseCode = conn.getResponseCode();
|
||||||
|
if (!checkOKStatus(responseCode)) {
|
||||||
|
throw new IOException("Unexpected code " + responseCode + getBody(conn));
|
||||||
|
}
|
||||||
|
|
||||||
|
return responseCode;
|
||||||
|
}
|
||||||
|
|
||||||
|
@NotNull
|
||||||
|
private String getBody(HttpURLConnection conn) throws IOException {
|
||||||
|
String body = "{}";
|
||||||
|
try (BufferedReader br = new BufferedReader(
|
||||||
|
new InputStreamReader(conn.getInputStream(), "utf-8"))) {
|
||||||
|
StringBuilder response = new StringBuilder();
|
||||||
|
String responseLine = null;
|
||||||
|
while ((responseLine = br.readLine()) != null) {
|
||||||
|
response.append(responseLine.trim());
|
||||||
|
}
|
||||||
|
|
||||||
|
body = response.toString();
|
||||||
|
|
||||||
|
}
|
||||||
|
return body;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Associates metadata information to the current deposition
|
||||||
|
*
|
||||||
|
* @param metadata the metadata
|
||||||
|
* @return response code
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
public int sendMretadata(String metadata) throws IOException {
|
||||||
|
|
||||||
|
URL url = new URL(urlString + "/" + deposition_id);
|
||||||
|
HttpURLConnection conn = (HttpURLConnection) url.openConnection();
|
||||||
|
conn.setRequestProperty(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString());
|
||||||
|
conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token);
|
||||||
|
conn.setDoOutput(true);
|
||||||
|
conn.setRequestMethod("PUT");
|
||||||
|
|
||||||
|
try (OutputStream os = conn.getOutputStream()) {
|
||||||
|
byte[] input = metadata.getBytes("utf-8");
|
||||||
|
os.write(input, 0, input.length);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
final int responseCode = conn.getResponseCode();
|
||||||
|
conn.disconnect();
|
||||||
|
if (!checkOKStatus(responseCode))
|
||||||
|
throw new IOException("Unexpected code " + responseCode + getBody(conn));
|
||||||
|
|
||||||
|
return responseCode;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean checkOKStatus(int responseCode) {
|
||||||
|
|
||||||
|
if (HttpURLConnection.HTTP_OK != responseCode ||
|
||||||
|
HttpURLConnection.HTTP_CREATED != responseCode)
|
||||||
|
return true;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* To publish the current deposition. It works for both new deposition or new version of an old deposition
|
||||||
|
*
|
||||||
|
* @return response code
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
@Deprecated
|
||||||
|
public int publish() throws IOException {
|
||||||
|
|
||||||
|
String json = "{}";
|
||||||
|
|
||||||
|
OkHttpClient httpClient = new OkHttpClient.Builder().connectTimeout(600, TimeUnit.SECONDS).build();
|
||||||
|
|
||||||
|
RequestBody body = RequestBody.create(json, MEDIA_TYPE_JSON);
|
||||||
|
|
||||||
|
Request request = new Request.Builder()
|
||||||
|
.url(urlString + "/" + deposition_id + "/actions/publish")
|
||||||
|
.addHeader("Authorization", "Bearer " + access_token)
|
||||||
|
.post(body)
|
||||||
|
.build();
|
||||||
|
|
||||||
|
try (Response response = httpClient.newCall(request).execute()) {
|
||||||
|
|
||||||
|
if (!response.isSuccessful())
|
||||||
|
throw new IOException("Unexpected code " + response + response.body().string());
|
||||||
|
|
||||||
|
return response.code();
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* To create a new version of an already published deposition. It sets the deposition_id and the bucket to be used
|
||||||
|
* for the new version.
|
||||||
|
*
|
||||||
|
* @param concept_rec_id the concept record id of the deposition for which to create a new version. It is the last
|
||||||
|
* part of the url for the DOI Zenodo suggests to use to cite all versions: DOI: 10.xxx/zenodo.656930
|
||||||
|
* concept_rec_id = 656930
|
||||||
|
* @return response code
|
||||||
|
*/
|
||||||
|
public int newVersion(String concept_rec_id) throws IOException, MissingConceptDoiException {
|
||||||
|
setDepositionId(concept_rec_id, 1);
|
||||||
|
String json = "{}";
|
||||||
|
|
||||||
|
URL url = new URL(urlString + "/" + deposition_id + "/actions/newversion");
|
||||||
|
HttpURLConnection conn = (HttpURLConnection) url.openConnection();
|
||||||
|
|
||||||
|
conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token);
|
||||||
|
conn.setDoOutput(true);
|
||||||
|
conn.setRequestMethod("POST");
|
||||||
|
|
||||||
|
try (OutputStream os = conn.getOutputStream()) {
|
||||||
|
byte[] input = json.getBytes("utf-8");
|
||||||
|
os.write(input, 0, input.length);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
String body = getBody(conn);
|
||||||
|
|
||||||
|
int responseCode = conn.getResponseCode();
|
||||||
|
|
||||||
|
conn.disconnect();
|
||||||
|
if (!checkOKStatus(responseCode))
|
||||||
|
throw new IOException("Unexpected code " + responseCode + body);
|
||||||
|
|
||||||
|
ZenodoModel zenodoModel = new Gson().fromJson(body, ZenodoModel.class);
|
||||||
|
String latest_draft = zenodoModel.getLinks().getLatest_draft();
|
||||||
|
deposition_id = latest_draft.substring(latest_draft.lastIndexOf("/") + 1);
|
||||||
|
bucket = getBucket(latest_draft);
|
||||||
|
|
||||||
|
return responseCode;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* To finish uploading a version or new deposition not published
|
||||||
|
* It sets the deposition_id and the bucket to be used
|
||||||
|
*
|
||||||
|
*
|
||||||
|
* @param deposition_id the deposition id of the not yet published upload
|
||||||
|
* concept_rec_id = 656930
|
||||||
|
* @return response code
|
||||||
|
* @throws IOException
|
||||||
|
* @throws MissingConceptDoiException
|
||||||
|
*/
|
||||||
|
public int uploadOpenDeposition(String deposition_id) throws IOException, MissingConceptDoiException {
|
||||||
|
|
||||||
|
this.deposition_id = deposition_id;
|
||||||
|
|
||||||
|
String json = "{}";
|
||||||
|
|
||||||
|
URL url = new URL(urlString + "/" + deposition_id);
|
||||||
|
HttpURLConnection conn = (HttpURLConnection) url.openConnection();
|
||||||
|
|
||||||
|
conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token);
|
||||||
|
conn.setRequestMethod("POST");
|
||||||
|
conn.setDoOutput(true);
|
||||||
|
try (OutputStream os = conn.getOutputStream()) {
|
||||||
|
byte[] input = json.getBytes("utf-8");
|
||||||
|
os.write(input, 0, input.length);
|
||||||
|
}
|
||||||
|
|
||||||
|
String body = getBody(conn);
|
||||||
|
|
||||||
|
int responseCode = conn.getResponseCode();
|
||||||
|
conn.disconnect();
|
||||||
|
|
||||||
|
if (!checkOKStatus(responseCode))
|
||||||
|
throw new IOException("Unexpected code " + responseCode + body);
|
||||||
|
|
||||||
|
ZenodoModel zenodoModel = new Gson().fromJson(body, ZenodoModel.class);
|
||||||
|
bucket = zenodoModel.getLinks().getBucket();
|
||||||
|
|
||||||
|
return responseCode;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private void setDepositionId(String concept_rec_id, Integer page) throws IOException, MissingConceptDoiException {
|
||||||
|
|
||||||
|
ZenodoModelList zenodoModelList = new Gson()
|
||||||
|
.fromJson(getPrevDepositions(String.valueOf(page)), ZenodoModelList.class);
|
||||||
|
|
||||||
|
for (ZenodoModel zm : zenodoModelList) {
|
||||||
|
if (zm.getConceptrecid().equals(concept_rec_id)) {
|
||||||
|
deposition_id = zm.getId();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (zenodoModelList.size() == 0)
|
||||||
|
throw new MissingConceptDoiException(
|
||||||
|
"The concept record id specified was missing in the list of depositions");
|
||||||
|
setDepositionId(concept_rec_id, page + 1);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private String getPrevDepositions(String page) throws IOException {
|
||||||
|
|
||||||
|
HttpUrl.Builder urlBuilder = HttpUrl.parse(urlString).newBuilder();
|
||||||
|
urlBuilder.addQueryParameter("page", page);
|
||||||
|
|
||||||
|
URL url = new URL(urlBuilder.build().toString());
|
||||||
|
HttpURLConnection conn = (HttpURLConnection) url.openConnection();
|
||||||
|
conn.setRequestProperty(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString());
|
||||||
|
conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token);
|
||||||
|
conn.setDoOutput(true);
|
||||||
|
conn.setRequestMethod("GET");
|
||||||
|
|
||||||
|
String body = getBody(conn);
|
||||||
|
|
||||||
|
int responseCode = conn.getResponseCode();
|
||||||
|
|
||||||
|
conn.disconnect();
|
||||||
|
if (!checkOKStatus(responseCode))
|
||||||
|
throw new IOException("Unexpected code " + responseCode + body);
|
||||||
|
|
||||||
|
return body;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private String getBucket(String inputUurl) throws IOException {
|
||||||
|
|
||||||
|
URL url = new URL(inputUurl);
|
||||||
|
HttpURLConnection conn = (HttpURLConnection) url.openConnection();
|
||||||
|
conn.setRequestProperty(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString());
|
||||||
|
conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token);
|
||||||
|
conn.setDoOutput(true);
|
||||||
|
conn.setRequestMethod("GET");
|
||||||
|
|
||||||
|
String body = getBody(conn);
|
||||||
|
|
||||||
|
int responseCode = conn.getResponseCode();
|
||||||
|
|
||||||
|
conn.disconnect();
|
||||||
|
if (!checkOKStatus(responseCode))
|
||||||
|
throw new IOException("Unexpected code " + responseCode + body);
|
||||||
|
|
||||||
|
ZenodoModel zenodoModel = new Gson().fromJson(body, ZenodoModel.class);
|
||||||
|
|
||||||
|
return zenodoModel.getLinks().getBucket();
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,14 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.common.api.zenodo;
|
||||||
|
|
||||||
|
public class Community {
|
||||||
|
private String identifier;
|
||||||
|
|
||||||
|
public String getIdentifier() {
|
||||||
|
return identifier;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setIdentifier(String identifier) {
|
||||||
|
this.identifier = identifier;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,47 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.common.api.zenodo;
|
||||||
|
|
||||||
|
public class Creator {
|
||||||
|
private String affiliation;
|
||||||
|
private String name;
|
||||||
|
private String orcid;
|
||||||
|
|
||||||
|
public String getAffiliation() {
|
||||||
|
return affiliation;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setAffiliation(String affiliation) {
|
||||||
|
this.affiliation = affiliation;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getName() {
|
||||||
|
return name;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setName(String name) {
|
||||||
|
this.name = name;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getOrcid() {
|
||||||
|
return orcid;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setOrcid(String orcid) {
|
||||||
|
this.orcid = orcid;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static Creator newInstance(String name, String affiliation, String orcid) {
|
||||||
|
Creator c = new Creator();
|
||||||
|
if (name != null) {
|
||||||
|
c.name = name;
|
||||||
|
}
|
||||||
|
if (affiliation != null) {
|
||||||
|
c.affiliation = affiliation;
|
||||||
|
}
|
||||||
|
if (orcid != null) {
|
||||||
|
c.orcid = orcid;
|
||||||
|
}
|
||||||
|
|
||||||
|
return c;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,44 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.common.api.zenodo;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
|
||||||
|
public class File implements Serializable {
|
||||||
|
private String checksum;
|
||||||
|
private String filename;
|
||||||
|
private long filesize;
|
||||||
|
private String id;
|
||||||
|
|
||||||
|
public String getChecksum() {
|
||||||
|
return checksum;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setChecksum(String checksum) {
|
||||||
|
this.checksum = checksum;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getFilename() {
|
||||||
|
return filename;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setFilename(String filename) {
|
||||||
|
this.filename = filename;
|
||||||
|
}
|
||||||
|
|
||||||
|
public long getFilesize() {
|
||||||
|
return filesize;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setFilesize(long filesize) {
|
||||||
|
this.filesize = filesize;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getId() {
|
||||||
|
return id;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setId(String id) {
|
||||||
|
this.id = id;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,23 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.common.api.zenodo;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
|
||||||
|
public class Grant implements Serializable {
|
||||||
|
private String id;
|
||||||
|
|
||||||
|
public String getId() {
|
||||||
|
return id;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setId(String id) {
|
||||||
|
this.id = id;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static Grant newInstance(String id) {
|
||||||
|
Grant g = new Grant();
|
||||||
|
g.id = id;
|
||||||
|
|
||||||
|
return g;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,92 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.common.api.zenodo;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
|
||||||
|
public class Links implements Serializable {
|
||||||
|
|
||||||
|
private String bucket;
|
||||||
|
|
||||||
|
private String discard;
|
||||||
|
|
||||||
|
private String edit;
|
||||||
|
private String files;
|
||||||
|
private String html;
|
||||||
|
private String latest_draft;
|
||||||
|
private String latest_draft_html;
|
||||||
|
private String publish;
|
||||||
|
|
||||||
|
private String self;
|
||||||
|
|
||||||
|
public String getBucket() {
|
||||||
|
return bucket;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setBucket(String bucket) {
|
||||||
|
this.bucket = bucket;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getDiscard() {
|
||||||
|
return discard;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setDiscard(String discard) {
|
||||||
|
this.discard = discard;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getEdit() {
|
||||||
|
return edit;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setEdit(String edit) {
|
||||||
|
this.edit = edit;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getFiles() {
|
||||||
|
return files;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setFiles(String files) {
|
||||||
|
this.files = files;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getHtml() {
|
||||||
|
return html;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setHtml(String html) {
|
||||||
|
this.html = html;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getLatest_draft() {
|
||||||
|
return latest_draft;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setLatest_draft(String latest_draft) {
|
||||||
|
this.latest_draft = latest_draft;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getLatest_draft_html() {
|
||||||
|
return latest_draft_html;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setLatest_draft_html(String latest_draft_html) {
|
||||||
|
this.latest_draft_html = latest_draft_html;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getPublish() {
|
||||||
|
return publish;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setPublish(String publish) {
|
||||||
|
this.publish = publish;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getSelf() {
|
||||||
|
return self;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setSelf(String self) {
|
||||||
|
this.self = self;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,153 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.common.api.zenodo;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
public class Metadata implements Serializable {
|
||||||
|
|
||||||
|
private String access_right;
|
||||||
|
private List<Community> communities;
|
||||||
|
private List<Creator> creators;
|
||||||
|
private String description;
|
||||||
|
private String doi;
|
||||||
|
private List<Grant> grants;
|
||||||
|
private List<String> keywords;
|
||||||
|
private String language;
|
||||||
|
private String license;
|
||||||
|
private PrereserveDoi prereserve_doi;
|
||||||
|
private String publication_date;
|
||||||
|
private List<String> references;
|
||||||
|
private List<RelatedIdentifier> related_identifiers;
|
||||||
|
private String title;
|
||||||
|
private String upload_type;
|
||||||
|
private String version;
|
||||||
|
|
||||||
|
public String getUpload_type() {
|
||||||
|
return upload_type;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setUpload_type(String upload_type) {
|
||||||
|
this.upload_type = upload_type;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getVersion() {
|
||||||
|
return version;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setVersion(String version) {
|
||||||
|
this.version = version;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getAccess_right() {
|
||||||
|
return access_right;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setAccess_right(String access_right) {
|
||||||
|
this.access_right = access_right;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<Community> getCommunities() {
|
||||||
|
return communities;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setCommunities(List<Community> communities) {
|
||||||
|
this.communities = communities;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<Creator> getCreators() {
|
||||||
|
return creators;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setCreators(List<Creator> creators) {
|
||||||
|
this.creators = creators;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getDescription() {
|
||||||
|
return description;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setDescription(String description) {
|
||||||
|
this.description = description;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getDoi() {
|
||||||
|
return doi;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setDoi(String doi) {
|
||||||
|
this.doi = doi;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<Grant> getGrants() {
|
||||||
|
return grants;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setGrants(List<Grant> grants) {
|
||||||
|
this.grants = grants;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<String> getKeywords() {
|
||||||
|
return keywords;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setKeywords(List<String> keywords) {
|
||||||
|
this.keywords = keywords;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getLanguage() {
|
||||||
|
return language;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setLanguage(String language) {
|
||||||
|
this.language = language;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getLicense() {
|
||||||
|
return license;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setLicense(String license) {
|
||||||
|
this.license = license;
|
||||||
|
}
|
||||||
|
|
||||||
|
public PrereserveDoi getPrereserve_doi() {
|
||||||
|
return prereserve_doi;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setPrereserve_doi(PrereserveDoi prereserve_doi) {
|
||||||
|
this.prereserve_doi = prereserve_doi;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getPublication_date() {
|
||||||
|
return publication_date;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setPublication_date(String publication_date) {
|
||||||
|
this.publication_date = publication_date;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<String> getReferences() {
|
||||||
|
return references;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setReferences(List<String> references) {
|
||||||
|
this.references = references;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<RelatedIdentifier> getRelated_identifiers() {
|
||||||
|
return related_identifiers;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setRelated_identifiers(List<RelatedIdentifier> related_identifiers) {
|
||||||
|
this.related_identifiers = related_identifiers;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getTitle() {
|
||||||
|
return title;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setTitle(String title) {
|
||||||
|
this.title = title;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,25 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.common.api.zenodo;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
|
||||||
|
public class PrereserveDoi implements Serializable {
|
||||||
|
private String doi;
|
||||||
|
private String recid;
|
||||||
|
|
||||||
|
public String getDoi() {
|
||||||
|
return doi;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setDoi(String doi) {
|
||||||
|
this.doi = doi;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getRecid() {
|
||||||
|
return recid;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setRecid(String recid) {
|
||||||
|
this.recid = recid;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,43 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.common.api.zenodo;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
|
||||||
|
public class RelatedIdentifier implements Serializable {
|
||||||
|
private String identifier;
|
||||||
|
private String relation;
|
||||||
|
private String resource_type;
|
||||||
|
private String scheme;
|
||||||
|
|
||||||
|
public String getIdentifier() {
|
||||||
|
return identifier;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setIdentifier(String identifier) {
|
||||||
|
this.identifier = identifier;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getRelation() {
|
||||||
|
return relation;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setRelation(String relation) {
|
||||||
|
this.relation = relation;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getResource_type() {
|
||||||
|
return resource_type;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setResource_type(String resource_type) {
|
||||||
|
this.resource_type = resource_type;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getScheme() {
|
||||||
|
return scheme;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setScheme(String scheme) {
|
||||||
|
this.scheme = scheme;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,118 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.common.api.zenodo;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
public class ZenodoModel implements Serializable {
|
||||||
|
|
||||||
|
private String conceptrecid;
|
||||||
|
private String created;
|
||||||
|
|
||||||
|
private List<File> files;
|
||||||
|
private String id;
|
||||||
|
private Links links;
|
||||||
|
private Metadata metadata;
|
||||||
|
private String modified;
|
||||||
|
private String owner;
|
||||||
|
private String record_id;
|
||||||
|
private String state;
|
||||||
|
private boolean submitted;
|
||||||
|
private String title;
|
||||||
|
|
||||||
|
public String getConceptrecid() {
|
||||||
|
return conceptrecid;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setConceptrecid(String conceptrecid) {
|
||||||
|
this.conceptrecid = conceptrecid;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getCreated() {
|
||||||
|
return created;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setCreated(String created) {
|
||||||
|
this.created = created;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<File> getFiles() {
|
||||||
|
return files;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setFiles(List<File> files) {
|
||||||
|
this.files = files;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getId() {
|
||||||
|
return id;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setId(String id) {
|
||||||
|
this.id = id;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Links getLinks() {
|
||||||
|
return links;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setLinks(Links links) {
|
||||||
|
this.links = links;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Metadata getMetadata() {
|
||||||
|
return metadata;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setMetadata(Metadata metadata) {
|
||||||
|
this.metadata = metadata;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getModified() {
|
||||||
|
return modified;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setModified(String modified) {
|
||||||
|
this.modified = modified;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getOwner() {
|
||||||
|
return owner;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setOwner(String owner) {
|
||||||
|
this.owner = owner;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getRecord_id() {
|
||||||
|
return record_id;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setRecord_id(String record_id) {
|
||||||
|
this.record_id = record_id;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getState() {
|
||||||
|
return state;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setState(String state) {
|
||||||
|
this.state = state;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean isSubmitted() {
|
||||||
|
return submitted;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setSubmitted(boolean submitted) {
|
||||||
|
this.submitted = submitted;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getTitle() {
|
||||||
|
return title;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setTitle(String title) {
|
||||||
|
this.title = title;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,7 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.common.api.zenodo;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
|
||||||
|
public class ZenodoModelList extends ArrayList<ZenodoModel> {
|
||||||
|
}
|
|
@ -12,9 +12,7 @@ import java.util.concurrent.TimeUnit;
|
||||||
|
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
import org.apache.commons.lang3.math.NumberUtils;
|
import org.apache.commons.lang3.math.NumberUtils;
|
||||||
import org.apache.commons.lang3.time.DateUtils;
|
|
||||||
import org.apache.http.HttpHeaders;
|
import org.apache.http.HttpHeaders;
|
||||||
import org.joda.time.Instant;
|
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
|
|
@ -119,7 +119,7 @@ public class GraphCleaningFunctions extends CleaningFunctions {
|
||||||
.getContext()
|
.getContext()
|
||||||
.stream()
|
.stream()
|
||||||
.filter(c -> !StringUtils.startsWith(c.getId().toLowerCase(), contextId))
|
.filter(c -> !StringUtils.startsWith(c.getId().toLowerCase(), contextId))
|
||||||
.collect(Collectors.toCollection(ArrayList::new)));
|
.collect(Collectors.toList()));
|
||||||
}
|
}
|
||||||
return (T) res;
|
return (T) res;
|
||||||
} else {
|
} else {
|
||||||
|
@ -1003,41 +1003,4 @@ public class GraphCleaningFunctions extends CleaningFunctions {
|
||||||
.orElse(null);
|
.orElse(null);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Implements bad and ugly things that we should get rid of ASAP.
|
|
||||||
*
|
|
||||||
* @param value
|
|
||||||
* @return
|
|
||||||
* @param <T>
|
|
||||||
*/
|
|
||||||
public static <T extends Oaf> T dedicatedUglyHacks(T value) {
|
|
||||||
if (value instanceof OafEntity) {
|
|
||||||
if (value instanceof Result) {
|
|
||||||
final Result r = (Result) value;
|
|
||||||
|
|
||||||
// Fix for AMS Acta
|
|
||||||
Optional
|
|
||||||
.ofNullable(r.getInstance())
|
|
||||||
.map(
|
|
||||||
instance -> instance
|
|
||||||
.stream()
|
|
||||||
.filter(
|
|
||||||
i -> Optional
|
|
||||||
.ofNullable(i.getHostedby())
|
|
||||||
.map(KeyValue::getKey)
|
|
||||||
.map(dsId -> dsId.equals("10|re3data_____::4cc76bed7ce2fb95fd8e7a2dfde16016"))
|
|
||||||
.orElse(false)))
|
|
||||||
.ifPresent(instance -> instance.forEach(i -> {
|
|
||||||
if (Optional
|
|
||||||
.ofNullable(i.getPid())
|
|
||||||
.map(pid -> pid.stream().noneMatch(p -> p.getValue().startsWith("10.6092/unibo/amsacta")))
|
|
||||||
.orElse(false)) {
|
|
||||||
i.setHostedby(UNKNOWN_REPOSITORY);
|
|
||||||
}
|
|
||||||
}));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return value;
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -328,7 +328,7 @@ public class MergeUtils {
|
||||||
final T merged = mergeOafFields(original, enrich, trust);
|
final T merged = mergeOafFields(original, enrich, trust);
|
||||||
|
|
||||||
merged.setOriginalId(unionDistinctListOfString(merged.getOriginalId(), enrich.getOriginalId()));
|
merged.setOriginalId(unionDistinctListOfString(merged.getOriginalId(), enrich.getOriginalId()));
|
||||||
merged.setPid(mergeLists(merged.getPid(), enrich.getPid(), trust, MergeUtils::spKeyExtractor, (p1, p2) -> p1));
|
merged.setPid(unionDistinctLists(merged.getPid(), enrich.getPid(), trust));
|
||||||
merged.setDateofcollection(LocalDateTime.now().toString());
|
merged.setDateofcollection(LocalDateTime.now().toString());
|
||||||
merged
|
merged
|
||||||
.setDateoftransformation(
|
.setDateoftransformation(
|
||||||
|
@ -432,10 +432,7 @@ public class MergeUtils {
|
||||||
|
|
||||||
// merge datainfo for same context id
|
// merge datainfo for same context id
|
||||||
merge.setContext(mergeLists(merge.getContext(), enrich.getContext(), trust, Context::getId, (r, l) -> {
|
merge.setContext(mergeLists(merge.getContext(), enrich.getContext(), trust, Context::getId, (r, l) -> {
|
||||||
ArrayList<DataInfo> di = new ArrayList<>();
|
r.getDataInfo().addAll(l.getDataInfo());
|
||||||
di.addAll(r.getDataInfo());
|
|
||||||
di.addAll(l.getDataInfo());
|
|
||||||
r.setDataInfo(di);
|
|
||||||
return r;
|
return r;
|
||||||
}));
|
}));
|
||||||
|
|
||||||
|
|
|
@ -154,5 +154,13 @@
|
||||||
"unknown":{
|
"unknown":{
|
||||||
"original":"Unknown",
|
"original":"Unknown",
|
||||||
"inverse":"Unknown"
|
"inverse":"Unknown"
|
||||||
|
},
|
||||||
|
"isamongtopnsimilardocuments": {
|
||||||
|
"original": "IsAmongTopNSimilarDocuments",
|
||||||
|
"inverse": "HasAmongTopNSimilarDocuments"
|
||||||
|
},
|
||||||
|
"hasamongtopnsimilardocuments": {
|
||||||
|
"original": "HasAmongTopNSimilarDocuments",
|
||||||
|
"inverse": "IsAmongTopNSimilarDocuments"
|
||||||
}
|
}
|
||||||
}
|
}
|
|
@ -65,12 +65,13 @@ abstract class AbstractScalaApplication(
|
||||||
val conf: SparkConf = new SparkConf()
|
val conf: SparkConf = new SparkConf()
|
||||||
val master = parser.get("master")
|
val master = parser.get("master")
|
||||||
log.info(s"Creating Spark session: Master: $master")
|
log.info(s"Creating Spark session: Master: $master")
|
||||||
SparkSession
|
val b = SparkSession
|
||||||
.builder()
|
.builder()
|
||||||
.config(conf)
|
.config(conf)
|
||||||
.appName(getClass.getSimpleName)
|
.appName(getClass.getSimpleName)
|
||||||
.master(master)
|
if (master != null)
|
||||||
.getOrCreate()
|
b.master(master)
|
||||||
|
b.getOrCreate()
|
||||||
}
|
}
|
||||||
|
|
||||||
def reportTotalSize(targetPath: String, outputBasePath: String): Unit = {
|
def reportTotalSize(targetPath: String, outputBasePath: String): Unit = {
|
||||||
|
|
|
@ -65,7 +65,11 @@ object ScholixUtils extends Serializable {
|
||||||
}
|
}
|
||||||
|
|
||||||
def generateScholixResourceFromResult(r: Result): ScholixResource = {
|
def generateScholixResourceFromResult(r: Result): ScholixResource = {
|
||||||
generateScholixResourceFromSummary(ScholixUtils.resultToSummary(r))
|
val sum = ScholixUtils.resultToSummary(r)
|
||||||
|
if (sum != null)
|
||||||
|
generateScholixResourceFromSummary(ScholixUtils.resultToSummary(r))
|
||||||
|
else
|
||||||
|
null
|
||||||
}
|
}
|
||||||
|
|
||||||
val statsAggregator: Aggregator[(String, String, Long), RelatedEntities, RelatedEntities] =
|
val statsAggregator: Aggregator[(String, String, Long), RelatedEntities, RelatedEntities] =
|
||||||
|
@ -153,6 +157,14 @@ object ScholixUtils extends Serializable {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
def invRel(rel: String): String = {
|
||||||
|
val semanticRelation = relations.getOrElse(rel.toLowerCase, null)
|
||||||
|
if (semanticRelation != null)
|
||||||
|
semanticRelation.inverse
|
||||||
|
else
|
||||||
|
null
|
||||||
|
}
|
||||||
|
|
||||||
def extractCollectedFrom(summary: ScholixResource): List[ScholixEntityId] = {
|
def extractCollectedFrom(summary: ScholixResource): List[ScholixEntityId] = {
|
||||||
if (summary.getCollectedFrom != null && !summary.getCollectedFrom.isEmpty) {
|
if (summary.getCollectedFrom != null && !summary.getCollectedFrom.isEmpty) {
|
||||||
val l: List[ScholixEntityId] = summary.getCollectedFrom.asScala.map { d =>
|
val l: List[ScholixEntityId] = summary.getCollectedFrom.asScala.map { d =>
|
||||||
|
@ -377,10 +389,7 @@ object ScholixUtils extends Serializable {
|
||||||
if (persistentIdentifiers.isEmpty)
|
if (persistentIdentifiers.isEmpty)
|
||||||
return null
|
return null
|
||||||
s.setLocalIdentifier(persistentIdentifiers.asJava)
|
s.setLocalIdentifier(persistentIdentifiers.asJava)
|
||||||
if (r.isInstanceOf[Publication])
|
// s.setTypology(r.getResulttype.getClassid)
|
||||||
s.setTypology(Typology.publication)
|
|
||||||
else
|
|
||||||
s.setTypology(Typology.dataset)
|
|
||||||
|
|
||||||
s.setSubType(r.getInstance().get(0).getInstancetype.getClassname)
|
s.setSubType(r.getInstance().get(0).getInstancetype.getClassname)
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,109 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.common.api;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.FileInputStream;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.InputStream;
|
||||||
|
|
||||||
|
import org.apache.commons.io.IOUtils;
|
||||||
|
import org.junit.jupiter.api.Assertions;
|
||||||
|
import org.junit.jupiter.api.Disabled;
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
|
@Disabled
|
||||||
|
class ZenodoAPIClientTest {
|
||||||
|
|
||||||
|
private final String URL_STRING = "https://sandbox.zenodo.org/api/deposit/depositions";
|
||||||
|
private final String ACCESS_TOKEN = "";
|
||||||
|
|
||||||
|
private final String CONCEPT_REC_ID = "657113";
|
||||||
|
|
||||||
|
private final String depositionId = "674915";
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void testUploadOldDeposition() throws IOException, MissingConceptDoiException {
|
||||||
|
ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING,
|
||||||
|
ACCESS_TOKEN);
|
||||||
|
Assertions.assertEquals(200, client.uploadOpenDeposition(depositionId));
|
||||||
|
|
||||||
|
File file = new File(getClass()
|
||||||
|
.getResource("/eu/dnetlib/dhp/common/api/COVID-19.json.gz")
|
||||||
|
.getPath());
|
||||||
|
|
||||||
|
InputStream is = new FileInputStream(file);
|
||||||
|
|
||||||
|
Assertions.assertEquals(200, client.uploadIS(is, "COVID-19.json.gz"));
|
||||||
|
|
||||||
|
String metadata = IOUtils.toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/common/api/metadata.json"));
|
||||||
|
|
||||||
|
Assertions.assertEquals(200, client.sendMretadata(metadata));
|
||||||
|
|
||||||
|
Assertions.assertEquals(202, client.publish());
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void testNewDeposition() throws IOException {
|
||||||
|
|
||||||
|
ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING,
|
||||||
|
ACCESS_TOKEN);
|
||||||
|
Assertions.assertEquals(201, client.newDeposition());
|
||||||
|
|
||||||
|
File file = new File(getClass()
|
||||||
|
.getResource("/eu/dnetlib/dhp/common/api/COVID-19.json.gz")
|
||||||
|
.getPath());
|
||||||
|
|
||||||
|
InputStream is = new FileInputStream(file);
|
||||||
|
|
||||||
|
Assertions.assertEquals(200, client.uploadIS(is, "COVID-19.json.gz"));
|
||||||
|
|
||||||
|
String metadata = IOUtils.toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/common/api/metadata.json"));
|
||||||
|
|
||||||
|
Assertions.assertEquals(200, client.sendMretadata(metadata));
|
||||||
|
|
||||||
|
Assertions.assertEquals(202, client.publish());
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void testNewVersionNewName() throws IOException, MissingConceptDoiException {
|
||||||
|
|
||||||
|
ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING,
|
||||||
|
ACCESS_TOKEN);
|
||||||
|
|
||||||
|
Assertions.assertEquals(201, client.newVersion(CONCEPT_REC_ID));
|
||||||
|
|
||||||
|
File file = new File(getClass()
|
||||||
|
.getResource("/eu/dnetlib/dhp/common/api/newVersion")
|
||||||
|
.getPath());
|
||||||
|
|
||||||
|
InputStream is = new FileInputStream(file);
|
||||||
|
|
||||||
|
Assertions.assertEquals(200, client.uploadIS(is, "newVersion_deposition"));
|
||||||
|
|
||||||
|
Assertions.assertEquals(202, client.publish());
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void testNewVersionOldName() throws IOException, MissingConceptDoiException {
|
||||||
|
|
||||||
|
ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING,
|
||||||
|
ACCESS_TOKEN);
|
||||||
|
|
||||||
|
Assertions.assertEquals(201, client.newVersion(CONCEPT_REC_ID));
|
||||||
|
|
||||||
|
File file = new File(getClass()
|
||||||
|
.getResource("/eu/dnetlib/dhp/common/api/newVersion2")
|
||||||
|
.getPath());
|
||||||
|
|
||||||
|
InputStream is = new FileInputStream(file);
|
||||||
|
|
||||||
|
Assertions.assertEquals(200, client.uploadIS(is, "newVersion_deposition"));
|
||||||
|
|
||||||
|
Assertions.assertEquals(202, client.publish());
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -24,7 +24,7 @@
|
||||||
<executions>
|
<executions>
|
||||||
<execution>
|
<execution>
|
||||||
<id>scala-compile-first</id>
|
<id>scala-compile-first</id>
|
||||||
<phase>initialize</phase>
|
<phase>process-resources</phase>
|
||||||
<goals>
|
<goals>
|
||||||
<goal>add-source</goal>
|
<goal>add-source</goal>
|
||||||
<goal>compile</goal>
|
<goal>compile</goal>
|
||||||
|
@ -59,14 +59,6 @@
|
||||||
<groupId>edu.cmu</groupId>
|
<groupId>edu.cmu</groupId>
|
||||||
<artifactId>secondstring</artifactId>
|
<artifactId>secondstring</artifactId>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
|
||||||
<groupId>com.google.guava</groupId>
|
|
||||||
<artifactId>guava</artifactId>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
|
||||||
<groupId>com.google.code.gson</groupId>
|
|
||||||
<artifactId>gson</artifactId>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.commons</groupId>
|
<groupId>org.apache.commons</groupId>
|
||||||
<artifactId>commons-lang3</artifactId>
|
<artifactId>commons-lang3</artifactId>
|
||||||
|
@ -91,10 +83,6 @@
|
||||||
<groupId>com.fasterxml.jackson.core</groupId>
|
<groupId>com.fasterxml.jackson.core</groupId>
|
||||||
<artifactId>jackson-databind</artifactId>
|
<artifactId>jackson-databind</artifactId>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
|
||||||
<groupId>org.apache.commons</groupId>
|
|
||||||
<artifactId>commons-math3</artifactId>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>com.jayway.jsonpath</groupId>
|
<groupId>com.jayway.jsonpath</groupId>
|
||||||
<artifactId>json-path</artifactId>
|
<artifactId>json-path</artifactId>
|
||||||
|
@ -113,4 +101,90 @@
|
||||||
</dependency>
|
</dependency>
|
||||||
</dependencies>
|
</dependencies>
|
||||||
|
|
||||||
|
<profiles>
|
||||||
|
<profile>
|
||||||
|
<id>spark-24</id>
|
||||||
|
<activation>
|
||||||
|
<activeByDefault>true</activeByDefault>
|
||||||
|
</activation>
|
||||||
|
|
||||||
|
<build>
|
||||||
|
<plugins>
|
||||||
|
<plugin>
|
||||||
|
<groupId>org.codehaus.mojo</groupId>
|
||||||
|
<artifactId>build-helper-maven-plugin</artifactId>
|
||||||
|
<version>3.4.0</version>
|
||||||
|
<executions>
|
||||||
|
<execution>
|
||||||
|
<phase>generate-sources</phase>
|
||||||
|
<goals>
|
||||||
|
<goal>add-source</goal>
|
||||||
|
</goals>
|
||||||
|
<configuration>
|
||||||
|
<sources>
|
||||||
|
<source>src/main/spark-2</source>
|
||||||
|
</sources>
|
||||||
|
</configuration>
|
||||||
|
</execution>
|
||||||
|
</executions>
|
||||||
|
</plugin>
|
||||||
|
</plugins>
|
||||||
|
</build>
|
||||||
|
</profile>
|
||||||
|
|
||||||
|
<profile>
|
||||||
|
<id>spark-34</id>
|
||||||
|
|
||||||
|
<build>
|
||||||
|
<plugins>
|
||||||
|
<plugin>
|
||||||
|
<groupId>org.codehaus.mojo</groupId>
|
||||||
|
<artifactId>build-helper-maven-plugin</artifactId>
|
||||||
|
<version>3.4.0</version>
|
||||||
|
<executions>
|
||||||
|
<execution>
|
||||||
|
<phase>generate-sources</phase>
|
||||||
|
<goals>
|
||||||
|
<goal>add-source</goal>
|
||||||
|
</goals>
|
||||||
|
<configuration>
|
||||||
|
<sources>
|
||||||
|
<source>src/main/spark-2</source>
|
||||||
|
</sources>
|
||||||
|
</configuration>
|
||||||
|
</execution>
|
||||||
|
</executions>
|
||||||
|
</plugin>
|
||||||
|
</plugins>
|
||||||
|
</build>
|
||||||
|
</profile>
|
||||||
|
|
||||||
|
<profile>
|
||||||
|
<id>spark-35</id>
|
||||||
|
|
||||||
|
<build>
|
||||||
|
<plugins>
|
||||||
|
<plugin>
|
||||||
|
<groupId>org.codehaus.mojo</groupId>
|
||||||
|
<artifactId>build-helper-maven-plugin</artifactId>
|
||||||
|
<version>3.4.0</version>
|
||||||
|
<executions>
|
||||||
|
<execution>
|
||||||
|
<phase>generate-sources</phase>
|
||||||
|
<goals>
|
||||||
|
<goal>add-source</goal>
|
||||||
|
</goals>
|
||||||
|
<configuration>
|
||||||
|
<sources>
|
||||||
|
<source>src/main/spark-35</source>
|
||||||
|
</sources>
|
||||||
|
</configuration>
|
||||||
|
</execution>
|
||||||
|
</executions>
|
||||||
|
</plugin>
|
||||||
|
</plugins>
|
||||||
|
</build>
|
||||||
|
</profile>
|
||||||
|
</profiles>
|
||||||
|
|
||||||
</project>
|
</project>
|
||||||
|
|
|
@ -3,7 +3,7 @@ package eu.dnetlib.pace.model
|
||||||
import com.jayway.jsonpath.{Configuration, JsonPath}
|
import com.jayway.jsonpath.{Configuration, JsonPath}
|
||||||
import eu.dnetlib.pace.common.AbstractPaceFunctions
|
import eu.dnetlib.pace.common.AbstractPaceFunctions
|
||||||
import eu.dnetlib.pace.config.{DedupConfig, Type}
|
import eu.dnetlib.pace.config.{DedupConfig, Type}
|
||||||
import eu.dnetlib.pace.util.MapDocumentUtil
|
import eu.dnetlib.pace.util.{MapDocumentUtil, SparkCompatUtils}
|
||||||
import org.apache.commons.lang3.StringUtils
|
import org.apache.commons.lang3.StringUtils
|
||||||
import org.apache.spark.sql.catalyst.encoders.RowEncoder
|
import org.apache.spark.sql.catalyst.encoders.RowEncoder
|
||||||
import org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema
|
import org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema
|
||||||
|
@ -52,7 +52,7 @@ case class SparkModel(conf: DedupConfig) {
|
||||||
val orderingFieldPosition: Int = schema.fieldIndex(orderingFieldName)
|
val orderingFieldPosition: Int = schema.fieldIndex(orderingFieldName)
|
||||||
|
|
||||||
val parseJsonDataset: (Dataset[String] => Dataset[Row]) = df => {
|
val parseJsonDataset: (Dataset[String] => Dataset[Row]) = df => {
|
||||||
df.map(r => rowFromJson(r))(RowEncoder(schema))
|
df.map(r => rowFromJson(r))(SparkCompatUtils.encoderFor(schema))
|
||||||
}
|
}
|
||||||
|
|
||||||
def rowFromJson(json: String): Row = {
|
def rowFromJson(json: String): Row = {
|
||||||
|
|
|
@ -1,4 +1,3 @@
|
||||||
|
|
||||||
package eu.dnetlib.pace.tree;
|
package eu.dnetlib.pace.tree;
|
||||||
|
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
@ -12,37 +11,37 @@ import eu.dnetlib.pace.tree.support.ComparatorClass;
|
||||||
@ComparatorClass("countryMatch")
|
@ComparatorClass("countryMatch")
|
||||||
public class CountryMatch extends AbstractStringComparator {
|
public class CountryMatch extends AbstractStringComparator {
|
||||||
|
|
||||||
public CountryMatch(Map<String, String> params) {
|
public CountryMatch(Map<String, String> params) {
|
||||||
super(params, new com.wcohen.ss.JaroWinkler());
|
super(params, new com.wcohen.ss.JaroWinkler());
|
||||||
}
|
}
|
||||||
|
|
||||||
public CountryMatch(final double weight) {
|
public CountryMatch(final double weight) {
|
||||||
super(weight, new com.wcohen.ss.JaroWinkler());
|
super(weight, new com.wcohen.ss.JaroWinkler());
|
||||||
}
|
}
|
||||||
|
|
||||||
protected CountryMatch(final double weight, final AbstractStringDistance ssalgo) {
|
protected CountryMatch(final double weight, final AbstractStringDistance ssalgo) {
|
||||||
super(weight, ssalgo);
|
super(weight, ssalgo);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public double distance(final String a, final String b, final Config conf) {
|
public double distance(final String a, final String b, final Config conf) {
|
||||||
if (a.isEmpty() || b.isEmpty()) {
|
if (a.isEmpty() || b.isEmpty()) {
|
||||||
return -1.0; // return -1 if a field is missing
|
return -1.0; // return -1 if a field is missing
|
||||||
}
|
}
|
||||||
if (a.equalsIgnoreCase("unknown") || b.equalsIgnoreCase("unknown")) {
|
if (a.equalsIgnoreCase("unknown") || b.equalsIgnoreCase("unknown")) {
|
||||||
return -1.0; // return -1 if a country is UNKNOWN
|
return -1.0; // return -1 if a country is UNKNOWN
|
||||||
}
|
}
|
||||||
|
|
||||||
return a.equals(b) ? 1.0 : 0;
|
return a.equals(b) ? 1.0 : 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public double getWeight() {
|
public double getWeight() {
|
||||||
return super.weight;
|
return super.weight;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected double normalize(final double d) {
|
protected double normalize(final double d) {
|
||||||
return d;
|
return d;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,12 @@
|
||||||
|
package eu.dnetlib.pace.util
|
||||||
|
|
||||||
|
import org.apache.spark.sql.Row
|
||||||
|
import org.apache.spark.sql.catalyst.encoders.{ExpressionEncoder, RowEncoder}
|
||||||
|
import org.apache.spark.sql.types.StructType
|
||||||
|
|
||||||
|
object SparkCompatUtils {
|
||||||
|
|
||||||
|
def encoderFor(schema: StructType): ExpressionEncoder[Row] = {
|
||||||
|
RowEncoder(schema)
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,12 @@
|
||||||
|
package eu.dnetlib.pace.util
|
||||||
|
|
||||||
|
import org.apache.spark.sql.Row
|
||||||
|
import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder
|
||||||
|
import org.apache.spark.sql.types.StructType
|
||||||
|
|
||||||
|
object SparkCompatUtils {
|
||||||
|
|
||||||
|
def encoderFor(schema: StructType): ExpressionEncoder[Row] = {
|
||||||
|
ExpressionEncoder(schema)
|
||||||
|
}
|
||||||
|
}
|
|
@ -11,6 +11,7 @@ import org.junit.jupiter.api.Disabled;
|
||||||
import org.junit.jupiter.api.Test;
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
import eu.dnetlib.pace.model.Person;
|
import eu.dnetlib.pace.model.Person;
|
||||||
|
import jdk.nashorn.internal.ir.annotations.Ignore;
|
||||||
|
|
||||||
public class UtilTest {
|
public class UtilTest {
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,169 @@
|
||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project xmlns="http://maven.apache.org/POM/4.0.0"
|
||||||
|
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||||
|
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||||
|
<modelVersion>4.0.0</modelVersion>
|
||||||
|
<parent>
|
||||||
|
<groupId>eu.dnetlib.dhp</groupId>
|
||||||
|
<artifactId>dhp</artifactId>
|
||||||
|
<version>1.2.5-SNAPSHOT</version>
|
||||||
|
<relativePath>../pom.xml</relativePath>
|
||||||
|
|
||||||
|
</parent>
|
||||||
|
|
||||||
|
<artifactId>dhp-shade-package</artifactId>
|
||||||
|
<packaging>jar</packaging>
|
||||||
|
|
||||||
|
<distributionManagement>
|
||||||
|
<site>
|
||||||
|
<id>DHPSite</id>
|
||||||
|
<url>${dhp.site.stage.path}/dhp-common</url>
|
||||||
|
</site>
|
||||||
|
</distributionManagement>
|
||||||
|
|
||||||
|
<description>This module create a jar of all module dependencies</description>
|
||||||
|
|
||||||
|
|
||||||
|
<dependencies>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>eu.dnetlib.dhp</groupId>
|
||||||
|
<artifactId>dhp-actionmanager</artifactId>
|
||||||
|
<version>${project.version}</version>
|
||||||
|
</dependency>
|
||||||
|
<!-- <dependency>-->
|
||||||
|
<!-- <groupId>eu.dnetlib.dhp</groupId>-->
|
||||||
|
<!-- <artifactId>dhp-aggregation</artifactId>-->
|
||||||
|
<!-- <version>${project.version}</version>-->
|
||||||
|
<!-- </dependency>-->
|
||||||
|
<!-- <dependency>-->
|
||||||
|
<!-- <groupId>eu.dnetlib.dhp</groupId>-->
|
||||||
|
<!-- <artifactId>dhp-blacklist</artifactId>-->
|
||||||
|
<!-- <version>${project.version}</version>-->
|
||||||
|
<!-- </dependency>-->
|
||||||
|
<!-- <dependency>-->
|
||||||
|
<!-- <groupId>eu.dnetlib.dhp</groupId>-->
|
||||||
|
<!-- <artifactId>dhp-broker-events</artifactId>-->
|
||||||
|
<!-- <version>${project.version}</version>-->
|
||||||
|
<!-- </dependency>-->
|
||||||
|
<!-- <dependency>-->
|
||||||
|
<!-- <groupId>eu.dnetlib.dhp</groupId>-->
|
||||||
|
<!-- <artifactId>dhp-dedup-openaire</artifactId>-->
|
||||||
|
<!-- <version>${project.version}</version>-->
|
||||||
|
<!-- </dependency>-->
|
||||||
|
<!-- <dependency>-->
|
||||||
|
<!-- <groupId>eu.dnetlib.dhp</groupId>-->
|
||||||
|
<!-- <artifactId>dhp-enrichment</artifactId>-->
|
||||||
|
<!-- <version>${project.version}</version>-->
|
||||||
|
<!-- </dependency>-->
|
||||||
|
<dependency>
|
||||||
|
<groupId>eu.dnetlib.dhp</groupId>
|
||||||
|
<artifactId>dhp-graph-mapper</artifactId>
|
||||||
|
<version>${project.version}</version>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>eu.dnetlib.dhp</groupId>
|
||||||
|
<artifactId>dhp-graph-provision</artifactId>
|
||||||
|
<version>${project.version}</version>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>eu.dnetlib.dhp</groupId>
|
||||||
|
<artifactId>dhp-impact-indicators</artifactId>
|
||||||
|
<version>${project.version}</version>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>eu.dnetlib.dhp</groupId>
|
||||||
|
<artifactId>dhp-stats-actionsets</artifactId>
|
||||||
|
<version>${project.version}</version>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>eu.dnetlib.dhp</groupId>
|
||||||
|
<artifactId>dhp-stats-hist-snaps</artifactId>
|
||||||
|
<version>${project.version}</version>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>eu.dnetlib.dhp</groupId>
|
||||||
|
<artifactId>dhp-stats-monitor-irish</artifactId>
|
||||||
|
<version>${project.version}</version>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>eu.dnetlib.dhp</groupId>
|
||||||
|
<artifactId>dhp-stats-promote</artifactId>
|
||||||
|
<version>${project.version}</version>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>eu.dnetlib.dhp</groupId>
|
||||||
|
<artifactId>dhp-stats-update</artifactId>
|
||||||
|
<version>${project.version}</version>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>eu.dnetlib.dhp</groupId>
|
||||||
|
<artifactId>dhp-swh</artifactId>
|
||||||
|
<version>${project.version}</version>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>eu.dnetlib.dhp</groupId>
|
||||||
|
<artifactId>dhp-usage-raw-data-update</artifactId>
|
||||||
|
<version>${project.version}</version>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>eu.dnetlib.dhp</groupId>
|
||||||
|
<artifactId>dhp-usage-stats-build</artifactId>
|
||||||
|
<version>${project.version}</version>
|
||||||
|
</dependency>
|
||||||
|
</dependencies>
|
||||||
|
|
||||||
|
|
||||||
|
<build>
|
||||||
|
<plugins>
|
||||||
|
<plugin>
|
||||||
|
<groupId>org.apache.maven.plugins</groupId>
|
||||||
|
<artifactId>maven-shade-plugin</artifactId>
|
||||||
|
<executions>
|
||||||
|
<execution>
|
||||||
|
<phase>package</phase>
|
||||||
|
<goals>
|
||||||
|
<goal>shade</goal>
|
||||||
|
</goals>
|
||||||
|
<configuration>
|
||||||
|
<transformers>
|
||||||
|
<transformer
|
||||||
|
implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
|
||||||
|
<mainClass>eu.dnetlib.dhp.oa.dedup.SparkCreateSimRels</mainClass>
|
||||||
|
</transformer>
|
||||||
|
<!-- This is needed if you have dependencies that use Service Loader. Most Google Cloud client libraries do. -->
|
||||||
|
<transformer
|
||||||
|
implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer"/>
|
||||||
|
<transformer
|
||||||
|
implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer">
|
||||||
|
<resource>META-INF/cxf/bus-extensions.txt</resource>
|
||||||
|
</transformer>
|
||||||
|
</transformers>
|
||||||
|
<filters>
|
||||||
|
<filter>
|
||||||
|
<artifact>*:*</artifact>
|
||||||
|
<excludes>
|
||||||
|
<exclude>META-INF/maven/**</exclude>
|
||||||
|
<exclude>META-INF/*.SF</exclude>
|
||||||
|
<exclude>META-INF/*.DSA</exclude>
|
||||||
|
<exclude>META-INF/*.RSA</exclude>
|
||||||
|
</excludes>
|
||||||
|
</filter>
|
||||||
|
</filters>
|
||||||
|
<relocations>
|
||||||
|
<relocation>
|
||||||
|
<pattern>com</pattern>
|
||||||
|
<shadedPattern>repackaged.com.google.common</shadedPattern>
|
||||||
|
<includes>
|
||||||
|
<include>com.google.common.**</include>
|
||||||
|
</includes>
|
||||||
|
</relocation>
|
||||||
|
</relocations>
|
||||||
|
</configuration>
|
||||||
|
</execution>
|
||||||
|
</executions>
|
||||||
|
</plugin>
|
||||||
|
</plugins>
|
||||||
|
</build>
|
||||||
|
|
||||||
|
</project>
|
|
@ -50,11 +50,12 @@
|
||||||
<groupId>org.apache.hadoop</groupId>
|
<groupId>org.apache.hadoop</groupId>
|
||||||
<artifactId>hadoop-distcp</artifactId>
|
<artifactId>hadoop-distcp</artifactId>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
<!--
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>eu.dnetlib</groupId>
|
<groupId>eu.dnetlib</groupId>
|
||||||
<artifactId>dnet-actionmanager-api</artifactId>
|
<artifactId>dnet-actionmanager-api</artifactId>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>eu.dnetlib</groupId>
|
<groupId>eu.dnetlib</groupId>
|
||||||
<artifactId>dnet-actionmanager-common</artifactId>
|
<artifactId>dnet-actionmanager-common</artifactId>
|
||||||
|
@ -93,6 +94,7 @@
|
||||||
</exclusion>
|
</exclusion>
|
||||||
</exclusions>
|
</exclusions>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
-->
|
||||||
|
|
||||||
</dependencies>
|
</dependencies>
|
||||||
</project>
|
</project>
|
||||||
|
|
|
@ -4,7 +4,6 @@ package eu.dnetlib.dhp.actionmanager;
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
import java.io.StringReader;
|
import java.io.StringReader;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.NoSuchElementException;
|
|
||||||
import java.util.Optional;
|
import java.util.Optional;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
@ -22,13 +21,14 @@ import com.google.common.base.Splitter;
|
||||||
import com.google.common.collect.Iterables;
|
import com.google.common.collect.Iterables;
|
||||||
import com.google.common.collect.Sets;
|
import com.google.common.collect.Sets;
|
||||||
|
|
||||||
import eu.dnetlib.actionmanager.rmi.ActionManagerException;
|
|
||||||
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
|
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
|
||||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
|
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
|
||||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
||||||
|
|
||||||
public class ISClient implements Serializable {
|
public class ISClient implements Serializable {
|
||||||
|
|
||||||
|
private static final long serialVersionUID = 4632443200867340872L;
|
||||||
|
|
||||||
private static final Logger log = LoggerFactory.getLogger(ISClient.class);
|
private static final Logger log = LoggerFactory.getLogger(ISClient.class);
|
||||||
|
|
||||||
private static final String INPUT_ACTION_SET_ID_SEPARATOR = ",";
|
private static final String INPUT_ACTION_SET_ID_SEPARATOR = ",";
|
||||||
|
@ -65,7 +65,7 @@ public class ISClient implements Serializable {
|
||||||
.map(t -> buildDirectory(basePath, t))
|
.map(t -> buildDirectory(basePath, t))
|
||||||
.collect(Collectors.toList()))
|
.collect(Collectors.toList()))
|
||||||
.orElseThrow(() -> new IllegalStateException("empty set list"));
|
.orElseThrow(() -> new IllegalStateException("empty set list"));
|
||||||
} catch (ActionManagerException | ISLookUpException e) {
|
} catch (ISLookUpException e) {
|
||||||
throw new IllegalStateException("unable to query ActionSets info from the IS");
|
throw new IllegalStateException("unable to query ActionSets info from the IS");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -89,31 +89,18 @@ public class ISClient implements Serializable {
|
||||||
return Joiner.on("/").join(basePath, t.getMiddle(), t.getRight());
|
return Joiner.on("/").join(basePath, t.getMiddle(), t.getRight());
|
||||||
}
|
}
|
||||||
|
|
||||||
private String getBasePathHDFS(ISLookUpService isLookup) throws ActionManagerException {
|
private String getBasePathHDFS(ISLookUpService isLookup) throws ISLookUpException {
|
||||||
return queryServiceProperty(isLookup, "basePath");
|
return queryServiceProperty(isLookup, "basePath");
|
||||||
}
|
}
|
||||||
|
|
||||||
private String queryServiceProperty(ISLookUpService isLookup, final String propertyName)
|
private String queryServiceProperty(ISLookUpService isLookup, final String propertyName)
|
||||||
throws ActionManagerException {
|
throws ISLookUpException {
|
||||||
final String q = "for $x in /RESOURCE_PROFILE[.//RESOURCE_TYPE/@value='ActionManagerServiceResourceType'] return $x//SERVICE_PROPERTIES/PROPERTY[./@ key='"
|
final String q = "for $x in /RESOURCE_PROFILE[.//RESOURCE_TYPE/@value='ActionManagerServiceResourceType'] return $x//SERVICE_PROPERTIES/PROPERTY[./@ key='"
|
||||||
+ propertyName
|
+ propertyName
|
||||||
+ "']/@value/string()";
|
+ "']/@value/string()";
|
||||||
log.debug("quering for service property: {}", q);
|
log.debug("quering for service property: {}", q);
|
||||||
try {
|
|
||||||
final List<String> value = isLookup.quickSearchProfile(q);
|
final List<String> value = isLookup.quickSearchProfile(q);
|
||||||
return Iterables.getOnlyElement(value);
|
return Iterables.getOnlyElement(value);
|
||||||
} catch (ISLookUpException e) {
|
|
||||||
String msg = "Error accessing service profile, using query: " + q;
|
|
||||||
log.error(msg, e);
|
|
||||||
throw new ActionManagerException(msg, e);
|
|
||||||
} catch (NoSuchElementException e) {
|
|
||||||
String msg = "missing service property: " + propertyName;
|
|
||||||
log.error(msg, e);
|
|
||||||
throw new ActionManagerException(msg, e);
|
|
||||||
} catch (IllegalArgumentException e) {
|
|
||||||
String msg = "found more than one service property: " + propertyName;
|
|
||||||
log.error(msg, e);
|
|
||||||
throw new ActionManagerException(msg, e);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -42,9 +42,6 @@ public class Constants {
|
||||||
public static final String NULL = "NULL";
|
public static final String NULL = "NULL";
|
||||||
public static final String NA = "N/A";
|
public static final String NA = "N/A";
|
||||||
|
|
||||||
public static final String WEB_CRAWL_ID = "10|openaire____::fb98a192f6a055ba495ef414c330834b";
|
|
||||||
public static final String WEB_CRAWL_NAME = "Web Crawl";
|
|
||||||
|
|
||||||
public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
||||||
|
|
||||||
private Constants() {
|
private Constants() {
|
||||||
|
|
|
@ -41,9 +41,9 @@ public class PrepareAffiliationRelations implements Serializable {
|
||||||
private static final Logger log = LoggerFactory.getLogger(PrepareAffiliationRelations.class);
|
private static final Logger log = LoggerFactory.getLogger(PrepareAffiliationRelations.class);
|
||||||
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
||||||
private static final String ID_PREFIX = "50|doi_________::";
|
private static final String ID_PREFIX = "50|doi_________::";
|
||||||
public static final String BIP_AFFILIATIONS_CLASSID = "result:organization:openaireinference";
|
public static final String BIP_AFFILIATIONS_CLASSID = "result:organization:bipinference";
|
||||||
public static final String BIP_AFFILIATIONS_CLASSNAME = "Affiliation relation inferred by OpenAIRE";
|
public static final String BIP_AFFILIATIONS_CLASSNAME = "Affiliation relation inferred by BIP!";
|
||||||
public static final String BIP_INFERENCE_PROVENANCE = "openaire:affiliation";
|
public static final String BIP_INFERENCE_PROVENANCE = "bip:affiliation:crossref";
|
||||||
|
|
||||||
public static <I extends Result> void main(String[] args) throws Exception {
|
public static <I extends Result> void main(String[] args) throws Exception {
|
||||||
|
|
||||||
|
@ -71,9 +71,6 @@ public class PrepareAffiliationRelations implements Serializable {
|
||||||
final String dataciteInputPath = parser.get("dataciteInputPath");
|
final String dataciteInputPath = parser.get("dataciteInputPath");
|
||||||
log.info("dataciteInputPath: {}", dataciteInputPath);
|
log.info("dataciteInputPath: {}", dataciteInputPath);
|
||||||
|
|
||||||
final String webcrawlInputPath = parser.get("webCrawlInputPath");
|
|
||||||
log.info("webcrawlInputPath: {}", webcrawlInputPath);
|
|
||||||
|
|
||||||
final String outputPath = parser.get("outputPath");
|
final String outputPath = parser.get("outputPath");
|
||||||
log.info("outputPath: {}", outputPath);
|
log.info("outputPath: {}", outputPath);
|
||||||
|
|
||||||
|
@ -105,16 +102,10 @@ public class PrepareAffiliationRelations implements Serializable {
|
||||||
JavaPairRDD<Text, Text> dataciteRelations = prepareAffiliationRelations(
|
JavaPairRDD<Text, Text> dataciteRelations = prepareAffiliationRelations(
|
||||||
spark, dataciteInputPath, collectedFromDatacite);
|
spark, dataciteInputPath, collectedFromDatacite);
|
||||||
|
|
||||||
List<KeyValue> collectedFromWebCrawl = OafMapperUtils
|
|
||||||
.listKeyValues(Constants.WEB_CRAWL_ID, Constants.WEB_CRAWL_NAME);
|
|
||||||
JavaPairRDD<Text, Text> webCrawlRelations = prepareAffiliationRelations(
|
|
||||||
spark, webcrawlInputPath, collectedFromWebCrawl);
|
|
||||||
|
|
||||||
crossrefRelations
|
crossrefRelations
|
||||||
.union(pubmedRelations)
|
.union(pubmedRelations)
|
||||||
.union(openAPCRelations)
|
.union(openAPCRelations)
|
||||||
.union(dataciteRelations)
|
.union(dataciteRelations)
|
||||||
.union(webCrawlRelations)
|
|
||||||
.saveAsHadoopFile(
|
.saveAsHadoopFile(
|
||||||
outputPath, Text.class, Text.class, SequenceFileOutputFormat.class, BZip2Codec.class);
|
outputPath, Text.class, Text.class, SequenceFileOutputFormat.class, BZip2Codec.class);
|
||||||
|
|
||||||
|
|
|
@ -10,6 +10,7 @@ import java.util.stream.Collectors;
|
||||||
|
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
import org.apache.hadoop.io.Text;
|
import org.apache.hadoop.io.Text;
|
||||||
|
import org.apache.hadoop.io.compress.BZip2Codec;
|
||||||
import org.apache.hadoop.io.compress.GzipCodec;
|
import org.apache.hadoop.io.compress.GzipCodec;
|
||||||
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
|
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
|
||||||
import org.apache.spark.SparkConf;
|
import org.apache.spark.SparkConf;
|
||||||
|
@ -83,7 +84,7 @@ public class SparkAtomicActionScoreJob implements Serializable {
|
||||||
resultsRDD
|
resultsRDD
|
||||||
.union(projectsRDD)
|
.union(projectsRDD)
|
||||||
.saveAsHadoopFile(
|
.saveAsHadoopFile(
|
||||||
outputPath, Text.class, Text.class, SequenceFileOutputFormat.class, GzipCodec.class);
|
outputPath, Text.class, Text.class, SequenceFileOutputFormat.class, BZip2Codec.class);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -21,7 +21,6 @@ import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.actionmanager.Constants;
|
|
||||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
import eu.dnetlib.dhp.schema.action.AtomicAction;
|
import eu.dnetlib.dhp.schema.action.AtomicAction;
|
||||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||||
|
@ -30,7 +29,6 @@ import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory;
|
||||||
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
|
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
|
||||||
import eu.dnetlib.dhp.schema.oaf.utils.PidCleaner;
|
import eu.dnetlib.dhp.schema.oaf.utils.PidCleaner;
|
||||||
import eu.dnetlib.dhp.schema.oaf.utils.PidType;
|
import eu.dnetlib.dhp.schema.oaf.utils.PidType;
|
||||||
import io.netty.util.Constant;
|
|
||||||
import scala.Tuple2;
|
import scala.Tuple2;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -46,7 +44,8 @@ public class CreateActionSetFromWebEntries implements Serializable {
|
||||||
private static final String PMID_PREFIX = "50|pmid________::";
|
private static final String PMID_PREFIX = "50|pmid________::";
|
||||||
|
|
||||||
private static final String PMCID_PREFIX = "50|pmc_________::";
|
private static final String PMCID_PREFIX = "50|pmc_________::";
|
||||||
|
private static final String WEB_CRAWL_ID = "10|openaire____::fb98a192f6a055ba495ef414c330834b";
|
||||||
|
private static final String WEB_CRAWL_NAME = "Web Crawl";
|
||||||
public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
||||||
|
|
||||||
public static void main(String[] args) throws Exception {
|
public static void main(String[] args) throws Exception {
|
||||||
|
@ -105,8 +104,8 @@ public class CreateActionSetFromWebEntries implements Serializable {
|
||||||
final String ror = ROR_PREFIX
|
final String ror = ROR_PREFIX
|
||||||
+ IdentifierFactory.md5(PidCleaner.normalizePidValue("ROR", row.getAs("ror")));
|
+ IdentifierFactory.md5(PidCleaner.normalizePidValue("ROR", row.getAs("ror")));
|
||||||
ret.addAll(createAffiliationRelationPairDOI(row.getAs("doi"), ror));
|
ret.addAll(createAffiliationRelationPairDOI(row.getAs("doi"), ror));
|
||||||
// ret.addAll(createAffiliationRelationPairPMID(row.getAs("pmid"), ror));
|
ret.addAll(createAffiliationRelationPairPMID(row.getAs("pmid"), ror));
|
||||||
// ret.addAll(createAffiliationRelationPairPMCID(row.getAs("pmcid"), ror));
|
ret.addAll(createAffiliationRelationPairPMCID(row.getAs("pmcid"), ror));
|
||||||
|
|
||||||
return ret
|
return ret
|
||||||
.iterator();
|
.iterator();
|
||||||
|
@ -140,17 +139,11 @@ public class CreateActionSetFromWebEntries implements Serializable {
|
||||||
"institution", functions
|
"institution", functions
|
||||||
.explode(
|
.explode(
|
||||||
functions.col("institutions")))
|
functions.col("institutions")))
|
||||||
|
|
||||||
.selectExpr(
|
.selectExpr(
|
||||||
"id", "doi", "institution.ror as ror",
|
"id", "doi", "ids.pmcid as pmcid", "ids.pmid as pmid", "institution.ror as ror",
|
||||||
"institution.country_code as country_code", "publication_year")
|
"institution.country_code as country_code", "publication_year")
|
||||||
.distinct();
|
.distinct();
|
||||||
|
|
||||||
// .selectExpr(
|
|
||||||
// "id", "doi", "ids.pmcid as pmcid", "ids.pmid as pmid", "institution.ror as ror",
|
|
||||||
// "institution.country_code as country_code", "publication_year")
|
|
||||||
// .distinct();
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private static Dataset<Row> readBlackList(SparkSession spark, String inputPath) {
|
private static Dataset<Row> readBlackList(SparkSession spark, String inputPath) {
|
||||||
|
@ -221,7 +214,7 @@ public class CreateActionSetFromWebEntries implements Serializable {
|
||||||
ModelConstants.IS_AUTHOR_INSTITUTION_OF,
|
ModelConstants.IS_AUTHOR_INSTITUTION_OF,
|
||||||
Arrays
|
Arrays
|
||||||
.asList(
|
.asList(
|
||||||
OafMapperUtils.keyValue(Constants.WEB_CRAWL_ID, Constants.WEB_CRAWL_NAME)),
|
OafMapperUtils.keyValue(WEB_CRAWL_ID, WEB_CRAWL_NAME)),
|
||||||
OafMapperUtils
|
OafMapperUtils
|
||||||
.dataInfo(
|
.dataInfo(
|
||||||
false, null, false, false,
|
false, null, false, false,
|
||||||
|
@ -240,7 +233,7 @@ public class CreateActionSetFromWebEntries implements Serializable {
|
||||||
ModelConstants.HAS_AUTHOR_INSTITUTION,
|
ModelConstants.HAS_AUTHOR_INSTITUTION,
|
||||||
Arrays
|
Arrays
|
||||||
.asList(
|
.asList(
|
||||||
OafMapperUtils.keyValue(Constants.WEB_CRAWL_ID, Constants.WEB_CRAWL_NAME)),
|
OafMapperUtils.keyValue(WEB_CRAWL_ID, WEB_CRAWL_NAME)),
|
||||||
OafMapperUtils
|
OafMapperUtils
|
||||||
.dataInfo(
|
.dataInfo(
|
||||||
false, null, false, false,
|
false, null, false, false,
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.collection.plugin.rest;
|
package eu.dnetlib.dhp.collection.plugin.rest;
|
||||||
|
|
||||||
|
import java.util.Map;
|
||||||
import java.util.Optional;
|
import java.util.Optional;
|
||||||
import java.util.Spliterator;
|
import java.util.Spliterator;
|
||||||
import java.util.Spliterators;
|
import java.util.Spliterators;
|
||||||
|
@ -9,6 +10,8 @@ import java.util.stream.StreamSupport;
|
||||||
|
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
|
||||||
|
import com.google.gson.Gson;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.collection.ApiDescriptor;
|
import eu.dnetlib.dhp.collection.ApiDescriptor;
|
||||||
import eu.dnetlib.dhp.collection.plugin.CollectorPlugin;
|
import eu.dnetlib.dhp.collection.plugin.CollectorPlugin;
|
||||||
import eu.dnetlib.dhp.common.aggregation.AggregatorReport;
|
import eu.dnetlib.dhp.common.aggregation.AggregatorReport;
|
||||||
|
@ -47,6 +50,9 @@ public class RestCollectorPlugin implements CollectorPlugin {
|
||||||
final String entityXpath = api.getParams().get("entityXpath");
|
final String entityXpath = api.getParams().get("entityXpath");
|
||||||
final String authMethod = api.getParams().get("authMethod");
|
final String authMethod = api.getParams().get("authMethod");
|
||||||
final String authToken = api.getParams().get("authToken");
|
final String authToken = api.getParams().get("authToken");
|
||||||
|
final String requestHeaderMap = api.getParams().get("requestHeaderMap");
|
||||||
|
Gson gson = new Gson();
|
||||||
|
Map requestHeaders = gson.fromJson(requestHeaderMap, Map.class);
|
||||||
final String resultSizeValue = Optional
|
final String resultSizeValue = Optional
|
||||||
.ofNullable(api.getParams().get("resultSizeValue"))
|
.ofNullable(api.getParams().get("resultSizeValue"))
|
||||||
.filter(StringUtils::isNotBlank)
|
.filter(StringUtils::isNotBlank)
|
||||||
|
@ -64,9 +70,6 @@ public class RestCollectorPlugin implements CollectorPlugin {
|
||||||
if (StringUtils.isBlank(resultFormatValue)) {
|
if (StringUtils.isBlank(resultFormatValue)) {
|
||||||
throw new CollectorException("Param 'resultFormatValue' is null or empty");
|
throw new CollectorException("Param 'resultFormatValue' is null or empty");
|
||||||
}
|
}
|
||||||
if (StringUtils.isBlank(queryParams)) {
|
|
||||||
throw new CollectorException("Param 'queryParams' is null or empty");
|
|
||||||
}
|
|
||||||
if (StringUtils.isBlank(entityXpath)) {
|
if (StringUtils.isBlank(entityXpath)) {
|
||||||
throw new CollectorException("Param 'entityXpath' is null or empty");
|
throw new CollectorException("Param 'entityXpath' is null or empty");
|
||||||
}
|
}
|
||||||
|
@ -92,7 +95,8 @@ public class RestCollectorPlugin implements CollectorPlugin {
|
||||||
entityXpath,
|
entityXpath,
|
||||||
authMethod,
|
authMethod,
|
||||||
authToken,
|
authToken,
|
||||||
resultOutputFormat);
|
resultOutputFormat,
|
||||||
|
requestHeaders);
|
||||||
|
|
||||||
return StreamSupport
|
return StreamSupport
|
||||||
.stream(
|
.stream(
|
||||||
|
|
|
@ -9,8 +9,11 @@ import java.net.URL;
|
||||||
import java.net.URLEncoder;
|
import java.net.URLEncoder;
|
||||||
import java.nio.charset.StandardCharsets;
|
import java.nio.charset.StandardCharsets;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
|
import java.util.Map;
|
||||||
import java.util.Queue;
|
import java.util.Queue;
|
||||||
import java.util.concurrent.PriorityBlockingQueue;
|
import java.util.concurrent.PriorityBlockingQueue;
|
||||||
|
import java.util.regex.Matcher;
|
||||||
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
import javax.xml.transform.OutputKeys;
|
import javax.xml.transform.OutputKeys;
|
||||||
import javax.xml.transform.Transformer;
|
import javax.xml.transform.Transformer;
|
||||||
|
@ -18,22 +21,18 @@ import javax.xml.transform.TransformerConfigurationException;
|
||||||
import javax.xml.transform.TransformerFactory;
|
import javax.xml.transform.TransformerFactory;
|
||||||
import javax.xml.transform.dom.DOMSource;
|
import javax.xml.transform.dom.DOMSource;
|
||||||
import javax.xml.transform.stream.StreamResult;
|
import javax.xml.transform.stream.StreamResult;
|
||||||
import javax.xml.xpath.XPath;
|
import javax.xml.xpath.*;
|
||||||
import javax.xml.xpath.XPathConstants;
|
|
||||||
import javax.xml.xpath.XPathExpression;
|
|
||||||
import javax.xml.xpath.XPathExpressionException;
|
|
||||||
import javax.xml.xpath.XPathFactory;
|
|
||||||
|
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
import org.apache.http.HttpHeaders;
|
|
||||||
import org.apache.http.entity.ContentType;
|
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
import org.w3c.dom.Node;
|
import org.w3c.dom.Node;
|
||||||
import org.w3c.dom.NodeList;
|
import org.w3c.dom.NodeList;
|
||||||
import org.xml.sax.InputSource;
|
import org.xml.sax.InputSource;
|
||||||
|
|
||||||
|
import com.google.common.collect.Maps;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.collection.plugin.utils.JsonUtils;
|
import eu.dnetlib.dhp.collection.plugin.utils.JsonUtils;
|
||||||
import eu.dnetlib.dhp.common.collection.CollectorException;
|
import eu.dnetlib.dhp.common.collection.CollectorException;
|
||||||
import eu.dnetlib.dhp.common.collection.HttpClientParams;
|
import eu.dnetlib.dhp.common.collection.HttpClientParams;
|
||||||
|
@ -48,20 +47,23 @@ import eu.dnetlib.dhp.common.collection.HttpClientParams;
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
public class RestIterator implements Iterator<String> {
|
public class RestIterator implements Iterator<String> {
|
||||||
|
|
||||||
private static final Logger log = LoggerFactory.getLogger(RestIterator.class);
|
private static final Logger log = LoggerFactory.getLogger(RestIterator.class);
|
||||||
public static final String UTF_8 = "UTF-8";
|
public static final String UTF_8 = "UTF-8";
|
||||||
private static final int MAX_ATTEMPTS = 5;
|
private static final int MAX_ATTEMPTS = 5;
|
||||||
|
|
||||||
private final HttpClientParams clientParams;
|
private final HttpClientParams clientParams;
|
||||||
|
|
||||||
private final String BASIC = "basic";
|
private final String AUTHBASIC = "basic";
|
||||||
|
|
||||||
|
private static final String XML_HEADER = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>";
|
||||||
|
private static final String EMPTY_XML = XML_HEADER + "<" + JsonUtils.XML_WRAP_TAG + "></" + JsonUtils.XML_WRAP_TAG
|
||||||
|
+ ">";
|
||||||
|
|
||||||
private final String baseUrl;
|
private final String baseUrl;
|
||||||
private final String resumptionType;
|
private final String resumptionType;
|
||||||
private final String resumptionParam;
|
private final String resumptionParam;
|
||||||
private final String resultFormatValue;
|
private final String resultFormatValue;
|
||||||
private String queryParams;
|
private String queryParams = "";
|
||||||
private final int resultSizeValue;
|
private final int resultSizeValue;
|
||||||
private int resumptionInt = 0; // integer resumption token (first record to harvest)
|
private int resumptionInt = 0; // integer resumption token (first record to harvest)
|
||||||
private int resultTotal = -1;
|
private int resultTotal = -1;
|
||||||
|
@ -89,6 +91,11 @@ public class RestIterator implements Iterator<String> {
|
||||||
*/
|
*/
|
||||||
private final String resultOutputFormat;
|
private final String resultOutputFormat;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Can be used to set additional request headers, like for content negotiation
|
||||||
|
*/
|
||||||
|
private Map<String, String> requestHeaders;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* RestIterator class compatible to version 1.3.33
|
* RestIterator class compatible to version 1.3.33
|
||||||
*/
|
*/
|
||||||
|
@ -107,7 +114,8 @@ public class RestIterator implements Iterator<String> {
|
||||||
final String entityXpath,
|
final String entityXpath,
|
||||||
final String authMethod,
|
final String authMethod,
|
||||||
final String authToken,
|
final String authToken,
|
||||||
final String resultOutputFormat) {
|
final String resultOutputFormat,
|
||||||
|
final Map<String, String> requestHeaders) {
|
||||||
|
|
||||||
this.clientParams = clientParams;
|
this.clientParams = clientParams;
|
||||||
this.baseUrl = baseUrl;
|
this.baseUrl = baseUrl;
|
||||||
|
@ -119,6 +127,7 @@ public class RestIterator implements Iterator<String> {
|
||||||
this.authMethod = authMethod;
|
this.authMethod = authMethod;
|
||||||
this.authToken = authToken;
|
this.authToken = authToken;
|
||||||
this.resultOutputFormat = resultOutputFormat;
|
this.resultOutputFormat = resultOutputFormat;
|
||||||
|
this.requestHeaders = requestHeaders != null ? requestHeaders : Maps.newHashMap();
|
||||||
|
|
||||||
this.queryFormat = StringUtils.isNotBlank(resultFormatParam) ? "&" + resultFormatParam + "=" + resultFormatValue
|
this.queryFormat = StringUtils.isNotBlank(resultFormatParam) ? "&" + resultFormatParam + "=" + resultFormatValue
|
||||||
: "";
|
: "";
|
||||||
|
@ -148,7 +157,12 @@ public class RestIterator implements Iterator<String> {
|
||||||
}
|
}
|
||||||
|
|
||||||
private void initQueue() {
|
private void initQueue() {
|
||||||
this.query = this.baseUrl + "?" + this.queryParams + this.querySize + this.queryFormat;
|
if (queryParams.equals("") && querySize.equals("") && queryFormat.equals("")) {
|
||||||
|
query = baseUrl;
|
||||||
|
} else {
|
||||||
|
query = baseUrl + "?" + queryParams + querySize + queryFormat;
|
||||||
|
}
|
||||||
|
|
||||||
log.info("REST calls starting with {}", this.query);
|
log.info("REST calls starting with {}", this.query);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -162,19 +176,6 @@ public class RestIterator implements Iterator<String> {
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
public boolean hasNext() {
|
public boolean hasNext() {
|
||||||
if (this.recordQueue.isEmpty() && this.query.isEmpty()) {
|
|
||||||
disconnect();
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* (non-Javadoc)
|
|
||||||
* @see java.util.Iterator#next()
|
|
||||||
*/
|
|
||||||
@Override
|
|
||||||
public String next() {
|
|
||||||
synchronized (this.recordQueue) {
|
synchronized (this.recordQueue) {
|
||||||
while (this.recordQueue.isEmpty() && !this.query.isEmpty()) {
|
while (this.recordQueue.isEmpty() && !this.query.isEmpty()) {
|
||||||
try {
|
try {
|
||||||
|
@ -184,6 +185,23 @@ public class RestIterator implements Iterator<String> {
|
||||||
throw new RuntimeException(e);
|
throw new RuntimeException(e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!this.recordQueue.isEmpty()) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
disconnect();
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* (non-Javadoc)
|
||||||
|
* @see java.util.Iterator#next()
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public String next() {
|
||||||
|
synchronized (this.recordQueue) {
|
||||||
return this.recordQueue.poll();
|
return this.recordQueue.poll();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -209,9 +227,8 @@ public class RestIterator implements Iterator<String> {
|
||||||
|
|
||||||
try {
|
try {
|
||||||
String resultJson;
|
String resultJson;
|
||||||
String resultXml = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>";
|
String resultXml = XML_HEADER;
|
||||||
String nextQuery = "";
|
String nextQuery = "";
|
||||||
final String emptyXml = resultXml + "<" + JsonUtils.XML_WRAP_TAG + "></" + JsonUtils.XML_WRAP_TAG + ">";
|
|
||||||
Node resultNode = null;
|
Node resultNode = null;
|
||||||
NodeList nodeList = null;
|
NodeList nodeList = null;
|
||||||
String qUrlArgument = "";
|
String qUrlArgument = "";
|
||||||
|
@ -226,37 +243,48 @@ public class RestIterator implements Iterator<String> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// find pagination page start number in queryParam and remove before start the first query
|
||||||
|
if ((resumptionType.toLowerCase().equals("pagination") || resumptionType.toLowerCase().equals("page"))
|
||||||
|
&& (query.contains("paginationStart="))) {
|
||||||
|
|
||||||
|
final Matcher m = Pattern.compile("paginationStart=([0-9]+)").matcher(query);
|
||||||
|
m.find(); // guaranteed to be true for this regex
|
||||||
|
|
||||||
|
String[] pageVal = m.group(0).split("=");
|
||||||
|
pagination = Integer.parseInt(pageVal[1]);
|
||||||
|
|
||||||
|
// remove page start number from query and queryParams
|
||||||
|
queryParams = queryParams.replaceFirst("&?paginationStart=[0-9]+", "");
|
||||||
|
query = query.replaceFirst("&?paginationStart=[0-9]+", "");
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
log.info("requesting URL [{}]", query);
|
log.info("requesting URL [{}]", query);
|
||||||
|
|
||||||
final URL qUrl = new URL(query);
|
final URL qUrl = new URL(query);
|
||||||
log.debug("authMethod: {}", this.authMethod);
|
log.debug("authMethod: {}", this.authMethod);
|
||||||
if ("bearer".equalsIgnoreCase(this.authMethod)) {
|
if (this.authMethod == "bearer") {
|
||||||
log.trace("authMethod before inputStream: {}", resultXml);
|
log.trace("RestIterator.downloadPage():: authMethod before inputStream: " + resultXml);
|
||||||
final HttpURLConnection conn = (HttpURLConnection) qUrl.openConnection();
|
requestHeaders.put("Authorization", "Bearer " + authToken);
|
||||||
conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + this.authToken);
|
// requestHeaders.put("Content-Type", "application/json");
|
||||||
conn.setRequestProperty(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.getMimeType());
|
} else if (AUTHBASIC.equalsIgnoreCase(this.authMethod)) {
|
||||||
conn.setRequestMethod("GET");
|
log.trace("RestIterator.downloadPage():: authMethod before inputStream: " + resultXml);
|
||||||
theHttpInputStream = conn.getInputStream();
|
requestHeaders.put("Authorization", "Basic " + authToken);
|
||||||
} else if (this.BASIC.equalsIgnoreCase(this.authMethod)) {
|
// requestHeaders.put("accept", "application/xml");
|
||||||
log.trace("authMethod before inputStream: {}", resultXml);
|
|
||||||
final HttpURLConnection conn = (HttpURLConnection) qUrl.openConnection();
|
|
||||||
conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Basic " + this.authToken);
|
|
||||||
conn.setRequestProperty(HttpHeaders.ACCEPT, ContentType.APPLICATION_XML.getMimeType());
|
|
||||||
conn.setRequestMethod("GET");
|
|
||||||
theHttpInputStream = conn.getInputStream();
|
|
||||||
} else {
|
|
||||||
theHttpInputStream = qUrl.openStream();
|
|
||||||
}
|
}
|
||||||
|
HttpURLConnection conn = (HttpURLConnection) qUrl.openConnection();
|
||||||
|
conn.setRequestMethod("GET");
|
||||||
|
this.setRequestHeader(conn);
|
||||||
|
resultStream = conn.getInputStream();
|
||||||
|
|
||||||
this.resultStream = theHttpInputStream;
|
|
||||||
if ("json".equals(this.resultOutputFormat)) {
|
if ("json".equals(this.resultOutputFormat)) {
|
||||||
resultJson = IOUtils.toString(this.resultStream, StandardCharsets.UTF_8);
|
resultJson = IOUtils.toString(this.resultStream, StandardCharsets.UTF_8);
|
||||||
resultXml = JsonUtils.convertToXML(resultJson);
|
resultXml = JsonUtils.convertToXML(resultJson);
|
||||||
this.resultStream = IOUtils.toInputStream(resultXml, UTF_8);
|
this.resultStream = IOUtils.toInputStream(resultXml, UTF_8);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!(emptyXml).equalsIgnoreCase(resultXml)) {
|
if (!isEmptyXml(resultXml)) {
|
||||||
resultNode = (Node) this.xpath
|
resultNode = (Node) this.xpath
|
||||||
.evaluate("/", new InputSource(this.resultStream), XPathConstants.NODE);
|
.evaluate("/", new InputSource(this.resultStream), XPathConstants.NODE);
|
||||||
nodeList = (NodeList) this.xprEntity.evaluate(resultNode, XPathConstants.NODESET);
|
nodeList = (NodeList) this.xprEntity.evaluate(resultNode, XPathConstants.NODESET);
|
||||||
|
@ -265,8 +293,7 @@ public class RestIterator implements Iterator<String> {
|
||||||
final StringWriter sw = new StringWriter();
|
final StringWriter sw = new StringWriter();
|
||||||
this.transformer.transform(new DOMSource(nodeList.item(i)), new StreamResult(sw));
|
this.transformer.transform(new DOMSource(nodeList.item(i)), new StreamResult(sw));
|
||||||
final String toEnqueue = sw.toString();
|
final String toEnqueue = sw.toString();
|
||||||
if ((toEnqueue == null) || StringUtils.isBlank(toEnqueue)
|
if ((toEnqueue == null) || StringUtils.isBlank(toEnqueue) || isEmptyXml(toEnqueue)) {
|
||||||
|| emptyXml.equalsIgnoreCase(toEnqueue)) {
|
|
||||||
log
|
log
|
||||||
.warn(
|
.warn(
|
||||||
"The following record resulted in empty item for the feeding queue: {}", resultXml);
|
"The following record resulted in empty item for the feeding queue: {}", resultXml);
|
||||||
|
@ -294,6 +321,7 @@ public class RestIterator implements Iterator<String> {
|
||||||
throw new CollectorException("Mode: discover, Param 'resultSizeValue' is less than 2");
|
throw new CollectorException("Mode: discover, Param 'resultSizeValue' is less than 2");
|
||||||
}
|
}
|
||||||
qUrlArgument = qUrl.getQuery();
|
qUrlArgument = qUrl.getQuery();
|
||||||
|
|
||||||
final String[] arrayQUrlArgument = qUrlArgument.split("&");
|
final String[] arrayQUrlArgument = qUrlArgument.split("&");
|
||||||
for (final String arrayUrlArgStr : arrayQUrlArgument) {
|
for (final String arrayUrlArgStr : arrayQUrlArgument) {
|
||||||
if (arrayUrlArgStr.startsWith(this.resumptionParam)) {
|
if (arrayUrlArgStr.startsWith(this.resumptionParam)) {
|
||||||
|
@ -307,7 +335,7 @@ public class RestIterator implements Iterator<String> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (((emptyXml).equalsIgnoreCase(resultXml))
|
if (isEmptyXml(resultXml)
|
||||||
|| ((nodeList != null) && (nodeList.getLength() < this.resultSizeValue))) {
|
|| ((nodeList != null) && (nodeList.getLength() < this.resultSizeValue))) {
|
||||||
// resumptionStr = "";
|
// resumptionStr = "";
|
||||||
if (nodeList != null) {
|
if (nodeList != null) {
|
||||||
|
@ -326,13 +354,13 @@ public class RestIterator implements Iterator<String> {
|
||||||
|
|
||||||
case "pagination":
|
case "pagination":
|
||||||
case "page": // pagination, iterate over page numbers
|
case "page": // pagination, iterate over page numbers
|
||||||
this.pagination += 1;
|
if (nodeList != null && nodeList.getLength() > 0) {
|
||||||
if (nodeList != null) {
|
|
||||||
this.discoverResultSize += nodeList.getLength();
|
this.discoverResultSize += nodeList.getLength();
|
||||||
} else {
|
} else {
|
||||||
this.resultTotal = this.discoverResultSize;
|
this.resultTotal = this.discoverResultSize;
|
||||||
this.pagination = this.discoverResultSize;
|
this.pagination = this.discoverResultSize;
|
||||||
}
|
}
|
||||||
|
this.pagination += 1;
|
||||||
this.resumptionInt = this.pagination;
|
this.resumptionInt = this.pagination;
|
||||||
this.resumptionStr = Integer.toString(this.resumptionInt);
|
this.resumptionStr = Integer.toString(this.resumptionInt);
|
||||||
break;
|
break;
|
||||||
|
@ -380,7 +408,8 @@ public class RestIterator implements Iterator<String> {
|
||||||
try {
|
try {
|
||||||
if (this.resultTotal == -1) {
|
if (this.resultTotal == -1) {
|
||||||
this.resultTotal = Integer.parseInt(this.xprResultTotalPath.evaluate(resultNode));
|
this.resultTotal = Integer.parseInt(this.xprResultTotalPath.evaluate(resultNode));
|
||||||
if ("page".equalsIgnoreCase(this.resumptionType) && !this.BASIC.equalsIgnoreCase(this.authMethod)) {
|
if ("page".equalsIgnoreCase(this.resumptionType)
|
||||||
|
&& !this.AUTHBASIC.equalsIgnoreCase(this.authMethod)) {
|
||||||
this.resultTotal += 1;
|
this.resultTotal += 1;
|
||||||
} // to correct the upper bound
|
} // to correct the upper bound
|
||||||
log.info("resultTotal was -1 is now: " + this.resultTotal);
|
log.info("resultTotal was -1 is now: " + this.resultTotal);
|
||||||
|
@ -409,6 +438,10 @@ public class RestIterator implements Iterator<String> {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private boolean isEmptyXml(String s) {
|
||||||
|
return EMPTY_XML.equalsIgnoreCase(s);
|
||||||
|
}
|
||||||
|
|
||||||
private boolean isInteger(final String s) {
|
private boolean isInteger(final String s) {
|
||||||
boolean isValidInteger = false;
|
boolean isValidInteger = false;
|
||||||
try {
|
try {
|
||||||
|
@ -433,6 +466,22 @@ public class RestIterator implements Iterator<String> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* setRequestHeader
|
||||||
|
*
|
||||||
|
* setRequestProperty: Sets the general request property. If a property with the key already exists, overwrite its value with the new value.
|
||||||
|
* @param conn
|
||||||
|
*/
|
||||||
|
private void setRequestHeader(HttpURLConnection conn) {
|
||||||
|
if (requestHeaders != null) {
|
||||||
|
for (String key : requestHeaders.keySet()) {
|
||||||
|
conn.setRequestProperty(key, requestHeaders.get(key));
|
||||||
|
}
|
||||||
|
log.debug("Set Request Header with: " + requestHeaders);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
public String getResultFormatValue() {
|
public String getResultFormatValue() {
|
||||||
return this.resultFormatValue;
|
return this.resultFormatValue;
|
||||||
}
|
}
|
||||||
|
|
|
@ -8,7 +8,10 @@ import java.io.StringWriter;
|
||||||
import java.nio.charset.Charset;
|
import java.nio.charset.Charset;
|
||||||
import java.nio.charset.CharsetDecoder;
|
import java.nio.charset.CharsetDecoder;
|
||||||
import java.nio.charset.CodingErrorAction;
|
import java.nio.charset.CodingErrorAction;
|
||||||
|
import java.util.Arrays;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
import javax.xml.stream.XMLEventFactory;
|
import javax.xml.stream.XMLEventFactory;
|
||||||
import javax.xml.stream.XMLEventReader;
|
import javax.xml.stream.XMLEventReader;
|
||||||
|
@ -19,6 +22,7 @@ import javax.xml.stream.XMLStreamException;
|
||||||
import javax.xml.stream.events.StartElement;
|
import javax.xml.stream.events.StartElement;
|
||||||
import javax.xml.stream.events.XMLEvent;
|
import javax.xml.stream.events.XMLEvent;
|
||||||
|
|
||||||
|
import org.apache.commons.lang3.StringUtils;
|
||||||
import org.apache.commons.logging.Log;
|
import org.apache.commons.logging.Log;
|
||||||
import org.apache.commons.logging.LogFactory;
|
import org.apache.commons.logging.LogFactory;
|
||||||
|
|
||||||
|
@ -58,13 +62,23 @@ public class XMLIterator implements Iterator<String> {
|
||||||
|
|
||||||
private String element;
|
private String element;
|
||||||
|
|
||||||
|
private List<String> elements;
|
||||||
|
|
||||||
private InputStream inputStream;
|
private InputStream inputStream;
|
||||||
|
|
||||||
public XMLIterator(final String element, final InputStream inputStream) {
|
public XMLIterator(final String element, final InputStream inputStream) {
|
||||||
super();
|
super();
|
||||||
this.element = element;
|
this.element = element;
|
||||||
|
if (element.contains(",")) {
|
||||||
|
elements = Arrays
|
||||||
|
.stream(element.split(","))
|
||||||
|
.filter(StringUtils::isNoneBlank)
|
||||||
|
.map(String::toLowerCase)
|
||||||
|
.collect(Collectors.toList());
|
||||||
|
}
|
||||||
this.inputStream = inputStream;
|
this.inputStream = inputStream;
|
||||||
this.parser = getParser();
|
this.parser = getParser();
|
||||||
|
|
||||||
try {
|
try {
|
||||||
this.current = findElement(parser);
|
this.current = findElement(parser);
|
||||||
} catch (XMLStreamException e) {
|
} catch (XMLStreamException e) {
|
||||||
|
@ -113,7 +127,7 @@ public class XMLIterator implements Iterator<String> {
|
||||||
final XMLEvent event = parser.nextEvent();
|
final XMLEvent event = parser.nextEvent();
|
||||||
|
|
||||||
// TODO: replace with depth tracking instead of close tag tracking.
|
// TODO: replace with depth tracking instead of close tag tracking.
|
||||||
if (event.isEndElement() && event.asEndElement().getName().getLocalPart().equals(element)) {
|
if (event.isEndElement() && isCheckTag(event.asEndElement().getName().getLocalPart())) {
|
||||||
writer.add(event);
|
writer.add(event);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -142,18 +156,16 @@ public class XMLIterator implements Iterator<String> {
|
||||||
XMLEvent peek = parser.peek();
|
XMLEvent peek = parser.peek();
|
||||||
if (peek != null && peek.isStartElement()) {
|
if (peek != null && peek.isStartElement()) {
|
||||||
String name = peek.asStartElement().getName().getLocalPart();
|
String name = peek.asStartElement().getName().getLocalPart();
|
||||||
if (element.equals(name)) {
|
if (isCheckTag(name))
|
||||||
return peek;
|
return peek;
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
while (parser.hasNext()) {
|
while (parser.hasNext()) {
|
||||||
final XMLEvent event = parser.nextEvent();
|
XMLEvent event = parser.nextEvent();
|
||||||
if (event != null && event.isStartElement()) {
|
if (event != null && event.isStartElement()) {
|
||||||
String name = event.asStartElement().getName().getLocalPart();
|
String name = event.asStartElement().getName().getLocalPart();
|
||||||
if (element.equals(name)) {
|
if (isCheckTag(name))
|
||||||
return event;
|
return event;
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return null;
|
return null;
|
||||||
|
@ -161,12 +173,31 @@ public class XMLIterator implements Iterator<String> {
|
||||||
|
|
||||||
private XMLEventReader getParser() {
|
private XMLEventReader getParser() {
|
||||||
try {
|
try {
|
||||||
return inputFactory.get().createXMLEventReader(sanitize(inputStream));
|
XMLInputFactory xif = inputFactory.get();
|
||||||
|
xif.setProperty(XMLInputFactory.SUPPORT_DTD, false);
|
||||||
|
return xif.createXMLEventReader(sanitize(inputStream));
|
||||||
} catch (XMLStreamException e) {
|
} catch (XMLStreamException e) {
|
||||||
throw new RuntimeException(e);
|
throw new RuntimeException(e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private boolean isCheckTag(final String tagName) {
|
||||||
|
if (elements != null) {
|
||||||
|
final String found = elements
|
||||||
|
.stream()
|
||||||
|
.filter(e -> e.equalsIgnoreCase(tagName))
|
||||||
|
.findFirst()
|
||||||
|
.orElse(null);
|
||||||
|
if (found != null)
|
||||||
|
return true;
|
||||||
|
} else {
|
||||||
|
if (element.equalsIgnoreCase(tagName)) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
private Reader sanitize(final InputStream in) {
|
private Reader sanitize(final InputStream in) {
|
||||||
final CharsetDecoder charsetDecoder = Charset.forName(UTF_8).newDecoder();
|
final CharsetDecoder charsetDecoder = Charset.forName(UTF_8).newDecoder();
|
||||||
charsetDecoder.onMalformedInput(CodingErrorAction.REPLACE);
|
charsetDecoder.onMalformedInput(CodingErrorAction.REPLACE);
|
||||||
|
|
|
@ -1,134 +0,0 @@
|
||||||
package eu.dnetlib.dhp.transformation.xslt;
|
|
||||||
|
|
||||||
import java.io.Serializable;
|
|
||||||
import net.sf.saxon.s9api.*;
|
|
||||||
|
|
||||||
import org.apache.commons.io.IOUtils;
|
|
||||||
import org.json.JSONObject;
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.net.MalformedURLException;
|
|
||||||
import java.net.URL;
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* This class fetches JSON from a provided link and returns
|
|
||||||
* a Dublin Core. This functionality is particularly needed for OSF Preprints
|
|
||||||
*/
|
|
||||||
|
|
||||||
|
|
||||||
public class DataFetcher implements ExtensionFunction, Serializable {
|
|
||||||
|
|
||||||
/**
|
|
||||||
* This method fetches JSON object from a given URL
|
|
||||||
* @param url a url in the metadata for fetching authors in JSON format
|
|
||||||
* @return
|
|
||||||
* @throws IOException
|
|
||||||
*/
|
|
||||||
static JSONObject getJson(URL url) throws IOException {
|
|
||||||
|
|
||||||
String json = IOUtils.toString(url);
|
|
||||||
return new JSONObject(json);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* This method extracts authors from a given JSON
|
|
||||||
*
|
|
||||||
* @param jsonObject
|
|
||||||
* @return
|
|
||||||
*/
|
|
||||||
static List<String> getAuthorsFromJson(JSONObject jsonObject) {
|
|
||||||
List<String> authors = new ArrayList<>();
|
|
||||||
// count of authors
|
|
||||||
int countOfAuthors = jsonObject.getJSONArray("data").length();
|
|
||||||
for (int i = 0; i < countOfAuthors; i++) {
|
|
||||||
|
|
||||||
authors.add(jsonObject
|
|
||||||
.getJSONArray("data")
|
|
||||||
.getJSONObject(i)
|
|
||||||
.getJSONObject("embeds")
|
|
||||||
.getJSONObject("users")
|
|
||||||
.getJSONObject("data")
|
|
||||||
.getJSONObject("attributes")
|
|
||||||
.getString("full_name"));
|
|
||||||
}
|
|
||||||
return authors;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* This method transforms list of authors into Dublin Core
|
|
||||||
* @param authors
|
|
||||||
* @return Dublin Core list of authors
|
|
||||||
*/
|
|
||||||
static List<String> transformListToDublinCore(List<String> authors) {
|
|
||||||
|
|
||||||
List<String> dublinCoreAuthors = new ArrayList<>();
|
|
||||||
for (String author : authors){
|
|
||||||
|
|
||||||
//splitting full name into first and last names according to OpenAIRE v3 guidelines at:
|
|
||||||
// https://guidelines.openaire.eu/en/latest/literature/field_creator.html
|
|
||||||
// “surname”, “initials” (“first name”) “prefix”.
|
|
||||||
String[] parts = author.split(" ");
|
|
||||||
String firstName = parts[0];
|
|
||||||
String lastName = parts[1];
|
|
||||||
char initialOfFirstName = firstName.charAt(0);
|
|
||||||
|
|
||||||
dublinCoreAuthors.add(
|
|
||||||
"<dc:creator>" + lastName + ", " + initialOfFirstName + ". (" + firstName + ")" + "</dc:creator>");
|
|
||||||
}
|
|
||||||
return dublinCoreAuthors;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* This is a public method which fetches authors and transform them into Dublin Core
|
|
||||||
*/
|
|
||||||
public static String getAndTransformAuthors(URL url) throws IOException{
|
|
||||||
return String.join(", ", transformListToDublinCore(getAuthorsFromJson(getJson(url))));
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* This method extracts link to fulltext from a given JSON
|
|
||||||
*
|
|
||||||
* @return
|
|
||||||
*/
|
|
||||||
static private String getLinkToFulltextFromJson(JSONObject jsonObject) throws MalformedURLException {
|
|
||||||
|
|
||||||
// note: Link to JSON containing fulltextlink is in "primary_file" attribute.
|
|
||||||
// And in the resultant JSON, “links->download” contains the URL to fulltext
|
|
||||||
|
|
||||||
return jsonObject
|
|
||||||
.getJSONObject("data")
|
|
||||||
.getJSONObject("links")
|
|
||||||
.getString("download");
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* This is a public method which fetches link to full text and returns it as a suitable format
|
|
||||||
*/
|
|
||||||
public static String getFullTextLinkAndTransform (URL url )throws IOException{
|
|
||||||
|
|
||||||
return getLinkToFulltextFromJson(getJson(url));
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public QName getName() {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public SequenceType getResultType() {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public SequenceType[] getArgumentTypes() {
|
|
||||||
return new SequenceType[0];
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public XdmValue call(XdmValue[] xdmValues) throws SaxonApiException {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -55,8 +55,6 @@ public class XSLTTransformationFunction implements MapFunction<MetadataRecord, M
|
||||||
processor.registerExtensionFunction(new DateCleaner());
|
processor.registerExtensionFunction(new DateCleaner());
|
||||||
processor.registerExtensionFunction(new PersonCleaner());
|
processor.registerExtensionFunction(new PersonCleaner());
|
||||||
|
|
||||||
processor.registerExtensionFunction(new DataFetcher());
|
|
||||||
|
|
||||||
final XsltCompiler comp = processor.newXsltCompiler();
|
final XsltCompiler comp = processor.newXsltCompiler();
|
||||||
QName datasourceIDParam = new QName(DATASOURCE_ID_PARAM);
|
QName datasourceIDParam = new QName(DATASOURCE_ID_PARAM);
|
||||||
comp.setParameter(datasourceIDParam, new XdmAtomicValue(value.getProvenance().getDatasourceId()));
|
comp.setParameter(datasourceIDParam, new XdmAtomicValue(value.getProvenance().getDatasourceId()));
|
||||||
|
|
|
@ -28,13 +28,7 @@
|
||||||
"paramLongName": "dataciteInputPath",
|
"paramLongName": "dataciteInputPath",
|
||||||
"paramDescription": "the path to get the input data from Datacite",
|
"paramDescription": "the path to get the input data from Datacite",
|
||||||
"paramRequired": true
|
"paramRequired": true
|
||||||
},{
|
},
|
||||||
"paramName": "wip",
|
|
||||||
"paramLongName": "webCrawlInputPath",
|
|
||||||
"paramDescription": "the path to get the input data from Web Crawl",
|
|
||||||
"paramRequired": true
|
|
||||||
}
|
|
||||||
,
|
|
||||||
{
|
{
|
||||||
"paramName": "o",
|
"paramName": "o",
|
||||||
"paramLongName": "outputPath",
|
"paramLongName": "outputPath",
|
||||||
|
|
|
@ -35,6 +35,5 @@ crossrefInputPath=/data/bip-affiliations/crossref-data.json
|
||||||
pubmedInputPath=/data/bip-affiliations/pubmed-data.json
|
pubmedInputPath=/data/bip-affiliations/pubmed-data.json
|
||||||
openapcInputPath=/data/bip-affiliations/openapc-data.json
|
openapcInputPath=/data/bip-affiliations/openapc-data.json
|
||||||
dataciteInputPath=/data/bip-affiliations/datacite-data.json
|
dataciteInputPath=/data/bip-affiliations/datacite-data.json
|
||||||
webCrawlInputPath=/data/bip-affiliations/webCrawl/
|
|
||||||
|
|
||||||
outputPath=/tmp/crossref-affiliations-output-v5
|
outputPath=/tmp/crossref-affiliations-output-v5
|
||||||
|
|
|
@ -17,10 +17,6 @@
|
||||||
<name>dataciteInputPath</name>
|
<name>dataciteInputPath</name>
|
||||||
<description>the path where to find the inferred affiliation relations from Datacite</description>
|
<description>the path where to find the inferred affiliation relations from Datacite</description>
|
||||||
</property>
|
</property>
|
||||||
<property>
|
|
||||||
<name>webCrawlInputPath</name>
|
|
||||||
<description>the path where to find the inferred affiliation relations from webCrawl</description>
|
|
||||||
</property>
|
|
||||||
<property>
|
<property>
|
||||||
<name>outputPath</name>
|
<name>outputPath</name>
|
||||||
<description>the path where to store the actionset</description>
|
<description>the path where to store the actionset</description>
|
||||||
|
@ -116,7 +112,7 @@
|
||||||
<arg>--pubmedInputPath</arg><arg>${pubmedInputPath}</arg>
|
<arg>--pubmedInputPath</arg><arg>${pubmedInputPath}</arg>
|
||||||
<arg>--openapcInputPath</arg><arg>${openapcInputPath}</arg>
|
<arg>--openapcInputPath</arg><arg>${openapcInputPath}</arg>
|
||||||
<arg>--dataciteInputPath</arg><arg>${dataciteInputPath}</arg>
|
<arg>--dataciteInputPath</arg><arg>${dataciteInputPath}</arg>
|
||||||
<arg>--webCrawlInputPath</arg><arg>${webCrawlInputPath}</arg>
|
|
||||||
<arg>--outputPath</arg><arg>${outputPath}</arg>
|
<arg>--outputPath</arg><arg>${outputPath}</arg>
|
||||||
</spark>
|
</spark>
|
||||||
<ok to="End"/>
|
<ok to="End"/>
|
||||||
|
|
|
@ -1,5 +1,10 @@
|
||||||
[
|
[
|
||||||
|
{
|
||||||
|
"id": "100007630",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/100007630",
|
||||||
|
"name": "College of Engineering and Informatics, National University of Ireland, Galway",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"id": "100007731",
|
"id": "100007731",
|
||||||
"uri": "http://dx.doi.org/10.13039/100007731",
|
"uri": "http://dx.doi.org/10.13039/100007731",
|
||||||
|
@ -427,13 +432,13 @@
|
||||||
"id": "501100001634",
|
"id": "501100001634",
|
||||||
"uri": "http://dx.doi.org/10.13039/501100001634",
|
"uri": "http://dx.doi.org/10.13039/501100001634",
|
||||||
"name": "University of Galway",
|
"name": "University of Galway",
|
||||||
"synonym": ["501100019905", "100007630", "501100020570", "501100023852"]
|
"synonym": []
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"id": "501100001635",
|
"id": "501100001635",
|
||||||
"uri": "http://dx.doi.org/10.13039/501100001635",
|
"uri": "http://dx.doi.org/10.13039/501100001635",
|
||||||
"name": "University of Limerick",
|
"name": "University of Limerick",
|
||||||
"synonym": ["501100014531"]
|
"synonym": []
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"id": "501100001636",
|
"id": "501100001636",
|
||||||
|
@ -463,7 +468,7 @@
|
||||||
"id": "501100002736",
|
"id": "501100002736",
|
||||||
"uri": "http://dx.doi.org/10.13039/501100002736",
|
"uri": "http://dx.doi.org/10.13039/501100002736",
|
||||||
"name": "Covidien",
|
"name": "Covidien",
|
||||||
"synonym": ["501100003956"]
|
"synonym": []
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"id": "501100002755",
|
"id": "501100002755",
|
||||||
|
@ -513,6 +518,12 @@
|
||||||
"name": "Irish Institute of Clinical Neuroscience",
|
"name": "Irish Institute of Clinical Neuroscience",
|
||||||
"synonym": []
|
"synonym": []
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"id": "501100003956",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/501100003956",
|
||||||
|
"name": "Aspect Medical Systems",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"id": "501100004162",
|
"id": "501100004162",
|
||||||
"uri": "http://dx.doi.org/10.13039/501100004162",
|
"uri": "http://dx.doi.org/10.13039/501100004162",
|
||||||
|
@ -633,7 +644,12 @@
|
||||||
"name": "Irish Centre for High-End Computing",
|
"name": "Irish Centre for High-End Computing",
|
||||||
"synonym": []
|
"synonym": []
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"id": "501100019905",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/501100019905",
|
||||||
|
"name": "Galway University Foundation",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"id": "501100020036",
|
"id": "501100020036",
|
||||||
"uri": "http://dx.doi.org/10.13039/501100020036",
|
"uri": "http://dx.doi.org/10.13039/501100020036",
|
||||||
|
@ -808,7 +824,12 @@
|
||||||
"name": "Energy Policy Research Centre, Economic and Social Research Institute",
|
"name": "Energy Policy Research Centre, Economic and Social Research Institute",
|
||||||
"synonym": []
|
"synonym": []
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"id": "501100014531",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/501100014531",
|
||||||
|
"name": "Physical Education and Sport Sciences Department, University of Limerick",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"id": "501100014745",
|
"id": "501100014745",
|
||||||
"uri": "http://dx.doi.org/10.13039/501100014745",
|
"uri": "http://dx.doi.org/10.13039/501100014745",
|
||||||
|
@ -821,11 +842,22 @@
|
||||||
"name": "ADAPT - Centre for Digital Content Technology",
|
"name": "ADAPT - Centre for Digital Content Technology",
|
||||||
"synonym": []
|
"synonym": []
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"id": "501100020570",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/501100020570",
|
||||||
|
"name": "College of Medicine, Nursing and Health Sciences, National University of Ireland, Galway",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"id": "501100020871",
|
"id": "501100020871",
|
||||||
"uri": "http://dx.doi.org/10.13039/501100020871",
|
"uri": "http://dx.doi.org/10.13039/501100020871",
|
||||||
"name": "Bernal Institute, University of Limerick",
|
"name": "Bernal Institute, University of Limerick",
|
||||||
"synonym": []
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "501100023852",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/501100023852",
|
||||||
|
"name": "Moore Institute for Research in the Humanities and Social Studies, University of Galway",
|
||||||
|
"synonym": []
|
||||||
}
|
}
|
||||||
]
|
]
|
|
@ -48,37 +48,12 @@
|
||||||
<description>Used to configure the heap size for the map JVM process. Should be 80% of mapreduce.map.memory.mb.</description>
|
<description>Used to configure the heap size for the map JVM process. Should be 80% of mapreduce.map.memory.mb.</description>
|
||||||
</property>
|
</property>
|
||||||
|
|
||||||
<property>
|
|
||||||
<name>JAVA_HOME</name>
|
|
||||||
<value>/srv/java/openjdk-17</value>
|
|
||||||
<description>Used to configure the Java home location for oozie.launcher.mapreduce.map.env</description>
|
|
||||||
</property>
|
|
||||||
|
|
||||||
<property>
|
|
||||||
<name>JAVA_OPTS</name>
|
|
||||||
<value>-Dcom.sun.security.enableAIAcaIssuers=true</value>
|
|
||||||
<description>Used to configure the JAVA_OPTS parameter</description>
|
|
||||||
</property>
|
|
||||||
|
|
||||||
</parameters>
|
</parameters>
|
||||||
|
|
||||||
<global>
|
<global>
|
||||||
<job-tracker>${jobTracker}</job-tracker>
|
<job-tracker>${jobTracker}</job-tracker>
|
||||||
<name-node>${nameNode}</name-node>
|
<name-node>${nameNode}</name-node>
|
||||||
<configuration>
|
|
||||||
<property>
|
|
||||||
<name>mapreduce.job.queuename</name>
|
|
||||||
<value>${queueName}</value>
|
|
||||||
</property>
|
|
||||||
<property>
|
|
||||||
<name>oozie.launcher.mapred.job.queue.name</name>
|
|
||||||
<value>${oozieLauncherQueueName}</value>
|
|
||||||
</property>
|
|
||||||
<property>
|
|
||||||
<name>oozie.launcher.mapreduce.map.env</name>
|
|
||||||
<value>JAVA_HOME=${JAVA_HOME}</value>
|
|
||||||
</property>
|
|
||||||
</configuration>
|
|
||||||
</global>
|
</global>
|
||||||
|
|
||||||
<start to="collection_mode"/>
|
<start to="collection_mode"/>
|
||||||
|
@ -124,7 +99,7 @@
|
||||||
<action name="CollectionWorker">
|
<action name="CollectionWorker">
|
||||||
<java>
|
<java>
|
||||||
<main-class>eu.dnetlib.dhp.collection.CollectorWorkerApplication</main-class>
|
<main-class>eu.dnetlib.dhp.collection.CollectorWorkerApplication</main-class>
|
||||||
<java-opts>${JAVA_OPTS} ${collection_java_xmx}</java-opts>
|
<java-opts>${collection_java_xmx}</java-opts>
|
||||||
<arg>--apidescriptor</arg><arg>${apiDescription}</arg>
|
<arg>--apidescriptor</arg><arg>${apiDescription}</arg>
|
||||||
<arg>--namenode</arg><arg>${nameNode}</arg>
|
<arg>--namenode</arg><arg>${nameNode}</arg>
|
||||||
<arg>--workflowId</arg><arg>${workflowId}</arg>
|
<arg>--workflowId</arg><arg>${workflowId}</arg>
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
<workflow-app name="Transform_BioEntity_Workflow" xmlns="uri:oozie:workflow:0.5">
|
<workflow-app name="Transform_BioEntity_Workflow" xmlns="uri:oozie:workflow:0.5">
|
||||||
<parameters>
|
<parameters>
|
||||||
<property>
|
<property>
|
||||||
<name>sourcePath</name>
|
<name>sourcePath</name>
|
||||||
|
@ -8,40 +8,19 @@
|
||||||
<name>database</name>
|
<name>database</name>
|
||||||
<description>the PDB Database Working Path</description>
|
<description>the PDB Database Working Path</description>
|
||||||
</property>
|
</property>
|
||||||
|
|
||||||
<property>
|
<property>
|
||||||
<name>mdStoreOutputId</name>
|
<name>targetPath</name>
|
||||||
<description>the identifier of the cleaned MDStore</description>
|
<description>the Target Working dir path</description>
|
||||||
</property>
|
|
||||||
<property>
|
|
||||||
<name>mdStoreManagerURI</name>
|
|
||||||
<description>the path of the cleaned mdstore</description>
|
|
||||||
</property>
|
</property>
|
||||||
</parameters>
|
</parameters>
|
||||||
|
|
||||||
<start to="StartTransaction"/>
|
<start to="ConvertDB"/>
|
||||||
|
|
||||||
|
|
||||||
<kill name="Kill">
|
<kill name="Kill">
|
||||||
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
|
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
|
||||||
</kill>
|
</kill>
|
||||||
|
|
||||||
<action name="StartTransaction">
|
|
||||||
<java>
|
|
||||||
<configuration>
|
|
||||||
<property>
|
|
||||||
<name>oozie.launcher.mapreduce.user.classpath.first</name>
|
|
||||||
<value>true</value>
|
|
||||||
</property>
|
|
||||||
</configuration>
|
|
||||||
<main-class>eu.dnetlib.dhp.aggregation.mdstore.MDStoreActionNode</main-class>
|
|
||||||
<arg>--action</arg><arg>NEW_VERSION</arg>
|
|
||||||
<arg>--mdStoreID</arg><arg>${mdStoreOutputId}</arg>
|
|
||||||
<arg>--mdStoreManagerURI</arg><arg>${mdStoreManagerURI}</arg>
|
|
||||||
<capture-output/>
|
|
||||||
</java>
|
|
||||||
<ok to="ConvertDB"/>
|
|
||||||
<error to="RollBack"/>
|
|
||||||
</action>
|
|
||||||
<action name="ConvertDB">
|
<action name="ConvertDB">
|
||||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||||
<master>yarn</master>
|
<master>yarn</master>
|
||||||
|
@ -62,48 +41,11 @@
|
||||||
<arg>--master</arg><arg>yarn</arg>
|
<arg>--master</arg><arg>yarn</arg>
|
||||||
<arg>--dbPath</arg><arg>${sourcePath}</arg>
|
<arg>--dbPath</arg><arg>${sourcePath}</arg>
|
||||||
<arg>--database</arg><arg>${database}</arg>
|
<arg>--database</arg><arg>${database}</arg>
|
||||||
<arg>--mdstoreOutputVersion</arg><arg>${wf:actionData('StartTransaction')['mdStoreVersion']}</arg>
|
<arg>--targetPath</arg><arg>${targetPath}</arg>
|
||||||
</spark>
|
</spark>
|
||||||
<ok to="CommitVersion"/>
|
<ok to="End"/>
|
||||||
<error to="RollBack"/>
|
<error to="Kill"/>
|
||||||
|
|
||||||
</action>
|
</action>
|
||||||
<action name="CommitVersion">
|
<end name="End"/>
|
||||||
<java>
|
|
||||||
<configuration>
|
|
||||||
<property>
|
|
||||||
<name>oozie.launcher.mapreduce.user.classpath.first</name>
|
|
||||||
<value>true</value>
|
|
||||||
</property>
|
|
||||||
</configuration>
|
|
||||||
<main-class>eu.dnetlib.dhp.aggregation.mdstore.MDStoreActionNode</main-class>
|
|
||||||
<arg>--action</arg><arg>COMMIT</arg>
|
|
||||||
<arg>--namenode</arg><arg>${nameNode}</arg>
|
|
||||||
<arg>--mdStoreVersion</arg><arg>${wf:actionData('StartTransaction')['mdStoreVersion']}</arg>
|
|
||||||
<arg>--mdStoreManagerURI</arg><arg>${mdStoreManagerURI}</arg>
|
|
||||||
</java>
|
|
||||||
<ok to="End"/>
|
|
||||||
<error to="Kill"/>
|
|
||||||
</action>
|
|
||||||
|
|
||||||
<action name="RollBack">
|
|
||||||
<java>
|
|
||||||
<configuration>
|
|
||||||
<property>
|
|
||||||
<name>oozie.launcher.mapreduce.user.classpath.first</name>
|
|
||||||
<value>true</value>
|
|
||||||
</property>
|
|
||||||
</configuration>
|
|
||||||
<main-class>eu.dnetlib.dhp.aggregation.mdstore.MDStoreActionNode</main-class>
|
|
||||||
<arg>--action</arg><arg>ROLLBACK</arg>
|
|
||||||
<arg>--mdStoreVersion</arg><arg>${wf:actionData('StartTransaction')['mdStoreVersion']}</arg>
|
|
||||||
<arg>--mdStoreManagerURI</arg><arg>${mdStoreManagerURI}</arg>
|
|
||||||
</java>
|
|
||||||
<ok to="Kill"/>
|
|
||||||
<error to="Kill"/>
|
|
||||||
</action>
|
|
||||||
|
|
||||||
|
|
||||||
<end name="End"/>
|
|
||||||
|
|
||||||
</workflow-app>
|
</workflow-app>
|
|
@ -2,5 +2,5 @@
|
||||||
{"paramName":"mt", "paramLongName":"master", "paramDescription": "should be local or yarn", "paramRequired": true},
|
{"paramName":"mt", "paramLongName":"master", "paramDescription": "should be local or yarn", "paramRequired": true},
|
||||||
{"paramName":"db", "paramLongName":"database", "paramDescription": "should be PDB or UNIPROT", "paramRequired": true},
|
{"paramName":"db", "paramLongName":"database", "paramDescription": "should be PDB or UNIPROT", "paramRequired": true},
|
||||||
{"paramName":"p", "paramLongName":"dbPath", "paramDescription": "the path of the database to transform", "paramRequired": true},
|
{"paramName":"p", "paramLongName":"dbPath", "paramDescription": "the path of the database to transform", "paramRequired": true},
|
||||||
{"paramName":"mo", "paramLongName":"mdstoreOutputVersion", "paramDescription": "the oaf path ", "paramRequired": true}
|
{"paramName":"t", "paramLongName":"targetPath", "paramDescription": "the OAF target path ", "paramRequired": true}
|
||||||
]
|
]
|
|
@ -1,20 +1,5 @@
|
||||||
[
|
[
|
||||||
{
|
{"paramName":"mt", "paramLongName":"master", "paramDescription": "should be local or yarn", "paramRequired": true},
|
||||||
"paramName": "mt",
|
{"paramName":"s", "paramLongName":"sourcePath","paramDescription": "the source Path", "paramRequired": true},
|
||||||
"paramLongName": "master",
|
{"paramName":"t", "paramLongName":"targetPath","paramDescription": "the oaf path ", "paramRequired": true}
|
||||||
"paramDescription": "should be local or yarn",
|
|
||||||
"paramRequired": true
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"paramName": "s",
|
|
||||||
"paramLongName": "sourcePath",
|
|
||||||
"paramDescription": "the source Path",
|
|
||||||
"paramRequired": true
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"paramName": "mo",
|
|
||||||
"paramLongName": "mdstoreOutputVersion",
|
|
||||||
"paramDescription": "the oaf path ",
|
|
||||||
"paramRequired": true
|
|
||||||
}
|
|
||||||
]
|
]
|
|
@ -9,26 +9,34 @@
|
||||||
<description>the Working Path</description>
|
<description>the Working Path</description>
|
||||||
</property>
|
</property>
|
||||||
<property>
|
<property>
|
||||||
<name>mdStoreOutputId</name>
|
<name>targetPath</name>
|
||||||
<description>the identifier of the cleaned MDStore</description>
|
<description>the OAF MDStore Path</description>
|
||||||
</property>
|
</property>
|
||||||
<property>
|
<property>
|
||||||
<name>mdStoreManagerURI</name>
|
<name>sparkDriverMemory</name>
|
||||||
<description>the path of the cleaned mdstore</description>
|
<description>memory for driver process</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>sparkExecutorMemory</name>
|
||||||
|
<description>memory for individual executor</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>sparkExecutorCores</name>
|
||||||
|
<description>number of cores used by single executor</description>
|
||||||
</property>
|
</property>
|
||||||
<property>
|
<property>
|
||||||
<name>resumeFrom</name>
|
<name>resumeFrom</name>
|
||||||
<value>CreateEBIDataSet</value>
|
<value>DownloadEBILinks</value>
|
||||||
<description>node to start</description>
|
<description>node to start</description>
|
||||||
</property>
|
</property>
|
||||||
</parameters>
|
</parameters>
|
||||||
|
|
||||||
<start to="StartTransaction"/>
|
<start to="resume_from"/>
|
||||||
|
|
||||||
<decision name="resume_from">
|
<decision name="resume_from">
|
||||||
<switch>
|
<switch>
|
||||||
<case to="DownloadEBILinks">${wf:conf('resumeFrom') eq 'DownloadEBILinks'}</case>
|
<case to="DownloadEBILinks">${wf:conf('resumeFrom') eq 'DownloadEBILinks'}</case>
|
||||||
<case to="StartTransaction">${wf:conf('resumeFrom') eq 'CreateEBIDataSet'}</case>
|
<case to="CreateEBIDataSet">${wf:conf('resumeFrom') eq 'CreateEBIDataSet'}</case>
|
||||||
<default to="DownloadEBILinks"/>
|
<default to="DownloadEBILinks"/>
|
||||||
</switch>
|
</switch>
|
||||||
</decision>
|
</decision>
|
||||||
|
@ -69,29 +77,9 @@
|
||||||
<move source="${sourcePath}/ebi_links_dataset" target="${sourcePath}/ebi_links_dataset_old"/>
|
<move source="${sourcePath}/ebi_links_dataset" target="${sourcePath}/ebi_links_dataset_old"/>
|
||||||
<move source="${workingPath}/links_final" target="${sourcePath}/ebi_links_dataset"/>
|
<move source="${workingPath}/links_final" target="${sourcePath}/ebi_links_dataset"/>
|
||||||
</fs>
|
</fs>
|
||||||
<ok to="StartTransaction"/>
|
<ok to="CreateEBIDataSet"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
</action>
|
</action>
|
||||||
|
|
||||||
<action name="StartTransaction">
|
|
||||||
<java>
|
|
||||||
<configuration>
|
|
||||||
<property>
|
|
||||||
<name>oozie.launcher.mapreduce.user.classpath.first</name>
|
|
||||||
<value>true</value>
|
|
||||||
</property>
|
|
||||||
</configuration>
|
|
||||||
<main-class>eu.dnetlib.dhp.aggregation.mdstore.MDStoreActionNode</main-class>
|
|
||||||
<arg>--action</arg><arg>NEW_VERSION</arg>
|
|
||||||
<arg>--mdStoreID</arg><arg>${mdStoreOutputId}</arg>
|
|
||||||
<arg>--mdStoreManagerURI</arg><arg>${mdStoreManagerURI}</arg>
|
|
||||||
<capture-output/>
|
|
||||||
</java>
|
|
||||||
<ok to="CreateEBIDataSet"/>
|
|
||||||
<error to="RollBack"/>
|
|
||||||
</action>
|
|
||||||
|
|
||||||
|
|
||||||
<action name="CreateEBIDataSet">
|
<action name="CreateEBIDataSet">
|
||||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||||
<master>yarn-cluster</master>
|
<master>yarn-cluster</master>
|
||||||
|
@ -107,49 +95,11 @@
|
||||||
${sparkExtraOPT}
|
${sparkExtraOPT}
|
||||||
</spark-opts>
|
</spark-opts>
|
||||||
<arg>--sourcePath</arg><arg>${sourcePath}/ebi_links_dataset</arg>
|
<arg>--sourcePath</arg><arg>${sourcePath}/ebi_links_dataset</arg>
|
||||||
<arg>--mdstoreOutputVersion</arg><arg>${wf:actionData('StartTransaction')['mdStoreVersion']}</arg>
|
<arg>--targetPath</arg><arg>${targetPath}</arg>
|
||||||
<arg>--master</arg><arg>yarn</arg>
|
<arg>--master</arg><arg>yarn</arg>
|
||||||
</spark>
|
</spark>
|
||||||
<ok to="End"/>
|
<ok to="End"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
</action>
|
</action>
|
||||||
|
|
||||||
|
|
||||||
<action name="CommitVersion">
|
|
||||||
<java>
|
|
||||||
<configuration>
|
|
||||||
<property>
|
|
||||||
<name>oozie.launcher.mapreduce.user.classpath.first</name>
|
|
||||||
<value>true</value>
|
|
||||||
</property>
|
|
||||||
</configuration>
|
|
||||||
<main-class>eu.dnetlib.dhp.aggregation.mdstore.MDStoreActionNode</main-class>
|
|
||||||
<arg>--action</arg><arg>COMMIT</arg>
|
|
||||||
<arg>--namenode</arg><arg>${nameNode}</arg>
|
|
||||||
<arg>--mdStoreVersion</arg><arg>${wf:actionData('StartTransaction')['mdStoreVersion']}</arg>
|
|
||||||
<arg>--mdStoreManagerURI</arg><arg>${mdStoreManagerURI}</arg>
|
|
||||||
</java>
|
|
||||||
<ok to="End"/>
|
|
||||||
<error to="Kill"/>
|
|
||||||
</action>
|
|
||||||
|
|
||||||
<action name="RollBack">
|
|
||||||
<java>
|
|
||||||
<configuration>
|
|
||||||
<property>
|
|
||||||
<name>oozie.launcher.mapreduce.user.classpath.first</name>
|
|
||||||
<value>true</value>
|
|
||||||
</property>
|
|
||||||
</configuration>
|
|
||||||
<main-class>eu.dnetlib.dhp.aggregation.mdstore.MDStoreActionNode</main-class>
|
|
||||||
<arg>--action</arg><arg>ROLLBACK</arg>
|
|
||||||
<arg>--mdStoreVersion</arg><arg>${wf:actionData('StartTransaction')['mdStoreVersion']}</arg>
|
|
||||||
<arg>--mdStoreManagerURI</arg><arg>${mdStoreManagerURI}</arg>
|
|
||||||
</java>
|
|
||||||
<ok to="Kill"/>
|
|
||||||
<error to="Kill"/>
|
|
||||||
</action>
|
|
||||||
|
|
||||||
<end name="End"/>
|
<end name="End"/>
|
||||||
|
|
||||||
</workflow-app>
|
</workflow-app>
|
|
@ -1025,6 +1025,7 @@ case object Crossref2Oaf {
|
||||||
tp._1 match {
|
tp._1 match {
|
||||||
case "electronic" => journal.setIssnOnline(tp._2)
|
case "electronic" => journal.setIssnOnline(tp._2)
|
||||||
case "print" => journal.setIssnPrinted(tp._2)
|
case "print" => journal.setIssnPrinted(tp._2)
|
||||||
|
case _ =>
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
|
@ -231,7 +231,7 @@ object BioDBToOAF {
|
||||||
def uniprotToOAF(input: String): List[Oaf] = {
|
def uniprotToOAF(input: String): List[Oaf] = {
|
||||||
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
|
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
|
||||||
lazy val json = parse(input)
|
lazy val json = parse(input)
|
||||||
val pid = (json \ "pid").extract[String].trim()
|
val pid = (json \ "pid").extract[String]
|
||||||
|
|
||||||
val d = new Dataset
|
val d = new Dataset
|
||||||
|
|
||||||
|
|
|
@ -2,15 +2,12 @@ package eu.dnetlib.dhp.sx.bio
|
||||||
|
|
||||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser
|
||||||
import eu.dnetlib.dhp.collection.CollectionUtils
|
import eu.dnetlib.dhp.collection.CollectionUtils
|
||||||
import eu.dnetlib.dhp.common.Constants.{MDSTORE_DATA_PATH, MDSTORE_SIZE_PATH}
|
|
||||||
import eu.dnetlib.dhp.schema.mdstore.MDStoreVersion
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.Oaf
|
import eu.dnetlib.dhp.schema.oaf.Oaf
|
||||||
import eu.dnetlib.dhp.sx.bio.BioDBToOAF.ScholixResolved
|
import eu.dnetlib.dhp.sx.bio.BioDBToOAF.ScholixResolved
|
||||||
import org.apache.commons.io.IOUtils
|
import org.apache.commons.io.IOUtils
|
||||||
import org.apache.spark.SparkConf
|
import org.apache.spark.SparkConf
|
||||||
import org.apache.spark.sql.{Encoder, Encoders, SparkSession}
|
import org.apache.spark.sql.{Encoder, Encoders, SparkSession}
|
||||||
import org.slf4j.{Logger, LoggerFactory}
|
import org.slf4j.{Logger, LoggerFactory}
|
||||||
import eu.dnetlib.dhp.utils.DHPUtils.{MAPPER, writeHdfsFile}
|
|
||||||
|
|
||||||
object SparkTransformBioDatabaseToOAF {
|
object SparkTransformBioDatabaseToOAF {
|
||||||
|
|
||||||
|
@ -28,13 +25,8 @@ object SparkTransformBioDatabaseToOAF {
|
||||||
|
|
||||||
val dbPath: String = parser.get("dbPath")
|
val dbPath: String = parser.get("dbPath")
|
||||||
log.info("dbPath: {}", database)
|
log.info("dbPath: {}", database)
|
||||||
|
val targetPath: String = parser.get("targetPath")
|
||||||
val mdstoreOutputVersion = parser.get("mdstoreOutputVersion")
|
log.info("targetPath: {}", database)
|
||||||
log.info("mdstoreOutputVersion: {}", mdstoreOutputVersion)
|
|
||||||
|
|
||||||
val cleanedMdStoreVersion = MAPPER.readValue(mdstoreOutputVersion, classOf[MDStoreVersion])
|
|
||||||
val outputBasePath = cleanedMdStoreVersion.getHdfsPath
|
|
||||||
log.info("outputBasePath: {}", outputBasePath)
|
|
||||||
|
|
||||||
val spark: SparkSession =
|
val spark: SparkSession =
|
||||||
SparkSession
|
SparkSession
|
||||||
|
@ -51,28 +43,24 @@ object SparkTransformBioDatabaseToOAF {
|
||||||
case "UNIPROT" =>
|
case "UNIPROT" =>
|
||||||
CollectionUtils.saveDataset(
|
CollectionUtils.saveDataset(
|
||||||
spark.createDataset(sc.textFile(dbPath).flatMap(i => BioDBToOAF.uniprotToOAF(i))),
|
spark.createDataset(sc.textFile(dbPath).flatMap(i => BioDBToOAF.uniprotToOAF(i))),
|
||||||
s"$outputBasePath/$MDSTORE_DATA_PATH"
|
targetPath
|
||||||
)
|
)
|
||||||
case "PDB" =>
|
case "PDB" =>
|
||||||
CollectionUtils.saveDataset(
|
CollectionUtils.saveDataset(
|
||||||
spark.createDataset(sc.textFile(dbPath).flatMap(i => BioDBToOAF.pdbTOOaf(i))),
|
spark.createDataset(sc.textFile(dbPath).flatMap(i => BioDBToOAF.pdbTOOaf(i))),
|
||||||
s"$outputBasePath/$MDSTORE_DATA_PATH"
|
targetPath
|
||||||
)
|
)
|
||||||
case "SCHOLIX" =>
|
case "SCHOLIX" =>
|
||||||
CollectionUtils.saveDataset(
|
CollectionUtils.saveDataset(
|
||||||
spark.read.load(dbPath).as[ScholixResolved].map(i => BioDBToOAF.scholixResolvedToOAF(i)),
|
spark.read.load(dbPath).as[ScholixResolved].map(i => BioDBToOAF.scholixResolvedToOAF(i)),
|
||||||
s"$outputBasePath/$MDSTORE_DATA_PATH"
|
targetPath
|
||||||
)
|
)
|
||||||
case "CROSSREF_LINKS" =>
|
case "CROSSREF_LINKS" =>
|
||||||
CollectionUtils.saveDataset(
|
CollectionUtils.saveDataset(
|
||||||
spark.createDataset(sc.textFile(dbPath).map(i => BioDBToOAF.crossrefLinksToOaf(i))),
|
spark.createDataset(sc.textFile(dbPath).map(i => BioDBToOAF.crossrefLinksToOaf(i))),
|
||||||
s"$outputBasePath/$MDSTORE_DATA_PATH"
|
targetPath
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
val df = spark.read.text(s"$outputBasePath/$MDSTORE_DATA_PATH")
|
|
||||||
val mdStoreSize = df.count
|
|
||||||
writeHdfsFile(spark.sparkContext.hadoopConfiguration, s"$mdStoreSize", s"$outputBasePath/$MDSTORE_SIZE_PATH")
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -2,12 +2,9 @@ package eu.dnetlib.dhp.sx.bio.ebi
|
||||||
|
|
||||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser
|
||||||
import eu.dnetlib.dhp.collection.CollectionUtils
|
import eu.dnetlib.dhp.collection.CollectionUtils
|
||||||
import eu.dnetlib.dhp.common.Constants.{MDSTORE_DATA_PATH, MDSTORE_SIZE_PATH}
|
|
||||||
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup
|
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup
|
||||||
import eu.dnetlib.dhp.schema.mdstore.MDStoreVersion
|
import eu.dnetlib.dhp.schema.oaf.Oaf
|
||||||
import eu.dnetlib.dhp.schema.oaf.{Oaf, Result}
|
|
||||||
import eu.dnetlib.dhp.sx.bio.pubmed._
|
import eu.dnetlib.dhp.sx.bio.pubmed._
|
||||||
import eu.dnetlib.dhp.utils.DHPUtils.{MAPPER, writeHdfsFile}
|
|
||||||
import eu.dnetlib.dhp.utils.ISLookupClientFactory
|
import eu.dnetlib.dhp.utils.ISLookupClientFactory
|
||||||
import org.apache.commons.io.IOUtils
|
import org.apache.commons.io.IOUtils
|
||||||
import org.apache.hadoop.conf.Configuration
|
import org.apache.hadoop.conf.Configuration
|
||||||
|
@ -17,13 +14,13 @@ import org.apache.http.client.methods.HttpGet
|
||||||
import org.apache.http.impl.client.HttpClientBuilder
|
import org.apache.http.impl.client.HttpClientBuilder
|
||||||
import org.apache.spark.SparkConf
|
import org.apache.spark.SparkConf
|
||||||
import org.apache.spark.rdd.RDD
|
import org.apache.spark.rdd.RDD
|
||||||
import org.apache.spark.sql.expressions.Aggregator
|
|
||||||
import org.apache.spark.sql._
|
import org.apache.spark.sql._
|
||||||
|
import org.apache.spark.sql.expressions.Aggregator
|
||||||
import org.slf4j.{Logger, LoggerFactory}
|
import org.slf4j.{Logger, LoggerFactory}
|
||||||
|
|
||||||
import java.io.InputStream
|
import java.io.{ByteArrayInputStream, InputStream}
|
||||||
import scala.io.Source
|
import java.nio.charset.Charset
|
||||||
import scala.xml.pull.XMLEventReader
|
import javax.xml.stream.XMLInputFactory
|
||||||
|
|
||||||
object SparkCreateBaselineDataFrame {
|
object SparkCreateBaselineDataFrame {
|
||||||
|
|
||||||
|
@ -86,7 +83,7 @@ object SparkCreateBaselineDataFrame {
|
||||||
if (response.getStatusLine.getStatusCode > 400) {
|
if (response.getStatusLine.getStatusCode > 400) {
|
||||||
tries -= 1
|
tries -= 1
|
||||||
} else
|
} else
|
||||||
return IOUtils.toString(response.getEntity.getContent)
|
return IOUtils.toString(response.getEntity.getContent, Charset.defaultCharset())
|
||||||
} catch {
|
} catch {
|
||||||
case e: Throwable =>
|
case e: Throwable =>
|
||||||
println(s"Error on requesting ${r.getURI}")
|
println(s"Error on requesting ${r.getURI}")
|
||||||
|
@ -158,7 +155,8 @@ object SparkCreateBaselineDataFrame {
|
||||||
IOUtils.toString(
|
IOUtils.toString(
|
||||||
SparkEBILinksToOaf.getClass.getResourceAsStream(
|
SparkEBILinksToOaf.getClass.getResourceAsStream(
|
||||||
"/eu/dnetlib/dhp/sx/bio/ebi/baseline_to_oaf_params.json"
|
"/eu/dnetlib/dhp/sx/bio/ebi/baseline_to_oaf_params.json"
|
||||||
)
|
),
|
||||||
|
Charset.defaultCharset()
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
parser.parseArgument(args)
|
parser.parseArgument(args)
|
||||||
|
@ -167,15 +165,11 @@ object SparkCreateBaselineDataFrame {
|
||||||
val workingPath = parser.get("workingPath")
|
val workingPath = parser.get("workingPath")
|
||||||
log.info("workingPath: {}", workingPath)
|
log.info("workingPath: {}", workingPath)
|
||||||
|
|
||||||
val mdstoreOutputVersion = parser.get("mdstoreOutputVersion")
|
val targetPath = parser.get("targetPath")
|
||||||
log.info("mdstoreOutputVersion: {}", mdstoreOutputVersion)
|
log.info("targetPath: {}", targetPath)
|
||||||
|
|
||||||
val cleanedMdStoreVersion = MAPPER.readValue(mdstoreOutputVersion, classOf[MDStoreVersion])
|
|
||||||
val outputBasePath = cleanedMdStoreVersion.getHdfsPath
|
|
||||||
log.info("outputBasePath: {}", outputBasePath)
|
|
||||||
|
|
||||||
val hdfsServerUri = parser.get("hdfsServerUri")
|
val hdfsServerUri = parser.get("hdfsServerUri")
|
||||||
log.info("hdfsServerUri: {}", hdfsServerUri)
|
log.info("hdfsServerUri: {}", targetPath)
|
||||||
|
|
||||||
val skipUpdate = parser.get("skipUpdate")
|
val skipUpdate = parser.get("skipUpdate")
|
||||||
log.info("skipUpdate: {}", skipUpdate)
|
log.info("skipUpdate: {}", skipUpdate)
|
||||||
|
@ -201,10 +195,11 @@ object SparkCreateBaselineDataFrame {
|
||||||
if (!"true".equalsIgnoreCase(skipUpdate)) {
|
if (!"true".equalsIgnoreCase(skipUpdate)) {
|
||||||
downloadBaseLineUpdate(s"$workingPath/baseline", hdfsServerUri)
|
downloadBaseLineUpdate(s"$workingPath/baseline", hdfsServerUri)
|
||||||
val k: RDD[(String, String)] = sc.wholeTextFiles(s"$workingPath/baseline", 2000)
|
val k: RDD[(String, String)] = sc.wholeTextFiles(s"$workingPath/baseline", 2000)
|
||||||
|
val inputFactory = XMLInputFactory.newInstance
|
||||||
val ds: Dataset[PMArticle] = spark.createDataset(
|
val ds: Dataset[PMArticle] = spark.createDataset(
|
||||||
k.filter(i => i._1.endsWith(".gz"))
|
k.filter(i => i._1.endsWith(".gz"))
|
||||||
.flatMap(i => {
|
.flatMap(i => {
|
||||||
val xml = new XMLEventReader(Source.fromBytes(i._2.getBytes()))
|
val xml = inputFactory.createXMLEventReader(new ByteArrayInputStream(i._2.getBytes()))
|
||||||
new PMParser(xml)
|
new PMParser(xml)
|
||||||
})
|
})
|
||||||
)
|
)
|
||||||
|
@ -223,11 +218,8 @@ object SparkCreateBaselineDataFrame {
|
||||||
.map(a => PubMedToOaf.convert(a, vocabularies))
|
.map(a => PubMedToOaf.convert(a, vocabularies))
|
||||||
.as[Oaf]
|
.as[Oaf]
|
||||||
.filter(p => p != null),
|
.filter(p => p != null),
|
||||||
s"$outputBasePath/$MDSTORE_DATA_PATH"
|
targetPath
|
||||||
)
|
)
|
||||||
|
|
||||||
val df = spark.read.text(s"$outputBasePath/$MDSTORE_DATA_PATH")
|
|
||||||
val mdStoreSize = df.count
|
|
||||||
writeHdfsFile(spark.sparkContext.hadoopConfiguration, s"$mdStoreSize", s"$outputBasePath/$MDSTORE_SIZE_PATH")
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -9,9 +9,6 @@ import org.apache.commons.io.IOUtils
|
||||||
import org.apache.spark.SparkConf
|
import org.apache.spark.SparkConf
|
||||||
import org.apache.spark.sql._
|
import org.apache.spark.sql._
|
||||||
import org.slf4j.{Logger, LoggerFactory}
|
import org.slf4j.{Logger, LoggerFactory}
|
||||||
import eu.dnetlib.dhp.common.Constants.{MDSTORE_DATA_PATH, MDSTORE_SIZE_PATH}
|
|
||||||
import eu.dnetlib.dhp.schema.mdstore.MDStoreVersion
|
|
||||||
import eu.dnetlib.dhp.utils.DHPUtils.{MAPPER, writeHdfsFile}
|
|
||||||
|
|
||||||
object SparkEBILinksToOaf {
|
object SparkEBILinksToOaf {
|
||||||
|
|
||||||
|
@ -35,13 +32,8 @@ object SparkEBILinksToOaf {
|
||||||
import spark.implicits._
|
import spark.implicits._
|
||||||
val sourcePath = parser.get("sourcePath")
|
val sourcePath = parser.get("sourcePath")
|
||||||
log.info(s"sourcePath -> $sourcePath")
|
log.info(s"sourcePath -> $sourcePath")
|
||||||
val mdstoreOutputVersion = parser.get("mdstoreOutputVersion")
|
val targetPath = parser.get("targetPath")
|
||||||
log.info("mdstoreOutputVersion: {}", mdstoreOutputVersion)
|
log.info(s"targetPath -> $targetPath")
|
||||||
|
|
||||||
val cleanedMdStoreVersion = MAPPER.readValue(mdstoreOutputVersion, classOf[MDStoreVersion])
|
|
||||||
val outputBasePath = cleanedMdStoreVersion.getHdfsPath
|
|
||||||
log.info("outputBasePath: {}", outputBasePath)
|
|
||||||
|
|
||||||
implicit val PMEncoder: Encoder[Oaf] = Encoders.kryo(classOf[Oaf])
|
implicit val PMEncoder: Encoder[Oaf] = Encoders.kryo(classOf[Oaf])
|
||||||
|
|
||||||
val ebLinks: Dataset[EBILinkItem] = spark.read
|
val ebLinks: Dataset[EBILinkItem] = spark.read
|
||||||
|
@ -54,10 +46,7 @@ object SparkEBILinksToOaf {
|
||||||
.flatMap(j => BioDBToOAF.parse_ebi_links(j.links))
|
.flatMap(j => BioDBToOAF.parse_ebi_links(j.links))
|
||||||
.filter(p => BioDBToOAF.EBITargetLinksFilter(p))
|
.filter(p => BioDBToOAF.EBITargetLinksFilter(p))
|
||||||
.flatMap(p => BioDBToOAF.convertEBILinksToOaf(p)),
|
.flatMap(p => BioDBToOAF.convertEBILinksToOaf(p)),
|
||||||
s"$outputBasePath/$MDSTORE_DATA_PATH"
|
targetPath
|
||||||
)
|
)
|
||||||
val df = spark.read.text(s"$outputBasePath/$MDSTORE_DATA_PATH")
|
|
||||||
val mdStoreSize = df.count
|
|
||||||
writeHdfsFile(spark.sparkContext.hadoopConfiguration, s"$mdStoreSize", s"$outputBasePath/$MDSTORE_SIZE_PATH")
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,7 +1,8 @@
|
||||||
package eu.dnetlib.dhp.sx.bio.pubmed
|
package eu.dnetlib.dhp.sx.bio.pubmed
|
||||||
|
|
||||||
import scala.xml.MetaData
|
import scala.xml.MetaData
|
||||||
import scala.xml.pull.{EvElemEnd, EvElemStart, EvText, XMLEventReader}
|
import javax.xml.stream.XMLEventReader
|
||||||
|
import scala.xml.pull.{EvElemEnd, EvElemStart, EvText}
|
||||||
|
|
||||||
/** @param xml
|
/** @param xml
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -88,7 +88,6 @@ public class PrepareAffiliationRelationsTest {
|
||||||
"-pubmedInputPath", crossrefAffiliationRelationPath,
|
"-pubmedInputPath", crossrefAffiliationRelationPath,
|
||||||
"-openapcInputPath", crossrefAffiliationRelationPath,
|
"-openapcInputPath", crossrefAffiliationRelationPath,
|
||||||
"-dataciteInputPath", crossrefAffiliationRelationPath,
|
"-dataciteInputPath", crossrefAffiliationRelationPath,
|
||||||
"-webCrawlInputPath", crossrefAffiliationRelationPath,
|
|
||||||
"-outputPath", outputPath
|
"-outputPath", outputPath
|
||||||
});
|
});
|
||||||
|
|
||||||
|
@ -105,7 +104,7 @@ public class PrepareAffiliationRelationsTest {
|
||||||
// );
|
// );
|
||||||
// }
|
// }
|
||||||
// count the number of relations
|
// count the number of relations
|
||||||
assertEquals(120, tmp.count());
|
assertEquals(80, tmp.count());
|
||||||
|
|
||||||
Dataset<Relation> dataset = spark.createDataset(tmp.rdd(), Encoders.bean(Relation.class));
|
Dataset<Relation> dataset = spark.createDataset(tmp.rdd(), Encoders.bean(Relation.class));
|
||||||
dataset.createOrReplaceTempView("result");
|
dataset.createOrReplaceTempView("result");
|
||||||
|
@ -116,7 +115,7 @@ public class PrepareAffiliationRelationsTest {
|
||||||
// verify that we have equal number of bi-directional relations
|
// verify that we have equal number of bi-directional relations
|
||||||
Assertions
|
Assertions
|
||||||
.assertEquals(
|
.assertEquals(
|
||||||
60, execVerification
|
40, execVerification
|
||||||
.filter(
|
.filter(
|
||||||
"relClass='" + ModelConstants.HAS_AUTHOR_INSTITUTION + "'")
|
"relClass='" + ModelConstants.HAS_AUTHOR_INSTITUTION + "'")
|
||||||
.collectAsList()
|
.collectAsList()
|
||||||
|
@ -124,7 +123,7 @@ public class PrepareAffiliationRelationsTest {
|
||||||
|
|
||||||
Assertions
|
Assertions
|
||||||
.assertEquals(
|
.assertEquals(
|
||||||
60, execVerification
|
40, execVerification
|
||||||
.filter(
|
.filter(
|
||||||
"relClass='" + ModelConstants.IS_AUTHOR_INSTITUTION_OF + "'")
|
"relClass='" + ModelConstants.IS_AUTHOR_INSTITUTION_OF + "'")
|
||||||
.collectAsList()
|
.collectAsList()
|
||||||
|
|
|
@ -15,10 +15,7 @@ import org.apache.spark.SparkConf;
|
||||||
import org.apache.spark.api.java.JavaRDD;
|
import org.apache.spark.api.java.JavaRDD;
|
||||||
import org.apache.spark.api.java.JavaSparkContext;
|
import org.apache.spark.api.java.JavaSparkContext;
|
||||||
import org.apache.spark.sql.SparkSession;
|
import org.apache.spark.sql.SparkSession;
|
||||||
import org.junit.jupiter.api.AfterAll;
|
import org.junit.jupiter.api.*;
|
||||||
import org.junit.jupiter.api.Assertions;
|
|
||||||
import org.junit.jupiter.api.BeforeAll;
|
|
||||||
import org.junit.jupiter.api.Test;
|
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
|
|
@ -119,7 +119,9 @@ public class ReadCOCITest {
|
||||||
workingDir.toString() + "/COCI",
|
workingDir.toString() + "/COCI",
|
||||||
"-outputPath",
|
"-outputPath",
|
||||||
workingDir.toString() + "/COCI_json/",
|
workingDir.toString() + "/COCI_json/",
|
||||||
"-inputFile", "input1;input2;input3;input4;input5"
|
"-inputFile", "input1;input2;input3;input4;input5",
|
||||||
|
"-format",
|
||||||
|
"COCI"
|
||||||
});
|
});
|
||||||
|
|
||||||
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||||
|
|
|
@ -0,0 +1,64 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.collection.plugin.file;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.Objects;
|
||||||
|
import java.util.stream.Stream;
|
||||||
|
|
||||||
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
|
import org.junit.jupiter.api.*;
|
||||||
|
import org.junit.jupiter.api.extension.ExtendWith;
|
||||||
|
import org.mockito.junit.jupiter.MockitoExtension;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.collection.ApiDescriptor;
|
||||||
|
import eu.dnetlib.dhp.common.aggregation.AggregatorReport;
|
||||||
|
import eu.dnetlib.dhp.common.collection.CollectorException;
|
||||||
|
|
||||||
|
@TestMethodOrder(MethodOrderer.OrderAnnotation.class)
|
||||||
|
@ExtendWith(MockitoExtension.class)
|
||||||
|
public class FileGZipMultipleNodeTest {
|
||||||
|
|
||||||
|
private static final Logger log = LoggerFactory.getLogger(FileGZipCollectorPluginTest.class);
|
||||||
|
|
||||||
|
private final ApiDescriptor api = new ApiDescriptor();
|
||||||
|
|
||||||
|
private FileGZipCollectorPlugin plugin;
|
||||||
|
|
||||||
|
private static final String SPLIT_ON_ELEMENT = "incollection,article";
|
||||||
|
|
||||||
|
@BeforeEach
|
||||||
|
public void setUp() throws IOException {
|
||||||
|
|
||||||
|
final String gzipFile = Objects
|
||||||
|
.requireNonNull(
|
||||||
|
this
|
||||||
|
.getClass()
|
||||||
|
.getResource("/eu/dnetlib/dhp/collection/plugin/file/dblp.gz"))
|
||||||
|
.getFile();
|
||||||
|
|
||||||
|
api.setBaseUrl(gzipFile);
|
||||||
|
|
||||||
|
HashMap<String, String> params = new HashMap<>();
|
||||||
|
params.put("splitOnElement", SPLIT_ON_ELEMENT);
|
||||||
|
|
||||||
|
api.setParams(params);
|
||||||
|
|
||||||
|
FileSystem fs = FileSystem.get(new Configuration());
|
||||||
|
plugin = new FileGZipCollectorPlugin(fs);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void test() throws CollectorException {
|
||||||
|
|
||||||
|
final Stream<String> stream = plugin.collect(api, new AggregatorReport());
|
||||||
|
|
||||||
|
stream.limit(10).forEach(s -> {
|
||||||
|
Assertions.assertTrue(s.length() > 0);
|
||||||
|
log.info(s);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
|
@ -36,11 +36,11 @@ public class OsfPreprintCollectorTest {
|
||||||
private final String resultTotalXpath = "/*/*[local-name()='links']/*[local-name()='meta']/*[local-name()='total']";
|
private final String resultTotalXpath = "/*/*[local-name()='links']/*[local-name()='meta']/*[local-name()='total']";
|
||||||
|
|
||||||
private final String resumptionParam = "page";
|
private final String resumptionParam = "page";
|
||||||
private final String resumptionType = "page";
|
private final String resumptionType = "scan";
|
||||||
private final String resumptionXpath = "/*/*[local-name()='links']/*[local-name()='next']";
|
private final String resumptionXpath = "substring-before(substring-after(/*/*[local-name()='links']/*[local-name()='next'], 'page='), '&')";
|
||||||
|
|
||||||
private final String resultSizeParam = "";
|
private final String resultSizeParam = "page[size]";
|
||||||
private final String resultSizeValue = "";
|
private final String resultSizeValue = "100";
|
||||||
|
|
||||||
private final String resultFormatParam = "format";
|
private final String resultFormatParam = "format";
|
||||||
private final String resultFormatValue = "json";
|
private final String resultFormatValue = "json";
|
||||||
|
@ -74,7 +74,7 @@ public class OsfPreprintCollectorTest {
|
||||||
final AtomicInteger i = new AtomicInteger(0);
|
final AtomicInteger i = new AtomicInteger(0);
|
||||||
final Stream<String> stream = this.rcp.collect(this.api, new AggregatorReport());
|
final Stream<String> stream = this.rcp.collect(this.api, new AggregatorReport());
|
||||||
|
|
||||||
stream.limit(200).forEach(s -> {
|
stream.limit(2000).forEach(s -> {
|
||||||
Assertions.assertTrue(s.length() > 0);
|
Assertions.assertTrue(s.length() > 0);
|
||||||
i.incrementAndGet();
|
i.incrementAndGet();
|
||||||
log.info(s);
|
log.info(s);
|
||||||
|
|
|
@ -4,6 +4,11 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.collection.plugin.rest;
|
package eu.dnetlib.dhp.collection.plugin.rest;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.InputStream;
|
||||||
|
import java.net.HttpURLConnection;
|
||||||
|
import java.net.MalformedURLException;
|
||||||
|
import java.net.URL;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.concurrent.atomic.AtomicInteger;
|
import java.util.concurrent.atomic.AtomicInteger;
|
||||||
import java.util.stream.Stream;
|
import java.util.stream.Stream;
|
||||||
|
@ -12,6 +17,8 @@ import org.junit.jupiter.api.*;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import com.google.gson.Gson;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.collection.ApiDescriptor;
|
import eu.dnetlib.dhp.collection.ApiDescriptor;
|
||||||
import eu.dnetlib.dhp.common.aggregation.AggregatorReport;
|
import eu.dnetlib.dhp.common.aggregation.AggregatorReport;
|
||||||
import eu.dnetlib.dhp.common.collection.CollectorException;
|
import eu.dnetlib.dhp.common.collection.CollectorException;
|
||||||
|
@ -25,18 +32,18 @@ class RestCollectorPluginTest {
|
||||||
|
|
||||||
private static final Logger log = LoggerFactory.getLogger(RestCollectorPluginTest.class);
|
private static final Logger log = LoggerFactory.getLogger(RestCollectorPluginTest.class);
|
||||||
|
|
||||||
private final String baseUrl = "https://share.osf.io/api/v2/search/creativeworks/_search";
|
private final String baseUrl = "https://ddh-openapi.worldbank.org/search";
|
||||||
private final String resumptionType = "count";
|
private final String resumptionType = "discover";
|
||||||
private final String resumptionParam = "from";
|
private final String resumptionParam = "skip";
|
||||||
private final String entityXpath = "//hits/hits";
|
private final String entityXpath = "//*[local-name()='data']";
|
||||||
private final String resumptionXpath = "//hits";
|
private final String resumptionXpath = "";
|
||||||
private final String resultTotalXpath = "//hits/total";
|
private final String resultTotalXpath = "//*[local-name()='count']";
|
||||||
private final String resultFormatParam = "format";
|
private final String resultFormatParam = "";
|
||||||
private final String resultFormatValue = "json";
|
private final String resultFormatValue = "json";
|
||||||
private final String resultSizeParam = "size";
|
private final String resultSizeParam = "top";
|
||||||
private final String resultSizeValue = "10";
|
private final String resultSizeValue = "10";
|
||||||
// private String query = "q=%28sources%3ASocArXiv+AND+type%3Apreprint%29";
|
// private String query = "q=%28sources%3ASocArXiv+AND+type%3Apreprint%29";
|
||||||
private final String query = "q=%28sources%3AengrXiv+AND+type%3Apreprint%29";
|
private final String query = "";
|
||||||
// private String query = "=(sources:engrXiv AND type:preprint)";
|
// private String query = "=(sources:engrXiv AND type:preprint)";
|
||||||
|
|
||||||
private final String protocolDescriptor = "rest_json2xml";
|
private final String protocolDescriptor = "rest_json2xml";
|
||||||
|
@ -56,6 +63,7 @@ class RestCollectorPluginTest {
|
||||||
params.put("resultSizeValue", resultSizeValue);
|
params.put("resultSizeValue", resultSizeValue);
|
||||||
params.put("queryParams", query);
|
params.put("queryParams", query);
|
||||||
params.put("entityXpath", entityXpath);
|
params.put("entityXpath", entityXpath);
|
||||||
|
params.put("requestHeaderMap", "{\"User-Agent\": \"OpenAIRE DEV\"}");
|
||||||
|
|
||||||
api.setBaseUrl(baseUrl);
|
api.setBaseUrl(baseUrl);
|
||||||
api.setParams(params);
|
api.setParams(params);
|
||||||
|
@ -78,4 +86,19 @@ class RestCollectorPluginTest {
|
||||||
log.info("{}", i.intValue());
|
log.info("{}", i.intValue());
|
||||||
Assertions.assertTrue(i.intValue() > 0);
|
Assertions.assertTrue(i.intValue() > 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Disabled
|
||||||
|
@Test
|
||||||
|
void testUrl() throws IOException {
|
||||||
|
String url_s = "https://ddh-openapi.worldbank.org/search?&top=10";
|
||||||
|
URL url = new URL(url_s);
|
||||||
|
final HttpURLConnection conn = (HttpURLConnection) url.openConnection();
|
||||||
|
conn.setRequestMethod("GET");
|
||||||
|
conn.setRequestProperty("User-Agent", "OpenAIRE");
|
||||||
|
Gson gson = new Gson();
|
||||||
|
System.out.println("Request header");
|
||||||
|
System.out.println(gson.toJson(conn.getHeaderFields()));
|
||||||
|
InputStream inputStream = conn.getInputStream();
|
||||||
|
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -44,7 +44,7 @@ public class RestIteratorTest {
|
||||||
|
|
||||||
final RestIterator iterator = new RestIterator(clientParams, baseUrl, resumptionType, resumptionParam,
|
final RestIterator iterator = new RestIterator(clientParams, baseUrl, resumptionType, resumptionParam,
|
||||||
resumptionXpath, resultTotalXpath, resultFormatParam, resultFormatValue, resultSizeParam, resultSizeValue,
|
resumptionXpath, resultTotalXpath, resultFormatParam, resultFormatValue, resultSizeParam, resultSizeValue,
|
||||||
query, entityXpath, authMethod, authToken, resultOffsetParam);
|
query, entityXpath, authMethod, authToken, resultOffsetParam, null);
|
||||||
int i = 20;
|
int i = 20;
|
||||||
while (iterator.hasNext() && i > 0) {
|
while (iterator.hasNext() && i > 0) {
|
||||||
String result = iterator.next();
|
String result = iterator.next();
|
||||||
|
|
|
@ -1,68 +0,0 @@
|
||||||
package eu.dnetlib.dhp.transformation.xslt;
|
|
||||||
|
|
||||||
import org.json.JSONObject;
|
|
||||||
import org.junit.jupiter.api.AfterEach;
|
|
||||||
import org.junit.jupiter.api.BeforeEach;
|
|
||||||
import org.junit.jupiter.api.Test;
|
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.net.MalformedURLException;
|
|
||||||
import java.net.URI;
|
|
||||||
import java.net.URISyntaxException;
|
|
||||||
import java.net.URL;
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
import static org.junit.jupiter.api.Assertions.*;
|
|
||||||
|
|
||||||
class DataFetcherTest {
|
|
||||||
|
|
||||||
@BeforeEach
|
|
||||||
void setUp() {
|
|
||||||
}
|
|
||||||
|
|
||||||
@AfterEach
|
|
||||||
void tearDown() {
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
void getJson() throws IOException, URISyntaxException {
|
|
||||||
URL contributorsUrl = new URI("https://api.osf.io/v2/preprints/mrwqb/contributors/?format=json").toURL();
|
|
||||||
JSONObject testJsonObj = DataFetcher.getJson(contributorsUrl);
|
|
||||||
|
|
||||||
String x = testJsonObj
|
|
||||||
.getJSONArray("data")
|
|
||||||
.getJSONObject(0)
|
|
||||||
.getJSONObject("embeds")
|
|
||||||
.getJSONObject("users")
|
|
||||||
.getJSONObject("data")
|
|
||||||
.getJSONObject("attributes")
|
|
||||||
.getString("full_name");
|
|
||||||
System.out.println(x);
|
|
||||||
System.out.println(testJsonObj.getJSONArray("data").length());
|
|
||||||
testJsonObj.getJSONArray("data").forEach(System.out::println);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
void getAuthorsFromJson() throws IOException, URISyntaxException {
|
|
||||||
URL contributorsUrl = new URI("https://api.osf.io/v2/preprints/mrwqb/contributors/?format=json").toURL();
|
|
||||||
JSONObject testJsonObj = DataFetcher.getJson(contributorsUrl);
|
|
||||||
List<String> authors = DataFetcher.getAuthorsFromJson(testJsonObj);
|
|
||||||
System.out.println(authors);
|
|
||||||
System.out.println(DataFetcher.transformListToDublinCore(authors));
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
void getAndTransformAuthors() throws IOException, URISyntaxException {
|
|
||||||
URL contributorsUrl = new URI("https://api.osf.io/v2/preprints/mrwqb/contributors/?format=json").toURL();
|
|
||||||
System.out.println(DataFetcher.getAndTransformAuthors(contributorsUrl));
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
@Test
|
|
||||||
void getLinkToFulltextFromJson() throws URISyntaxException, IOException {
|
|
||||||
URL linkToFullTextDocument = new URI("https://api.osf.io/v2/files/5de7c96f84c479000c7928af/?format=json").toURL();
|
|
||||||
System.out.println(DataFetcher.getFullTextLinkAndTransform(linkToFullTextDocument));
|
|
||||||
|
|
||||||
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -4,6 +4,4 @@
|
||||||
{"DOI":"10.1142\/s021821650200186x","Matchings":[{"RORid":"https:\/\/ror.org\/035xkbk20","Confidence":1},{"RORid":"https:\/\/ror.org\/05apxxy63","Confidence":1}]}
|
{"DOI":"10.1142\/s021821650200186x","Matchings":[{"RORid":"https:\/\/ror.org\/035xkbk20","Confidence":1},{"RORid":"https:\/\/ror.org\/05apxxy63","Confidence":1}]}
|
||||||
{"DOI":"10.1061\/(asce)0733-9372(2002)128:7(575)","Matchings":[{"RORid":"https:\/\/ror.org\/04j198w64","Confidence":0.82}]}
|
{"DOI":"10.1061\/(asce)0733-9372(2002)128:7(575)","Matchings":[{"RORid":"https:\/\/ror.org\/04j198w64","Confidence":0.82}]}
|
||||||
{"DOI":"10.1061\/(asce)0733-9372(2002)128:7(588)","Matchings":[{"RORid":"https:\/\/ror.org\/03m8km719","Confidence":0.8660254038},{"RORid":"https:\/\/ror.org\/02aze4h65","Confidence":0.87}]}
|
{"DOI":"10.1061\/(asce)0733-9372(2002)128:7(588)","Matchings":[{"RORid":"https:\/\/ror.org\/03m8km719","Confidence":0.8660254038},{"RORid":"https:\/\/ror.org\/02aze4h65","Confidence":0.87}]}
|
||||||
{"DOI":"10.1161\/hy0202.103001","Matchings":[{"RORid":"https:\/\/ror.org\/057xtrt18","Confidence":0.7071067812}]}
|
{"DOI":"10.1161\/hy0202.103001","Matchings":[{"RORid":"https:\/\/ror.org\/057xtrt18","Confidence":0.7071067812}]}
|
||||||
{"DOI": "10.1080/13669877.2015.1042504", "Matchings": [{"Confidence": 1.0, "RORid": "https://ror.org/03265fv13"}]}
|
|
||||||
{"DOI": "10.1007/3-540-47984-8_14", "Matchings": [{"Confidence": 1.0, "RORid": "https://ror.org/00a0n9e72"}]}
|
|
|
@ -789,10 +789,6 @@
|
||||||
"value": "2227-9717",
|
"value": "2227-9717",
|
||||||
"type": "electronic"
|
"type": "electronic"
|
||||||
},
|
},
|
||||||
{
|
|
||||||
"value": "VALUE",
|
|
||||||
"type": "PIPPO"
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"value": "1063-4584",
|
"value": "1063-4584",
|
||||||
"type": "pu"
|
"type": "pu"
|
||||||
|
|
Binary file not shown.
|
@ -1,44 +1,15 @@
|
||||||
{"classification": "Signaling protein", "pdb": "5NM4", "deposition_date": "2017-04-05", "title": "A2a adenosine receptor room-temperature structure determined by serial Femtosecond crystallography", "Keywords": ["Oom-temperature", " serial crystallography", " signaling protein"], "authors": ["T.weinert", "R.cheng", "D.james", "D.gashi", "P.nogly", "K.jaeger", "M.hennig", "", "J.standfuss"], "pmid": "28912485", "doi": "10.1038/S41467-017-00630-4"}
|
{"pdb": "1CW0", "title": "crystal structure analysis of very short patch repair (vsr) endonuclease in complex with a duplex dna", "authors": ["S.E.Tsutakawa", "H.Jingami", "K.Morikawa"], "doi": "10.1016/S0092-8674(00)81550-0", "pmid": "10612397"}
|
||||||
{"classification": "Oxidoreductase/oxidoreductase inhibitor", "pdb": "4KN3", "deposition_date": "2013-05-08", "title": "Structure of the y34ns91g double mutant of dehaloperoxidase from Amphitrite ornata with 2,4,6-trichlorophenol", "Keywords": ["Lobin", " oxygen storage", " peroxidase", " oxidoreductase", " oxidoreductase-", "Oxidoreductase inhibitor complex"], "authors": ["C.wang", "L.lovelace", "L.lebioda"], "pmid": "23952341", "doi": "10.1021/BI400627W"}
|
{"pdb": "2CWW", "title": "crystal structure of thermus thermophilus ttha1280, a putative sam- dependent rna methyltransferase, in complex with s-adenosyl-l- homocysteine", "authors": ["A.A.Pioszak", "K.Murayama", "N.Nakagawa", "A.Ebihara", "S.Kuramitsu", "M.Shirouzu", "S.Yokoyama", "Riken Structural Genomics/proteomics Initiative (Rsgi)"], "doi": "10.1107/S1744309105029842", "pmid": "16511182"}
|
||||||
{"classification": "Transport protein", "pdb": "8HKM", "deposition_date": "2022-11-27", "title": "Ion channel", "Keywords": ["On channel", " transport protein"], "authors": ["D.h.jiang", "J.t.zhang"], "pmid": "37494189", "doi": "10.1016/J.CELREP.2023.112858"}
|
{"pdb": "6CWE", "title": "structure of alpha-gsa[8,6p] bound by cd1d and in complex with the va14vb8.2 tcr", "authors": ["J.Wang", "D.Zajonc"], "doi": null, "pmid": null}
|
||||||
{"classification": "Signaling protein", "pdb": "6JT1", "deposition_date": "2019-04-08", "title": "Structure of human soluble guanylate cyclase in the heme oxidised State", "Keywords": ["Oluble guanylate cyclase", " signaling protein"], "authors": ["L.chen", "Y.kang", "R.liu", "J.-x.wu"], "pmid": "31514202", "doi": "10.1038/S41586-019-1584-6"}
|
{"pdb": "5CWS", "title": "crystal structure of the intact chaetomium thermophilum nsp1-nup49- nup57 channel nucleoporin heterotrimer bound to its nic96 nuclear pore complex attachment site", "authors": ["C.J.Bley", "S.Petrovic", "M.Paduch", "V.Lu", "A.A.Kossiakoff", "A.Hoelz"], "doi": "10.1126/SCIENCE.AAC9176", "pmid": "26316600"}
|
||||||
{"classification": "Immune system", "pdb": "7OW6", "deposition_date": "2021-06-16", "title": "Crystal structure of a tcr in complex with hla-a*11:01 bound to kras G12d peptide (vvvgadgvgk)", "Keywords": ["La", " kras", " tcr", " immune system"], "authors": ["V.karuppiah", "R.a.robinson"], "doi": "10.1038/S41467-022-32811-1"}
|
{"pdb": "5CWE", "title": "structure of cyp107l2 from streptomyces avermitilis with lauric acid", "authors": ["T.-V.Pham", "S.-H.Han", "J.-H.Kim", "D.-H.Kim", "L.-W.Kang"], "doi": null, "pmid": null}
|
||||||
{"classification": "Biosynthetic protein", "pdb": "5EQ8", "deposition_date": "2015-11-12", "title": "Crystal structure of medicago truncatula histidinol-phosphate Phosphatase (mthpp) in complex with l-histidinol", "Keywords": ["Istidine biosynthesis", " metabolic pathways", " dimer", " plant", "", "Biosynthetic protein"], "authors": ["M.ruszkowski", "Z.dauter"], "pmid": "26994138", "doi": "10.1074/JBC.M115.708727"}
|
{"pdb": "7CW4", "title": "acetyl-coa acetyltransferase from bacillus cereus atcc 14579", "authors": ["J.Hong", "K.J.Kim"], "doi": "10.1016/J.BBRC.2020.09.048", "pmid": "32972748"}
|
||||||
{"classification": "De novo protein", "pdb": "8CWA", "deposition_date": "2022-05-18", "title": "Solution nmr structure of 8-residue rosetta-designed cyclic peptide D8.21 in cdcl3 with cis/trans switching (tc conformation, 53%)", "Keywords": ["Yclic peptide", " non natural amino acids", " cis/trans", " switch peptides", "", "De novo design", "Membrane permeability", "De novo protein"], "authors": ["T.a.ramelot", "R.tejero", "G.t.montelione"], "pmid": "36041435", "doi": "10.1016/J.CELL.2022.07.019"}
|
{"pdb": "2CWP", "title": "crystal structure of metrs related protein from pyrococcus horikoshii", "authors": ["K.Murayama", "M.Kato-Murayama", "M.Shirouzu", "S.Yokoyama", "Riken StructuralGenomics/proteomics Initiative (Rsgi)"], "doi": null, "pmid": null}
|
||||||
{"classification": "Hydrolase", "pdb": "3R6M", "deposition_date": "2011-03-21", "title": "Crystal structure of vibrio parahaemolyticus yeaz", "Keywords": ["Ctin/hsp70 nucleotide-binding fold", " bacterial resuscitation", " viable", "But non-culturable state", "Resuscitation promoting factor", "Ygjd", "", "Yjee", "Vibrio parahaemolyticus", "Hydrolase"], "authors": ["A.roujeinikova", "I.aydin"], "pmid": "21858042", "doi": "10.1371/JOURNAL.PONE.0023245"}
|
{"pdb": "2CW7", "title": "crystal structure of intein homing endonuclease ii", "authors": ["H.Matsumura", "H.Takahashi", "T.Inoue", "H.Hashimoto", "M.Nishioka", "S.Fujiwara", "M.Takagi", "T.Imanaka", "Y.Kai"], "doi": "10.1002/PROT.20858", "pmid": "16493661"}
|
||||||
{"classification": "Hydrolase", "pdb": "2W5J", "deposition_date": "2008-12-10", "title": "Structure of the c14-rotor ring of the proton translocating Chloroplast atp synthase", "Keywords": ["Ydrolase", " chloroplast", " atp synthase", " lipid-binding", " cf(0)", " membrane", "", "Transport", "Formylation", "Energy transduction", "Hydrogen ion transport", "", "Ion transport", "Transmembrane", "Membrane protein"], "authors": ["M.vollmar", "D.schlieper", "M.winn", "C.buechner", "G.groth"], "pmid": "19423706", "doi": "10.1074/JBC.M109.006916"}
|
{"pdb": "1CWU", "title": "brassica napus enoyl acp reductase a138g mutant complexed with nad+ and thienodiazaborine", "authors": ["A.Roujeinikova", "J.B.Rafferty", "D.W.Rice"], "doi": "10.1074/JBC.274.43.30811", "pmid": "10521472"}
|
||||||
{"classification": "De novo protein", "pdb": "4GLU", "deposition_date": "2012-08-14", "title": "Crystal structure of the mirror image form of vegf-a", "Keywords": ["-protein", " covalent dimer", " cysteine knot protein", " growth factor", " de", "Novo protein"], "authors": ["K.mandal", "M.uppalapati", "D.ault-riche", "J.kenney", "J.lowitz", "S.sidhu", "", "S.b.h.kent"], "pmid": "22927390", "doi": "10.1073/PNAS.1210483109"}
|
{"pdb": "3CWN", "title": "escherichia coli transaldolase b mutant f178y", "authors": ["T.Sandalova", "G.Schneider", "A.Samland"], "doi": "10.1074/JBC.M803184200", "pmid": "18687684"}
|
||||||
{"classification": "Hydrolase/hydrolase inhibitor", "pdb": "3WYL", "deposition_date": "2014-09-01", "title": "Crystal structure of the catalytic domain of pde10a complexed with 5- Methoxy-3-(1-phenyl-1h-pyrazol-5-yl)-1-(3-(trifluoromethyl)phenyl) Pyridazin-4(1h)-one", "Keywords": ["Ydrolase-hydrolase inhibitor complex"], "authors": ["H.oki", "Y.hayano"], "pmid": "25384088", "doi": "10.1021/JM5013648"}
|
{"pdb": "1CWL", "title": "human cyclophilin a complexed with 4 4-hydroxy-meleu cyclosporin", "authors": ["V.Mikol", "J.Kallen", "P.Taylor", "M.D.Walkinshaw"], "doi": "10.1006/JMBI.1998.2108", "pmid": "9769216"}
|
||||||
{"classification": "Isomerase", "pdb": "5BOR", "deposition_date": "2015-05-27", "title": "Structure of acetobacter aceti pure-s57c, sulfonate form", "Keywords": ["Cidophile", " pure", " purine biosynthesis", " isomerase"], "authors": ["K.l.sullivan", "T.j.kappock"]}
|
{"pdb": "3CW2", "title": "crystal structure of the intact archaeal translation initiation factor 2 from sulfolobus solfataricus .", "authors": ["E.A.Stolboushkina", "S.V.Nikonov", "A.D.Nikulin", "U.Blaesi", "D.J.Manstein", "R.V.Fedorov", "M.B.Garber", "O.S.Nikonov"], "doi": "10.1016/J.JMB.2008.07.039", "pmid": "18675278"}
|
||||||
{"classification": "Hydrolase", "pdb": "1X0C", "deposition_date": "2005-03-17", "title": "Improved crystal structure of isopullulanase from aspergillus niger Atcc 9642", "Keywords": ["Ullulan", " glycoside hydrolase family 49", " glycoprotein", " hydrolase"], "authors": ["M.mizuno", "T.tonozuka", "A.yamamura", "Y.miyasaka", "H.akeboshi", "S.kamitori", "", "A.nishikawa", "Y.sakano"], "pmid": "18155243", "doi": "10.1016/J.JMB.2007.11.098"}
|
{"pdb": "3CW9", "title": "4-chlorobenzoyl-coa ligase/synthetase in the thioester-forming conformation, bound to 4-chlorophenacyl-coa", "authors": ["A.S.Reger", "J.Cao", "R.Wu", "D.Dunaway-Mariano", "A.M.Gulick"], "doi": "10.1021/BI800696Y", "pmid": "18620418"}
|
||||||
{"classification": "Oxidoreductase", "pdb": "7CUP", "deposition_date": "2020-08-23", "title": "Structure of 2,5-dihydroxypridine dioxygenase from pseudomonas putida Kt2440", "Keywords": ["On-heme dioxygenase", " oxidoreductase"], "authors": ["G.q.liu", "H.z.tang"]}
|
{"pdb": "3CWU", "title": "crystal structure of an alka host/guest complex 2'-fluoro-2'-deoxy-1, n6-ethenoadenine:thymine base pair", "authors": ["B.R.Bowman", "S.Lee", "S.Wang", "G.L.Verdine"], "doi": "10.1016/J.STR.2008.04.012", "pmid": "18682218"}
|
||||||
{"classification": "Ligase", "pdb": "1VCN", "deposition_date": "2004-03-10", "title": "Crystal structure of t.th. hb8 ctp synthetase complex with sulfate Anion", "Keywords": ["Etramer", " riken structural genomics/proteomics initiative", " rsgi", "", "Structural genomics", "Ligase"], "authors": ["M.goto", "Riken structural genomics/proteomics initiative (rsgi)"], "pmid": "15296735", "doi": "10.1016/J.STR.2004.05.013"}
|
{"pdb": "5CWF", "title": "crystal structure of de novo designed helical repeat protein dhr8", "authors": ["G.Bhabha", "D.C.Ekiert"], "doi": "10.1038/NATURE16162", "pmid": "26675729"}
|
||||||
{"classification": "Transferase/transferase inhibitor", "pdb": "6C9V", "deposition_date": "2018-01-28", "title": "Mycobacterium tuberculosis adenosine kinase bound to (2r,3s,4r,5r)-2- (hydroxymethyl)-5-(6-(4-phenylpiperazin-1-yl)-9h-purin-9-yl) Tetrahydrofuran-3,4-diol", "Keywords": ["Ucleoside analog", " complex", " inhibitor", " structural genomics", " psi-2", "", "Protein structure initiative", "Tb structural genomics consortium", "", "Tbsgc", "Transferase-transferase inhibitor complex"], "authors": ["R.a.crespo", "Tb structural genomics consortium (tbsgc)"], "pmid": "31002508", "doi": "10.1021/ACS.JMEDCHEM.9B00020"}
|
|
||||||
{"classification": "De novo protein", "pdb": "4LPY", "deposition_date": "2013-07-16", "title": "Crystal structure of tencon variant g10", "Keywords": ["Ibronectin type iii fold", " alternate scaffold", " de novo protein"], "authors": ["A.teplyakov", "G.obmolova", "G.l.gilliland"], "pmid": "24375666", "doi": "10.1002/PROT.24502"}
|
|
||||||
{"classification": "Isomerase", "pdb": "2Y88", "deposition_date": "2011-02-03", "title": "Crystal structure of mycobacterium tuberculosis phosphoribosyl Isomerase (variant d11n) with bound prfar", "Keywords": ["Romatic amino acid biosynthesis", " isomerase", " tim-barrel", " histidine", "Biosynthesis", "Tryptophan biosynthesis"], "authors": ["J.kuper", "A.v.due", "A.geerlof", "M.wilmanns"], "pmid": "21321225", "doi": "10.1073/PNAS.1015996108"}
|
|
||||||
{"classification": "Unknown function", "pdb": "1SR0", "deposition_date": "2004-03-22", "title": "Crystal structure of signalling protein from sheep(sps-40) at 3.0a Resolution using crystal grown in the presence of polysaccharides", "Keywords": ["Ignalling protein", " involution", " unknown function"], "authors": ["D.b.srivastava", "A.s.ethayathulla", "N.singh", "J.kumar", "S.sharma", "T.p.singh"]}
|
|
||||||
{"classification": "Dna binding protein", "pdb": "3RH2", "deposition_date": "2011-04-11", "title": "Crystal structure of a tetr-like transcriptional regulator (sama_0099) From shewanella amazonensis sb2b at 2.42 a resolution", "Keywords": ["Na/rna-binding 3-helical bundle", " structural genomics", " joint center", "For structural genomics", "Jcsg", "Protein structure initiative", "Psi-", "Biology", "Dna binding protein"], "authors": ["Joint center for structural genomics (jcsg)"]}
|
|
||||||
{"classification": "Transferase", "pdb": "2WK5", "deposition_date": "2009-06-05", "title": "Structural features of native human thymidine phosphorylase And in complex with 5-iodouracil", "Keywords": ["Lycosyltransferase", " developmental protein", " angiogenesis", "", "5-iodouracil", "Growth factor", "Enzyme kinetics", "", "Differentiation", "Disease mutation", "Thymidine", "Phosphorylase", "Chemotaxis", "Transferase", "Mutagenesis", "", "Polymorphism"], "authors": ["E.mitsiki", "A.c.papageorgiou", "S.iyer", "N.thiyagarajan", "S.h.prior", "", "D.sleep", "C.finnis", "K.r.acharya"], "pmid": "19555658", "doi": "10.1016/J.BBRC.2009.06.104"}
|
|
||||||
{"classification": "Hydrolase", "pdb": "3P9Y", "deposition_date": "2010-10-18", "title": "Crystal structure of the drosophila melanogaster ssu72-pctd complex", "Keywords": ["Hosphatase", " cis proline", " lmw ptp-like fold", " rna polymerase ii ctd", "", "Hydrolase"], "authors": ["J.w.werner-allen", "P.zhou"], "pmid": "21159777", "doi": "10.1074/JBC.M110.197129"}
|
|
||||||
{"classification": "Recombination/dna", "pdb": "6OEO", "deposition_date": "2019-03-27", "title": "Cryo-em structure of mouse rag1/2 nfc complex (dna1)", "Keywords": ["(d)j recombination", " dna transposition", " rag", " scid", " recombination", "", "Recombination-dna complex"], "authors": ["X.chen", "Y.cui", "Z.h.zhou", "W.yang", "M.gellert"], "pmid": "32015552", "doi": "10.1038/S41594-019-0363-2"}
|
|
||||||
{"classification": "Hydrolase", "pdb": "4ECA", "deposition_date": "1997-02-21", "title": "Asparaginase from e. coli, mutant t89v with covalently bound aspartate", "Keywords": ["Ydrolase", " acyl-enzyme intermediate", " threonine amidohydrolase"], "authors": ["G.j.palm", "J.lubkowski", "A.wlodawer"], "pmid": "8706862", "doi": "10.1016/0014-5793(96)00660-6"}
|
|
||||||
{"classification": "Transcription/protein binding", "pdb": "3UVX", "deposition_date": "2011-11-30", "title": "Crystal structure of the first bromodomain of human brd4 in complex With a diacetylated histone 4 peptide (h4k12ack16ac)", "Keywords": ["Romodomain", " bromodomain containing protein 4", " cap", " hunk1", " mcap", "", "Mitotic chromosome associated protein", "Peptide complex", "Structural", "Genomics consortium", "Sgc", "Transcription-protein binding complex"], "authors": ["P.filippakopoulos", "S.picaud", "T.keates", "E.ugochukwu", "F.von delft", "", "C.h.arrowsmith", "A.m.edwards", "J.weigelt", "C.bountra", "S.knapp", "Structural", "Genomics consortium (sgc)"], "pmid": "22464331", "doi": "10.1016/J.CELL.2012.02.013"}
|
|
||||||
{"classification": "Membrane protein", "pdb": "1TLZ", "deposition_date": "2004-06-10", "title": "Tsx structure complexed with uridine", "Keywords": ["Ucleoside transporter", " beta barrel", " uridine", " membrane", "Protein"], "authors": ["J.ye", "B.van den berg"], "pmid": "15272310", "doi": "10.1038/SJ.EMBOJ.7600330"}
|
|
||||||
{"classification": "Dna binding protein", "pdb": "7AZD", "deposition_date": "2020-11-16", "title": "Dna polymerase sliding clamp from escherichia coli with peptide 20 Bound", "Keywords": ["Ntibacterial drug", " dna binding protein"], "authors": ["C.monsarrat", "G.compain", "C.andre", "I.martiel", "S.engilberge", "V.olieric", "", "P.wolff", "K.brillet", "M.landolfo", "C.silva da veiga", "J.wagner", "G.guichard", "", "D.y.burnouf"], "pmid": "34806883", "doi": "10.1021/ACS.JMEDCHEM.1C00918"}
|
|
||||||
{"classification": "Transferase", "pdb": "5N3K", "deposition_date": "2017-02-08", "title": "Camp-dependent protein kinase a from cricetulus griseus in complex With fragment like molecule o-guanidino-l-homoserine", "Keywords": ["Ragment", " complex", " transferase", " serine threonine kinase", " camp", "", "Kinase", "Pka"], "authors": ["C.siefker", "A.heine", "G.klebe"]}
|
|
||||||
{"classification": "Biosynthetic protein", "pdb": "8H52", "deposition_date": "2022-10-11", "title": "Crystal structure of helicobacter pylori carboxyspermidine Dehydrogenase in complex with nadp", "Keywords": ["Arboxyspermidine dehydrogenase", " biosynthetic protein"], "authors": ["K.y.ko", "S.c.park", "S.y.cho", "S.i.yoon"], "pmid": "36283333", "doi": "10.1016/J.BBRC.2022.10.049"}
|
|
||||||
{"classification": "Metal binding protein", "pdb": "6DYC", "deposition_date": "2018-07-01", "title": "Co(ii)-bound structure of the engineered cyt cb562 variant, ch3", "Keywords": ["Esigned protein", " 4-helix bundle", " electron transport", " metal binding", "Protein"], "authors": ["F.a.tezcan", "J.rittle"], "pmid": "30778140", "doi": "10.1038/S41557-019-0218-9"}
|
|
||||||
{"classification": "Protein fibril", "pdb": "6A6B", "deposition_date": "2018-06-27", "title": "Cryo-em structure of alpha-synuclein fiber", "Keywords": ["Lpha-syn fiber", " parkinson disease", " protein fibril"], "authors": ["Y.w.li", "C.y.zhao", "F.luo", "Z.liu", "X.gui", "Z.luo", "X.zhang", "D.li", "C.liu", "X.li"], "pmid": "30065316", "doi": "10.1038/S41422-018-0075-X"}
|
|
||||||
{"classification": "Dna", "pdb": "7D5E", "deposition_date": "2020-09-25", "title": "Left-handed g-quadruplex containing two bulges", "Keywords": ["-quadruplex", " bulge", " dna", " left-handed"], "authors": ["P.das", "A.maity", "K.h.ngo", "F.r.winnerdy", "B.bakalar", "Y.mechulam", "E.schmitt", "", "A.t.phan"], "pmid": "33503265", "doi": "10.1093/NAR/GKAA1259"}
|
|
||||||
{"classification": "Transferase", "pdb": "3RSY", "deposition_date": "2011-05-02", "title": "Cellobiose phosphorylase from cellulomonas uda in complex with sulfate And glycerol", "Keywords": ["H94", " alpha barrel", " cellobiose phosphorylase", " disaccharide", "Phosphorylase", "Transferase"], "authors": ["A.van hoorebeke", "J.stout", "W.soetaert", "J.van beeumen", "T.desmet", "S.savvides"]}
|
|
||||||
{"classification": "Oxidoreductase", "pdb": "7MCI", "deposition_date": "2021-04-02", "title": "Mofe protein from azotobacter vinelandii with a sulfur-replenished Cofactor", "Keywords": ["Zotobacter vinelandii", " mofe-protein", " nitrogenase", " oxidoreductase"], "authors": ["W.kang", "C.lee", "Y.hu", "M.w.ribbe"], "doi": "10.1038/S41929-022-00782-7"}
|
|
||||||
{"classification": "Dna", "pdb": "1XUW", "deposition_date": "2004-10-26", "title": "Structural rationalization of a large difference in rna affinity Despite a small difference in chemistry between two 2'-o-modified Nucleic acid analogs", "Keywords": ["Na mimetic methylcarbamate amide analog", " dna"], "authors": ["R.pattanayek", "L.sethaphong", "C.pan", "M.prhavc", "T.p.prakash", "M.manoharan", "", "M.egli"], "pmid": "15547979", "doi": "10.1021/JA044637K"}
|
|
||||||
{"classification": "Lyase", "pdb": "7C0D", "deposition_date": "2020-05-01", "title": "Crystal structure of azospirillum brasilense l-2-keto-3-deoxyarabonate Dehydratase (hydroxypyruvate-bound form)", "Keywords": ["-2-keto-3-deoxyarabonate dehydratase", " lyase"], "authors": ["Y.watanabe", "S.watanabe"], "pmid": "32697085", "doi": "10.1021/ACS.BIOCHEM.0C00515"}
|
|
||||||
{"classification": "Signaling protein", "pdb": "5LYK", "deposition_date": "2016-09-28", "title": "Crystal structure of intracellular b30.2 domain of btn3a1 bound to Citrate", "Keywords": ["30.2", " butyrophilin", " signaling protein"], "authors": ["F.mohammed", "A.t.baker", "M.salim", "B.e.willcox"], "pmid": "28862425", "doi": "10.1021/ACSCHEMBIO.7B00694"}
|
|
||||||
{"classification": "Toxin", "pdb": "4IZL", "deposition_date": "2013-01-30", "title": "Structure of the n248a mutant of the panton-valentine leucocidin s Component from staphylococcus aureus", "Keywords": ["I-component leucotoxin", " staphylococcus aureus", " s component", "Leucocidin", "Beta-barrel pore forming toxin", "Toxin"], "authors": ["L.maveyraud", "B.j.laventie", "G.prevost", "L.mourey"], "pmid": "24643034", "doi": "10.1371/JOURNAL.PONE.0092094"}
|
|
||||||
{"classification": "Dna", "pdb": "6F3C", "deposition_date": "2017-11-28", "title": "The cytotoxic [pt(h2bapbpy)] platinum complex interacting with the Cgtacg hexamer", "Keywords": ["Rug-dna complex", " four-way junction", " dna"], "authors": ["M.ferraroni", "C.bazzicalupi", "P.gratteri", "F.papi"], "pmid": "31046177", "doi": "10.1002/ANIE.201814532"}
|
|
||||||
{"classification": "Signaling protein/inhibitor", "pdb": "4L5M", "deposition_date": "2013-06-11", "title": "Complexe of arno sec7 domain with the protein-protein interaction Inhibitor n-(4-hydroxy-2,6-dimethylphenyl)benzenesulfonamide at ph6.5", "Keywords": ["Ec-7domain", " signaling protein-inhibitor complex"], "authors": ["F.hoh", "J.rouhana"], "pmid": "24112024", "doi": "10.1021/JM4009357"}
|
|
||||||
{"classification": "Signaling protein", "pdb": "5I6J", "deposition_date": "2016-02-16", "title": "Crystal structure of srgap2 f-barx", "Keywords": ["Rgap2", " f-bar", " fx", " signaling protein"], "authors": ["M.sporny", "J.guez-haddad", "M.n.isupov", "Y.opatowsky"], "pmid": "28333212", "doi": "10.1093/MOLBEV/MSX094"}
|
|
||||||
{"classification": "Metal binding protein", "pdb": "1Q80", "deposition_date": "2003-08-20", "title": "Solution structure and dynamics of nereis sarcoplasmic calcium binding Protein", "Keywords": ["Ll-alpha", " metal binding protein"], "authors": ["G.rabah", "R.popescu", "J.a.cox", "Y.engelborghs", "C.t.craescu"], "pmid": "15819893", "doi": "10.1111/J.1742-4658.2005.04629.X"}
|
|
||||||
{"classification": "Transferase", "pdb": "1TW1", "deposition_date": "2004-06-30", "title": "Beta-1,4-galactosyltransferase mutant met344his (m344h-gal-t1) complex With udp-galactose and magnesium", "Keywords": ["Et344his mutation; closed conformation; mn binding", " transferase"], "authors": ["B.ramakrishnan", "E.boeggeman", "P.k.qasba"], "pmid": "15449940", "doi": "10.1021/BI049007+"}
|
|
||||||
{"classification": "Rna", "pdb": "2PN4", "deposition_date": "2007-04-23", "title": "Crystal structure of hepatitis c virus ires subdomain iia", "Keywords": ["Cv", " ires", " subdoamin iia", " rna", " strontium", " hepatitis"], "authors": ["Q.zhao", "Q.han", "C.r.kissinger", "P.a.thompson"], "pmid": "18391410", "doi": "10.1107/S0907444908002011"}
|
|
|
@ -1,36 +1,6 @@
|
||||||
{"pid": " Q6GZX4", "dates": [{"date": "2011-06-28", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2004-07-19", "date_info": "sequence version 1"}, {"date": "2023-09-13", "date_info": "entry version 43"}], "title": "Putative transcription factor 001R", "organism_species": "Frog virus 3 (isolate Goorha) (FV-3)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Alphairidovirinae", "Ranavirus"], "references": [{"PubMed": "15165820"}, {"DOI": "10.1016/j.virol.2004.02.019"}]}
|
{"pid": "Q6GZX4", "dates": [{"date": "28-JUN-2011", "date_info": " integrated into UniProtKB/Swiss-Prot."}, {"date": "19-JUL-2004", "date_info": " sequence version 1."}, {"date": "12-AUG-2020", "date_info": " entry version 41."}], "title": "Putative transcription factor 001R;", "organism_species": "Frog virus 3 (isolate Goorha) (FV-3).", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Alphairidovirinae", "Ranavirus."], "references": [{"PubMed": "15165820"}, {" DOI": "10.1016/j.virol.2004.02.019"}]}
|
||||||
{"pid": " Q6GZX3", "dates": [{"date": "2011-06-28", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2004-07-19", "date_info": "sequence version 1"}, {"date": "2023-09-13", "date_info": "entry version 45"}], "title": "Uncharacterized protein 002L", "organism_species": "Frog virus 3 (isolate Goorha) (FV-3)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Alphairidovirinae", "Ranavirus"], "references": [{"PubMed": "15165820"}, {"DOI": "10.1016/j.virol.2004.02.019"}]}
|
{"pid": "Q6GZX3", "dates": [{"date": "28-JUN-2011", "date_info": " integrated into UniProtKB/Swiss-Prot."}, {"date": "19-JUL-2004", "date_info": " sequence version 1."}, {"date": "12-AUG-2020", "date_info": " entry version 42."}], "title": "Uncharacterized protein 002L;", "organism_species": "Frog virus 3 (isolate Goorha) (FV-3).", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Alphairidovirinae", "Ranavirus."], "references": [{"PubMed": "15165820"}, {" DOI": "10.1016/j.virol.2004.02.019"}]}
|
||||||
{"pid": " Q197F8", "dates": [{"date": "2009-06-16", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2006-07-11", "date_info": "sequence version 1"}, {"date": "2022-02-23", "date_info": "entry version 29"}], "title": "Uncharacterized protein 002R", "organism_species": "Invertebrate iridescent virus 3 (IIV-3) (Mosquito iridescent virus)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Betairidovirinae", "Chloriridovirus"], "references": [{"PubMed": "16912294"}, {"DOI": "10.1128/jvi.00464-06"}]}
|
{"pid": "Q197F8", "dates": [{"date": "16-JUN-2009", "date_info": " integrated into UniProtKB/Swiss-Prot."}, {"date": "11-JUL-2006", "date_info": " sequence version 1."}, {"date": "12-AUG-2020", "date_info": " entry version 27."}], "title": "Uncharacterized protein 002R;", "organism_species": "Invertebrate iridescent virus 3 (IIV-3) (Mosquito iridescent virus).", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Betairidovirinae", "Chloriridovirus."], "references": [{"PubMed": "16912294"}, {" DOI": "10.1128/jvi.00464-06"}]}
|
||||||
{"pid": " Q197F7", "dates": [{"date": "2009-06-16", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2006-07-11", "date_info": "sequence version 1"}, {"date": "2020-08-12", "date_info": "entry version 23"}], "title": "Uncharacterized protein 003L", "organism_species": "Invertebrate iridescent virus 3 (IIV-3) (Mosquito iridescent virus)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Betairidovirinae", "Chloriridovirus"], "references": [{"PubMed": "16912294"}, {"DOI": "10.1128/jvi.00464-06"}]}
|
{"pid": "Q197F7", "dates": [{"date": "16-JUN-2009", "date_info": " integrated into UniProtKB/Swiss-Prot."}, {"date": "11-JUL-2006", "date_info": " sequence version 1."}, {"date": "12-AUG-2020", "date_info": " entry version 23."}], "title": "Uncharacterized protein 003L;", "organism_species": "Invertebrate iridescent virus 3 (IIV-3) (Mosquito iridescent virus).", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Betairidovirinae", "Chloriridovirus."], "references": [{"PubMed": "16912294"}, {" DOI": "10.1128/jvi.00464-06"}]}
|
||||||
{"pid": " Q6GZX2", "dates": [{"date": "2011-06-28", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2004-07-19", "date_info": "sequence version 1"}, {"date": "2023-09-13", "date_info": "entry version 37"}], "title": "Uncharacterized protein 3R", "organism_species": "Frog virus 3 (isolate Goorha) (FV-3)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Alphairidovirinae", "Ranavirus"], "references": [{"PubMed": "15165820"}, {"DOI": "10.1016/j.virol.2004.02.019"}]}
|
{"pid": "Q6GZX2", "dates": [{"date": "28-JUN-2011", "date_info": " integrated into UniProtKB/Swiss-Prot."}, {"date": "19-JUL-2004", "date_info": " sequence version 1."}, {"date": "12-AUG-2020", "date_info": " entry version 36."}], "title": "Uncharacterized protein 3R;", "organism_species": "Frog virus 3 (isolate Goorha) (FV-3).", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Alphairidovirinae", "Ranavirus."], "references": [{"PubMed": "15165820"}, {" DOI": "10.1016/j.virol.2004.02.019"}]}
|
||||||
{"pid": " Q6GZX1", "dates": [{"date": "2011-06-28", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2004-07-19", "date_info": "sequence version 1"}, {"date": "2023-09-13", "date_info": "entry version 38"}], "title": "Uncharacterized protein 004R", "organism_species": "Frog virus 3 (isolate Goorha) (FV-3)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Alphairidovirinae", "Ranavirus"], "references": [{"PubMed": "15165820"}, {"DOI": "10.1016/j.virol.2004.02.019"}]}
|
{"pid": "Q6GZX1", "dates": [{"date": "28-JUN-2011", "date_info": " integrated into UniProtKB/Swiss-Prot."}, {"date": "19-JUL-2004", "date_info": " sequence version 1."}, {"date": "12-AUG-2020", "date_info": " entry version 34."}], "title": "Uncharacterized protein 004R;", "organism_species": "Frog virus 3 (isolate Goorha) (FV-3).", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Alphairidovirinae", "Ranavirus."], "references": [{"PubMed": "15165820"}, {" DOI": "10.1016/j.virol.2004.02.019"}]}
|
||||||
{"pid": " Q197F5", "dates": [{"date": "2009-06-16", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2006-07-11", "date_info": "sequence version 1"}, {"date": "2022-10-12", "date_info": "entry version 32"}], "title": "Uncharacterized protein 005L", "organism_species": "Invertebrate iridescent virus 3 (IIV-3) (Mosquito iridescent virus)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Betairidovirinae", "Chloriridovirus"], "references": [{"PubMed": "16912294"}, {"DOI": "10.1128/jvi.00464-06"}]}
|
|
||||||
{"pid": " Q6GZX0", "dates": [{"date": "2011-06-28", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2004-07-19", "date_info": "sequence version 1"}, {"date": "2023-09-13", "date_info": "entry version 47"}], "title": "Uncharacterized protein 005R", "organism_species": "Frog virus 3 (isolate Goorha) (FV-3)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Alphairidovirinae", "Ranavirus"], "references": [{"PubMed": "15165820"}, {"DOI": "10.1016/j.virol.2004.02.019"}]}
|
|
||||||
{"pid": " Q91G88", "dates": [{"date": "2009-06-16", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2001-12-01", "date_info": "sequence version 1"}, {"date": "2023-06-28", "date_info": "entry version 53"}], "title": "Putative KilA-N domain-containing protein 006L", "organism_species": "Invertebrate iridescent virus 6 (IIV-6) (Chilo iridescent virus)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Betairidovirinae", "Iridovirus"], "references": [{"PubMed": "17239238"}, {"DOI": "10.1186/1743-422x-4-11"}]}
|
|
||||||
{"pid": " Q6GZW9", "dates": [{"date": "2011-06-28", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2004-07-19", "date_info": "sequence version 1"}, {"date": "2023-09-13", "date_info": "entry version 34"}], "title": "Uncharacterized protein 006R", "organism_species": "Frog virus 3 (isolate Goorha) (FV-3)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Alphairidovirinae", "Ranavirus"], "references": [{"PubMed": "15165820"}, {"DOI": "10.1016/j.virol.2004.02.019"}]}
|
|
||||||
{"pid": " Q6GZW8", "dates": [{"date": "2011-06-28", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2004-07-19", "date_info": "sequence version 1"}, {"date": "2023-09-13", "date_info": "entry version 32"}], "title": "Uncharacterized protein 007R", "organism_species": "Frog virus 3 (isolate Goorha) (FV-3)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Alphairidovirinae", "Ranavirus"], "references": [{"PubMed": "15165820"}, {"DOI": "10.1016/j.virol.2004.02.019"}]}
|
|
||||||
{"pid": " Q197F3", "dates": [{"date": "2009-06-16", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2006-07-11", "date_info": "sequence version 1"}, {"date": "2023-02-22", "date_info": "entry version 28"}], "title": "Uncharacterized protein 007R", "organism_species": "Invertebrate iridescent virus 3 (IIV-3) (Mosquito iridescent virus)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Betairidovirinae", "Chloriridovirus"], "references": [{"PubMed": "16912294"}, {"DOI": "10.1128/jvi.00464-06"}]}
|
|
||||||
{"pid": " Q197F2", "dates": [{"date": "2009-06-16", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2006-07-11", "date_info": "sequence version 1"}, {"date": "2022-02-23", "date_info": "entry version 22"}], "title": "Uncharacterized protein 008L", "organism_species": "Invertebrate iridescent virus 3 (IIV-3) (Mosquito iridescent virus)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Betairidovirinae", "Chloriridovirus"], "references": [{"PubMed": "16912294"}, {"DOI": "10.1128/jvi.00464-06"}]}
|
|
||||||
{"pid": " Q6GZW6", "dates": [{"date": "2011-06-28", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2004-07-19", "date_info": "sequence version 1"}, {"date": "2023-09-13", "date_info": "entry version 67"}], "title": "Putative helicase 009L", "organism_species": "Frog virus 3 (isolate Goorha) (FV-3)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Alphairidovirinae", "Ranavirus"], "references": [{"PubMed": "15165820"}, {"DOI": "10.1016/j.virol.2004.02.019"}]}
|
|
||||||
{"pid": " Q91G85", "dates": [{"date": "2009-06-16", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2001-12-01", "date_info": "sequence version 1"}, {"date": "2023-02-22", "date_info": "entry version 38"}], "title": "Uncharacterized protein 009R", "organism_species": "Invertebrate iridescent virus 6 (IIV-6) (Chilo iridescent virus)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Betairidovirinae", "Iridovirus"], "references": [{"PubMed": "17239238"}, {"DOI": "10.1186/1743-422x-4-11"}]}
|
|
||||||
{"pid": " Q6GZW5", "dates": [{"date": "2011-06-28", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2004-07-19", "date_info": "sequence version 1"}, {"date": "2023-09-13", "date_info": "entry version 37"}], "title": "Uncharacterized protein 010R", "organism_species": "Frog virus 3 (isolate Goorha) (FV-3)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Alphairidovirinae", "Ranavirus"], "references": [{"PubMed": "15165820"}, {"DOI": "10.1016/j.virol.2004.02.019"}]}
|
|
||||||
{"pid": " Q197E9", "dates": [{"date": "2009-06-16", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2006-07-11", "date_info": "sequence version 1"}, {"date": "2023-02-22", "date_info": "entry version 28"}], "title": "Uncharacterized protein 011L", "organism_species": "Invertebrate iridescent virus 3 (IIV-3) (Mosquito iridescent virus)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Betairidovirinae", "Chloriridovirus"], "references": [{"PubMed": "16912294"}, {"DOI": "10.1128/jvi.00464-06"}]}
|
|
||||||
{"pid": " Q6GZW4", "dates": [{"date": "2011-06-28", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2004-07-19", "date_info": "sequence version 1"}, {"date": "2023-09-13", "date_info": "entry version 37"}], "title": "Uncharacterized protein 011R", "organism_species": "Frog virus 3 (isolate Goorha) (FV-3)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Alphairidovirinae", "Ranavirus"], "references": [{"PubMed": "15165820"}, {"DOI": "10.1016/j.virol.2004.02.019"}]}
|
|
||||||
{"pid": " Q6GZW3", "dates": [{"date": "2011-06-28", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2004-07-19", "date_info": "sequence version 1"}, {"date": "2023-09-13", "date_info": "entry version 35"}], "title": "Uncharacterized protein 012L", "organism_species": "Frog virus 3 (isolate Goorha) (FV-3)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Alphairidovirinae", "Ranavirus"], "references": [{"PubMed": "15165820"}, {"DOI": "10.1016/j.virol.2004.02.019"}]}
|
|
||||||
{"pid": " Q197E7", "dates": [{"date": "2009-06-16", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2006-07-11", "date_info": "sequence version 1"}, {"date": "2023-02-22", "date_info": "entry version 37"}], "title": "Uncharacterized protein IIV3-013L", "organism_species": "Invertebrate iridescent virus 3 (IIV-3) (Mosquito iridescent virus)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Betairidovirinae", "Chloriridovirus"], "references": [{"PubMed": "16912294"}, {"DOI": "10.1128/jvi.00464-06"}]}
|
|
||||||
{"pid": " Q6GZW2", "dates": [{"date": "2011-06-28", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2004-07-19", "date_info": "sequence version 1"}, {"date": "2023-09-13", "date_info": "entry version 30"}], "title": "Uncharacterized protein 013R", "organism_species": "Frog virus 3 (isolate Goorha) (FV-3)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Alphairidovirinae", "Ranavirus"], "references": [{"PubMed": "15165820"}, {"DOI": "10.1016/j.virol.2004.02.019"}]}
|
|
||||||
{"pid": " Q6GZW1", "dates": [{"date": "2011-06-28", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2004-07-19", "date_info": "sequence version 1"}, {"date": "2023-09-13", "date_info": "entry version 35"}], "title": "Uncharacterized protein 014R", "organism_species": "Frog virus 3 (isolate Goorha) (FV-3)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Alphairidovirinae", "Ranavirus"], "references": [{"PubMed": "15165820"}, {"DOI": "10.1016/j.virol.2004.02.019"}]}
|
|
||||||
{"pid": " Q6GZW0", "dates": [{"date": "2011-06-28", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2004-07-19", "date_info": "sequence version 1"}, {"date": "2023-09-13", "date_info": "entry version 50"}], "title": "Uncharacterized protein 015R", "organism_species": "Frog virus 3 (isolate Goorha) (FV-3)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Alphairidovirinae", "Ranavirus"], "references": [{"PubMed": "15165820"}, {"DOI": "10.1016/j.virol.2004.02.019"}]}
|
|
||||||
{"pid": " Q6GZV8", "dates": [{"date": "2011-06-28", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2004-07-19", "date_info": "sequence version 1"}, {"date": "2023-09-13", "date_info": "entry version 35"}], "title": "Uncharacterized protein 017L", "organism_species": "Frog virus 3 (isolate Goorha) (FV-3)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Alphairidovirinae", "Ranavirus"], "references": [{"PubMed": "15165820"}, {"DOI": "10.1016/j.virol.2004.02.019"}]}
|
|
||||||
{"pid": " Q6GZV7", "dates": [{"date": "2011-06-28", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2004-07-19", "date_info": "sequence version 1"}, {"date": "2023-09-13", "date_info": "entry version 33"}], "title": "Uncharacterized protein 018L", "organism_species": "Frog virus 3 (isolate Goorha) (FV-3)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Alphairidovirinae", "Ranavirus"], "references": [{"PubMed": "15165820"}, {"DOI": "10.1016/j.virol.2004.02.019"}]}
|
|
||||||
{"pid": " Q6GZV6", "dates": [{"date": "2011-06-28", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2004-07-19", "date_info": "sequence version 1"}, {"date": "2023-09-13", "date_info": "entry version 87"}], "title": "Putative serine/threonine-protein kinase 019R", "organism_species": "Frog virus 3 (isolate Goorha) (FV-3)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Alphairidovirinae", "Ranavirus"], "references": [{"PubMed": "15165820"}, {"DOI": "10.1016/j.virol.2004.02.019"}]}
|
|
||||||
{"pid": " Q6GZV5", "dates": [{"date": "2011-06-28", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2004-07-19", "date_info": "sequence version 1"}, {"date": "2023-09-13", "date_info": "entry version 40"}], "title": "Uncharacterized protein 020R", "organism_species": "Frog virus 3 (isolate Goorha) (FV-3)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Alphairidovirinae", "Ranavirus"], "references": [{"PubMed": "15165820"}, {"DOI": "10.1016/j.virol.2004.02.019"}]}
|
|
||||||
{"pid": " Q6GZV4", "dates": [{"date": "2011-06-28", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2004-07-19", "date_info": "sequence version 1"}, {"date": "2023-09-13", "date_info": "entry version 35"}], "title": "Uncharacterized protein 021L", "organism_species": "Frog virus 3 (isolate Goorha) (FV-3)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Alphairidovirinae", "Ranavirus"], "references": [{"PubMed": "15165820"}, {"DOI": "10.1016/j.virol.2004.02.019"}]}
|
|
||||||
{"pid": " Q197D8", "dates": [{"date": "2009-06-16", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2006-07-11", "date_info": "sequence version 1"}, {"date": "2022-12-14", "date_info": "entry version 35"}], "title": "Transmembrane protein 022L", "organism_species": "Invertebrate iridescent virus 3 (IIV-3) (Mosquito iridescent virus)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Betairidovirinae", "Chloriridovirus"], "references": [{"PubMed": "16912294"}, {"DOI": "10.1128/jvi.00464-06"}]}
|
|
||||||
{"pid": " Q6GZV2", "dates": [{"date": "2011-06-28", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2004-07-19", "date_info": "sequence version 1"}, {"date": "2023-09-13", "date_info": "entry version 33"}], "title": "Uncharacterized protein 023R", "organism_species": "Frog virus 3 (isolate Goorha) (FV-3)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Alphairidovirinae", "Ranavirus"], "references": [{"PubMed": "15165820"}, {"DOI": "10.1016/j.virol.2004.02.019"}]}
|
|
||||||
{"pid": " Q197D7", "dates": [{"date": "2009-06-16", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2006-07-11", "date_info": "sequence version 1"}, {"date": "2023-02-22", "date_info": "entry version 25"}], "title": "Uncharacterized protein 023R", "organism_species": "Invertebrate iridescent virus 3 (IIV-3) (Mosquito iridescent virus)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Betairidovirinae", "Chloriridovirus"], "references": [{"PubMed": "16912294"}, {"DOI": "10.1128/jvi.00464-06"}]}
|
|
||||||
{"pid": " Q6GZV1", "dates": [{"date": "2011-06-28", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2004-07-19", "date_info": "sequence version 1"}, {"date": "2023-09-13", "date_info": "entry version 37"}], "title": "Uncharacterized protein 024R", "organism_species": "Frog virus 3 (isolate Goorha) (FV-3)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Alphairidovirinae", "Ranavirus"], "references": [{"PubMed": "15165820"}, {"DOI": "10.1016/j.virol.2004.02.019"}]}
|
|
||||||
{"pid": " Q197D5", "dates": [{"date": "2009-06-16", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2006-07-11", "date_info": "sequence version 1"}, {"date": "2022-10-12", "date_info": "entry version 24"}], "title": "Uncharacterized protein 025R", "organism_species": "Invertebrate iridescent virus 3 (IIV-3) (Mosquito iridescent virus)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Betairidovirinae", "Chloriridovirus"], "references": [{"PubMed": "16912294"}, {"DOI": "10.1128/jvi.00464-06"}]}
|
|
||||||
{"pid": " Q91G70", "dates": [{"date": "2009-06-16", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2001-12-01", "date_info": "sequence version 1"}, {"date": "2020-08-12", "date_info": "entry version 32"}], "title": "Uncharacterized protein 026R", "organism_species": "Invertebrate iridescent virus 6 (IIV-6) (Chilo iridescent virus)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Betairidovirinae", "Iridovirus"], "references": [{"PubMed": "17239238"}, {"DOI": "10.1186/1743-422x-4-11"}]}
|
|
||||||
{"pid": " Q6GZU9", "dates": [{"date": "2011-06-28", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2004-07-19", "date_info": "sequence version 1"}, {"date": "2023-09-13", "date_info": "entry version 49"}], "title": "Uncharacterized protein 027R", "organism_species": "Frog virus 3 (isolate Goorha) (FV-3)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Alphairidovirinae", "Ranavirus"], "references": [{"PubMed": "15165820"}, {"DOI": "10.1016/j.virol.2004.02.019"}]}
|
|
||||||
{"pid": " Q6GZU8", "dates": [{"date": "2011-06-28", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2004-07-19", "date_info": "sequence version 1"}, {"date": "2023-09-13", "date_info": "entry version 55"}], "title": "Uncharacterized protein 028R", "organism_species": "Frog virus 3 (isolate Goorha) (FV-3)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Alphairidovirinae", "Ranavirus"], "references": [{"PubMed": "15165820"}, {"DOI": "10.1016/j.virol.2004.02.019"}]}
|
|
|
@ -2,7 +2,9 @@ package eu.dnetlib.dhp.collection.crossref
|
||||||
|
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper
|
import com.fasterxml.jackson.databind.ObjectMapper
|
||||||
import eu.dnetlib.dhp.aggregation.AbstractVocabularyTest
|
import eu.dnetlib.dhp.aggregation.AbstractVocabularyTest
|
||||||
import org.junit.jupiter.api.BeforeEach
|
import eu.dnetlib.dhp.collection.crossref.Crossref2Oaf.TransformationType
|
||||||
|
import org.apache.commons.io.IOUtils
|
||||||
|
import org.junit.jupiter.api.{BeforeEach, Test}
|
||||||
import org.junit.jupiter.api.extension.ExtendWith
|
import org.junit.jupiter.api.extension.ExtendWith
|
||||||
import org.mockito.junit.jupiter.MockitoExtension
|
import org.mockito.junit.jupiter.MockitoExtension
|
||||||
import org.slf4j.{Logger, LoggerFactory}
|
import org.slf4j.{Logger, LoggerFactory}
|
||||||
|
@ -18,4 +20,13 @@ class CrossrefMappingTest extends AbstractVocabularyTest {
|
||||||
super.setUpVocabulary()
|
super.setUpVocabulary()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
def mappingRecord(): Unit = {
|
||||||
|
val input =
|
||||||
|
IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/collection/crossref/issn_pub.json"), "utf-8")
|
||||||
|
|
||||||
|
println(Crossref2Oaf.convert(input, vocabularies, TransformationType.All))
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -16,6 +16,7 @@ import org.mockito.junit.jupiter.MockitoExtension
|
||||||
|
|
||||||
import java.io.{BufferedReader, InputStream, InputStreamReader}
|
import java.io.{BufferedReader, InputStream, InputStreamReader}
|
||||||
import java.util.zip.GZIPInputStream
|
import java.util.zip.GZIPInputStream
|
||||||
|
import javax.xml.stream.XMLInputFactory
|
||||||
import scala.collection.JavaConverters._
|
import scala.collection.JavaConverters._
|
||||||
import scala.collection.mutable.ListBuffer
|
import scala.collection.mutable.ListBuffer
|
||||||
import scala.io.Source
|
import scala.io.Source
|
||||||
|
@ -49,10 +50,8 @@ class BioScholixTest extends AbstractVocabularyTest {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
def testEBIData() = {
|
def testEBIData() = {
|
||||||
val inputXML = Source
|
val inputFactory = XMLInputFactory.newInstance
|
||||||
.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/bio/pubmed.xml"))
|
val xml = inputFactory.createXMLEventReader(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/bio/pubmed.xml"))
|
||||||
.mkString
|
|
||||||
val xml = new XMLEventReader(Source.fromBytes(inputXML.getBytes()))
|
|
||||||
new PMParser(xml).foreach(s => println(mapper.writeValueAsString(s)))
|
new PMParser(xml).foreach(s => println(mapper.writeValueAsString(s)))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -91,9 +90,10 @@ class BioScholixTest extends AbstractVocabularyTest {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
def testParsingPubmedXML(): Unit = {
|
def testParsingPubmedXML(): Unit = {
|
||||||
val xml = new XMLEventReader(
|
val inputFactory = XMLInputFactory.newInstance
|
||||||
Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/bio/pubmed.xml"))
|
|
||||||
)
|
val xml = inputFactory.createXMLEventReader(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/bio/pubmed.xml"))
|
||||||
|
|
||||||
val parser = new PMParser(xml)
|
val parser = new PMParser(xml)
|
||||||
parser.foreach(checkPMArticle)
|
parser.foreach(checkPMArticle)
|
||||||
}
|
}
|
||||||
|
@ -156,9 +156,9 @@ class BioScholixTest extends AbstractVocabularyTest {
|
||||||
@Test
|
@Test
|
||||||
def testPubmedMapping(): Unit = {
|
def testPubmedMapping(): Unit = {
|
||||||
|
|
||||||
val xml = new XMLEventReader(
|
val inputFactory = XMLInputFactory.newInstance
|
||||||
Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/bio/pubmed.xml"))
|
val xml = inputFactory.createXMLEventReader(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/bio/pubmed.xml"))
|
||||||
)
|
|
||||||
val parser = new PMParser(xml)
|
val parser = new PMParser(xml)
|
||||||
val results = ListBuffer[Oaf]()
|
val results = ListBuffer[Oaf]()
|
||||||
parser.foreach(x => results += PubMedToOaf.convert(x, vocabularies))
|
parser.foreach(x => results += PubMedToOaf.convert(x, vocabularies))
|
||||||
|
|
|
@ -26,15 +26,15 @@ import eu.dnetlib.dhp.schema.oaf.Publication;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Software;
|
import eu.dnetlib.dhp.schema.oaf.Software;
|
||||||
|
|
||||||
public class PrepareSimpleEntitiesJob {
|
public class PrepareSimpleEntititiesJob {
|
||||||
|
|
||||||
private static final Logger log = LoggerFactory.getLogger(PrepareSimpleEntitiesJob.class);
|
private static final Logger log = LoggerFactory.getLogger(PrepareSimpleEntititiesJob.class);
|
||||||
|
|
||||||
public static void main(final String[] args) throws Exception {
|
public static void main(final String[] args) throws Exception {
|
||||||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
|
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
|
||||||
IOUtils
|
IOUtils
|
||||||
.toString(
|
.toString(
|
||||||
PrepareSimpleEntitiesJob.class
|
PrepareSimpleEntititiesJob.class
|
||||||
.getResourceAsStream("/eu/dnetlib/dhp/broker/oa/common_params.json")));
|
.getResourceAsStream("/eu/dnetlib/dhp/broker/oa/common_params.json")));
|
||||||
parser.parseArgument(args);
|
parser.parseArgument(args);
|
||||||
|
|
|
@ -160,7 +160,8 @@ public class ConversionUtils {
|
||||||
.stream()
|
.stream()
|
||||||
.filter(Objects::nonNull)
|
.filter(Objects::nonNull)
|
||||||
.filter(pid -> pid.getQualifier() != null)
|
.filter(pid -> pid.getQualifier() != null)
|
||||||
.filter(pid -> StringUtils.startsWithIgnoreCase(pid.getQualifier().getClassid(), ModelConstants.ORCID))
|
.filter(pid -> pid.getQualifier().getClassid() != null)
|
||||||
|
.filter(pid -> pid.getQualifier().getClassid().equalsIgnoreCase(ModelConstants.ORCID))
|
||||||
.map(StructuredProperty::getValue)
|
.map(StructuredProperty::getValue)
|
||||||
.map(ConversionUtils::cleanOrcid)
|
.map(ConversionUtils::cleanOrcid)
|
||||||
.filter(StringUtils::isNotBlank)
|
.filter(StringUtils::isNotBlank)
|
||||||
|
|
|
@ -7,7 +7,7 @@
|
||||||
</property>
|
</property>
|
||||||
<property>
|
<property>
|
||||||
<name>outputDir</name>
|
<name>outputDir</name>
|
||||||
<description>the path where the generated data will be stored</description>
|
<description>the path where the the generated data will be stored</description>
|
||||||
</property>
|
</property>
|
||||||
<property>
|
<property>
|
||||||
<name>datasourceIdWhitelist</name>
|
<name>datasourceIdWhitelist</name>
|
||||||
|
@ -179,18 +179,17 @@
|
||||||
<master>yarn</master>
|
<master>yarn</master>
|
||||||
<mode>cluster</mode>
|
<mode>cluster</mode>
|
||||||
<name>PrepareSimpleEntititiesJob</name>
|
<name>PrepareSimpleEntititiesJob</name>
|
||||||
<class>eu.dnetlib.dhp.broker.oa.PrepareSimpleEntitiesJob</class>
|
<class>eu.dnetlib.dhp.broker.oa.PrepareSimpleEntititiesJob</class>
|
||||||
<jar>dhp-broker-events-${projectVersion}.jar</jar>
|
<jar>dhp-broker-events-${projectVersion}.jar</jar>
|
||||||
<spark-opts>
|
<spark-opts>
|
||||||
--executor-cores=${sparkExecutorCores}
|
--executor-cores=${sparkExecutorCores}
|
||||||
--executor-memory=${sparkExecutorMemory}
|
--executor-memory=${sparkExecutorMemory}
|
||||||
--driver-memory=${sparkDriverMemory}
|
--driver-memory=${sparkDriverMemory}
|
||||||
--conf spark.executor.memoryOverhead=${sparkExecutorMemory}
|
|
||||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||||
--conf spark.sql.shuffle.partitions=5000
|
--conf spark.sql.shuffle.partitions=3840
|
||||||
</spark-opts>
|
</spark-opts>
|
||||||
<arg>--graphPath</arg><arg>${graphInputPath}</arg>
|
<arg>--graphPath</arg><arg>${graphInputPath}</arg>
|
||||||
<arg>--workingDir</arg><arg>${workingDir}</arg>
|
<arg>--workingDir</arg><arg>${workingDir}</arg>
|
||||||
|
@ -210,12 +209,11 @@
|
||||||
--executor-cores=${sparkExecutorCores}
|
--executor-cores=${sparkExecutorCores}
|
||||||
--executor-memory=${sparkExecutorMemory}
|
--executor-memory=${sparkExecutorMemory}
|
||||||
--driver-memory=${sparkDriverMemory}
|
--driver-memory=${sparkDriverMemory}
|
||||||
--conf spark.executor.memoryOverhead=${sparkExecutorMemory}
|
|
||||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||||
--conf spark.sql.shuffle.partitions=8000
|
--conf spark.sql.shuffle.partitions=3840
|
||||||
</spark-opts>
|
</spark-opts>
|
||||||
<arg>--graphPath</arg><arg>${graphInputPath}</arg>
|
<arg>--graphPath</arg><arg>${graphInputPath}</arg>
|
||||||
<arg>--workingDir</arg><arg>${workingDir}</arg>
|
<arg>--workingDir</arg><arg>${workingDir}</arg>
|
||||||
|
@ -236,12 +234,11 @@
|
||||||
--executor-cores=${sparkExecutorCores}
|
--executor-cores=${sparkExecutorCores}
|
||||||
--executor-memory=${sparkExecutorMemory}
|
--executor-memory=${sparkExecutorMemory}
|
||||||
--driver-memory=${sparkDriverMemory}
|
--driver-memory=${sparkDriverMemory}
|
||||||
--conf spark.executor.memoryOverhead=${sparkExecutorMemory}
|
|
||||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||||
--conf spark.sql.shuffle.partitions=8000
|
--conf spark.sql.shuffle.partitions=3840
|
||||||
</spark-opts>
|
</spark-opts>
|
||||||
<arg>--graphPath</arg><arg>${graphInputPath}</arg>
|
<arg>--graphPath</arg><arg>${graphInputPath}</arg>
|
||||||
<arg>--workingDir</arg><arg>${workingDir}</arg>
|
<arg>--workingDir</arg><arg>${workingDir}</arg>
|
||||||
|
@ -261,12 +258,11 @@
|
||||||
--executor-cores=${sparkExecutorCores}
|
--executor-cores=${sparkExecutorCores}
|
||||||
--executor-memory=${sparkExecutorMemory}
|
--executor-memory=${sparkExecutorMemory}
|
||||||
--driver-memory=${sparkDriverMemory}
|
--driver-memory=${sparkDriverMemory}
|
||||||
--conf spark.executor.memoryOverhead=${sparkExecutorMemory}
|
|
||||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||||
--conf spark.sql.shuffle.partitions=5000
|
--conf spark.sql.shuffle.partitions=3840
|
||||||
</spark-opts>
|
</spark-opts>
|
||||||
<arg>--graphPath</arg><arg>${graphInputPath}</arg>
|
<arg>--graphPath</arg><arg>${graphInputPath}</arg>
|
||||||
<arg>--workingDir</arg><arg>${workingDir}</arg>
|
<arg>--workingDir</arg><arg>${workingDir}</arg>
|
||||||
|
@ -286,12 +282,11 @@
|
||||||
--executor-cores=${sparkExecutorCores}
|
--executor-cores=${sparkExecutorCores}
|
||||||
--executor-memory=${sparkExecutorMemory}
|
--executor-memory=${sparkExecutorMemory}
|
||||||
--driver-memory=${sparkDriverMemory}
|
--driver-memory=${sparkDriverMemory}
|
||||||
--conf spark.executor.memoryOverhead=${sparkExecutorMemory}
|
|
||||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||||
--conf spark.sql.shuffle.partitions=10000
|
--conf spark.sql.shuffle.partitions=3840
|
||||||
</spark-opts>
|
</spark-opts>
|
||||||
<arg>--graphPath</arg><arg>${graphInputPath}</arg>
|
<arg>--graphPath</arg><arg>${graphInputPath}</arg>
|
||||||
<arg>--workingDir</arg><arg>${workingDir}</arg>
|
<arg>--workingDir</arg><arg>${workingDir}</arg>
|
||||||
|
@ -311,12 +306,11 @@
|
||||||
--executor-cores=${sparkExecutorCores}
|
--executor-cores=${sparkExecutorCores}
|
||||||
--executor-memory=${sparkExecutorMemory}
|
--executor-memory=${sparkExecutorMemory}
|
||||||
--driver-memory=${sparkDriverMemory}
|
--driver-memory=${sparkDriverMemory}
|
||||||
--conf spark.executor.memoryOverhead=${sparkExecutorMemory}
|
|
||||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||||
--conf spark.sql.shuffle.partitions=2000
|
--conf spark.sql.shuffle.partitions=3840
|
||||||
</spark-opts>
|
</spark-opts>
|
||||||
<arg>--graphPath</arg><arg>${graphInputPath}</arg>
|
<arg>--graphPath</arg><arg>${graphInputPath}</arg>
|
||||||
<arg>--workingDir</arg><arg>${workingDir}</arg>
|
<arg>--workingDir</arg><arg>${workingDir}</arg>
|
||||||
|
@ -338,12 +332,11 @@
|
||||||
--executor-cores=${sparkExecutorCores}
|
--executor-cores=${sparkExecutorCores}
|
||||||
--executor-memory=${sparkExecutorMemory}
|
--executor-memory=${sparkExecutorMemory}
|
||||||
--driver-memory=${sparkDriverMemory}
|
--driver-memory=${sparkDriverMemory}
|
||||||
--conf spark.executor.memoryOverhead=${sparkExecutorMemory}
|
|
||||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||||
--conf spark.sql.shuffle.partitions=8000
|
--conf spark.sql.shuffle.partitions=3840
|
||||||
</spark-opts>
|
</spark-opts>
|
||||||
<arg>--graphPath</arg><arg>${graphInputPath}</arg>
|
<arg>--graphPath</arg><arg>${graphInputPath}</arg>
|
||||||
<arg>--workingDir</arg><arg>${workingDir}</arg>
|
<arg>--workingDir</arg><arg>${workingDir}</arg>
|
||||||
|
@ -363,12 +356,11 @@
|
||||||
--executor-cores=${sparkExecutorCores}
|
--executor-cores=${sparkExecutorCores}
|
||||||
--executor-memory=${sparkExecutorMemory}
|
--executor-memory=${sparkExecutorMemory}
|
||||||
--driver-memory=${sparkDriverMemory}
|
--driver-memory=${sparkDriverMemory}
|
||||||
--conf spark.executor.memoryOverhead=${sparkExecutorMemory}
|
|
||||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||||
--conf spark.sql.shuffle.partitions=8000
|
--conf spark.sql.shuffle.partitions=3840
|
||||||
</spark-opts>
|
</spark-opts>
|
||||||
<arg>--graphPath</arg><arg>${graphInputPath}</arg>
|
<arg>--graphPath</arg><arg>${graphInputPath}</arg>
|
||||||
<arg>--workingDir</arg><arg>${workingDir}</arg>
|
<arg>--workingDir</arg><arg>${workingDir}</arg>
|
||||||
|
@ -388,12 +380,11 @@
|
||||||
--executor-cores=${sparkExecutorCores}
|
--executor-cores=${sparkExecutorCores}
|
||||||
--executor-memory=${sparkExecutorMemory}
|
--executor-memory=${sparkExecutorMemory}
|
||||||
--driver-memory=${sparkDriverMemory}
|
--driver-memory=${sparkDriverMemory}
|
||||||
--conf spark.executor.memoryOverhead=${sparkExecutorMemory}
|
|
||||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||||
--conf spark.sql.shuffle.partitions=8000
|
--conf spark.sql.shuffle.partitions=3840
|
||||||
</spark-opts>
|
</spark-opts>
|
||||||
<arg>--graphPath</arg><arg>${graphInputPath}</arg>
|
<arg>--graphPath</arg><arg>${graphInputPath}</arg>
|
||||||
<arg>--workingDir</arg><arg>${workingDir}</arg>
|
<arg>--workingDir</arg><arg>${workingDir}</arg>
|
||||||
|
@ -413,12 +404,11 @@
|
||||||
--executor-cores=${sparkExecutorCores}
|
--executor-cores=${sparkExecutorCores}
|
||||||
--executor-memory=${sparkExecutorMemory}
|
--executor-memory=${sparkExecutorMemory}
|
||||||
--driver-memory=${sparkDriverMemory}
|
--driver-memory=${sparkDriverMemory}
|
||||||
--conf spark.executor.memoryOverhead=${sparkExecutorMemory}
|
|
||||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||||
--conf spark.sql.shuffle.partitions=8000
|
--conf spark.sql.shuffle.partitions=3840
|
||||||
</spark-opts>
|
</spark-opts>
|
||||||
<arg>--graphPath</arg><arg>${graphInputPath}</arg>
|
<arg>--graphPath</arg><arg>${graphInputPath}</arg>
|
||||||
<arg>--workingDir</arg><arg>${workingDir}</arg>
|
<arg>--workingDir</arg><arg>${workingDir}</arg>
|
||||||
|
@ -438,12 +428,11 @@
|
||||||
--executor-cores=${sparkExecutorCores}
|
--executor-cores=${sparkExecutorCores}
|
||||||
--executor-memory=${sparkExecutorMemory}
|
--executor-memory=${sparkExecutorMemory}
|
||||||
--driver-memory=${sparkDriverMemory}
|
--driver-memory=${sparkDriverMemory}
|
||||||
--conf spark.executor.memoryOverhead=${sparkExecutorMemory}
|
|
||||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||||
--conf spark.sql.shuffle.partitions=8000
|
--conf spark.sql.shuffle.partitions=3840
|
||||||
</spark-opts>
|
</spark-opts>
|
||||||
<arg>--graphPath</arg><arg>${graphInputPath}</arg>
|
<arg>--graphPath</arg><arg>${graphInputPath}</arg>
|
||||||
<arg>--workingDir</arg><arg>${workingDir}</arg>
|
<arg>--workingDir</arg><arg>${workingDir}</arg>
|
||||||
|
@ -463,12 +452,11 @@
|
||||||
--executor-cores=${sparkExecutorCores}
|
--executor-cores=${sparkExecutorCores}
|
||||||
--executor-memory=${sparkExecutorMemory}
|
--executor-memory=${sparkExecutorMemory}
|
||||||
--driver-memory=${sparkDriverMemory}
|
--driver-memory=${sparkDriverMemory}
|
||||||
--conf spark.executor.memoryOverhead=${sparkExecutorMemory}
|
|
||||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||||
--conf spark.sql.shuffle.partitions=8000
|
--conf spark.sql.shuffle.partitions=3840
|
||||||
</spark-opts>
|
</spark-opts>
|
||||||
<arg>--graphPath</arg><arg>${graphInputPath}</arg>
|
<arg>--graphPath</arg><arg>${graphInputPath}</arg>
|
||||||
<arg>--workingDir</arg><arg>${workingDir}</arg>
|
<arg>--workingDir</arg><arg>${workingDir}</arg>
|
||||||
|
@ -488,12 +476,11 @@
|
||||||
--executor-cores=${sparkExecutorCores}
|
--executor-cores=${sparkExecutorCores}
|
||||||
--executor-memory=${sparkExecutorMemory}
|
--executor-memory=${sparkExecutorMemory}
|
||||||
--driver-memory=${sparkDriverMemory}
|
--driver-memory=${sparkDriverMemory}
|
||||||
--conf spark.executor.memoryOverhead=${sparkExecutorMemory}
|
|
||||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||||
--conf spark.sql.shuffle.partitions=8000
|
--conf spark.sql.shuffle.partitions=3840
|
||||||
</spark-opts>
|
</spark-opts>
|
||||||
<arg>--workingDir</arg><arg>${workingDir}</arg>
|
<arg>--workingDir</arg><arg>${workingDir}</arg>
|
||||||
<arg>--outputDir</arg><arg>${outputDir}</arg>
|
<arg>--outputDir</arg><arg>${outputDir}</arg>
|
||||||
|
@ -516,7 +503,6 @@
|
||||||
<spark-opts>
|
<spark-opts>
|
||||||
--executor-memory=${sparkExecutorMemory}
|
--executor-memory=${sparkExecutorMemory}
|
||||||
--driver-memory=${sparkDriverMemory}
|
--driver-memory=${sparkDriverMemory}
|
||||||
--conf spark.executor.memoryOverhead=${sparkExecutorMemory}
|
|
||||||
--conf spark.dynamicAllocation.maxExecutors=${sparkMaxExecutorsForIndexing}
|
--conf spark.dynamicAllocation.maxExecutors=${sparkMaxExecutorsForIndexing}
|
||||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||||
|
@ -549,7 +535,6 @@
|
||||||
--executor-cores=${sparkExecutorCores}
|
--executor-cores=${sparkExecutorCores}
|
||||||
--executor-memory=${sparkExecutorMemory}
|
--executor-memory=${sparkExecutorMemory}
|
||||||
--driver-memory=${sparkDriverMemory}
|
--driver-memory=${sparkDriverMemory}
|
||||||
--conf spark.executor.memoryOverhead=${sparkExecutorMemory}
|
|
||||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||||
|
@ -577,7 +562,6 @@
|
||||||
--executor-cores=${sparkExecutorCores}
|
--executor-cores=${sparkExecutorCores}
|
||||||
--executor-memory=${sparkExecutorMemory}
|
--executor-memory=${sparkExecutorMemory}
|
||||||
--driver-memory=${sparkDriverMemory}
|
--driver-memory=${sparkDriverMemory}
|
||||||
--conf spark.executor.memoryOverhead=${sparkExecutorMemory}
|
|
||||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||||
|
@ -601,7 +585,6 @@
|
||||||
<spark-opts>
|
<spark-opts>
|
||||||
--executor-memory=${sparkExecutorMemory}
|
--executor-memory=${sparkExecutorMemory}
|
||||||
--driver-memory=${sparkDriverMemory}
|
--driver-memory=${sparkDriverMemory}
|
||||||
--conf spark.executor.memoryOverhead=${sparkExecutorMemory}
|
|
||||||
--conf spark.dynamicAllocation.maxExecutors=${sparkMaxExecutorsForIndexing}
|
--conf spark.dynamicAllocation.maxExecutors=${sparkMaxExecutorsForIndexing}
|
||||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||||
|
|
|
@ -1,64 +0,0 @@
|
||||||
package eu.dnetlib.dhp.broker.oa.matchers.simple;
|
|
||||||
|
|
||||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
|
||||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
|
||||||
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
import org.junit.jupiter.api.BeforeEach;
|
|
||||||
import org.junit.jupiter.api.Test;
|
|
||||||
|
|
||||||
import eu.dnetlib.broker.objects.OaBrokerAuthor;
|
|
||||||
import eu.dnetlib.broker.objects.OaBrokerMainEntity;
|
|
||||||
|
|
||||||
class EnrichMissingAuthorOrcidTest {
|
|
||||||
|
|
||||||
final EnrichMissingAuthorOrcid matcher = new EnrichMissingAuthorOrcid();
|
|
||||||
|
|
||||||
@BeforeEach
|
|
||||||
void setUp() throws Exception {}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
void testFindDifferences_1() {
|
|
||||||
final OaBrokerMainEntity source = new OaBrokerMainEntity();
|
|
||||||
final OaBrokerMainEntity target = new OaBrokerMainEntity();
|
|
||||||
final List<OaBrokerAuthor> list = this.matcher.findDifferences(source, target);
|
|
||||||
assertTrue(list.isEmpty());
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
void testFindDifferences_2() {
|
|
||||||
final OaBrokerMainEntity source = new OaBrokerMainEntity();
|
|
||||||
final OaBrokerMainEntity target = new OaBrokerMainEntity();
|
|
||||||
|
|
||||||
source.getCreators().add(new OaBrokerAuthor("Claudio Atzori", "0000-0001-9613-6639"));
|
|
||||||
target.getCreators().add(new OaBrokerAuthor("Claudio Atzori", null));
|
|
||||||
|
|
||||||
final List<OaBrokerAuthor> list = this.matcher.findDifferences(source, target);
|
|
||||||
assertEquals(1, list.size());
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
void testFindDifferences_3() {
|
|
||||||
final OaBrokerMainEntity source = new OaBrokerMainEntity();
|
|
||||||
final OaBrokerMainEntity target = new OaBrokerMainEntity();
|
|
||||||
|
|
||||||
source.getCreators().add(new OaBrokerAuthor("Claudio Atzori", null));
|
|
||||||
target.getCreators().add(new OaBrokerAuthor("Claudio Atzori", "0000-0001-9613-6639"));
|
|
||||||
|
|
||||||
final List<OaBrokerAuthor> list = this.matcher.findDifferences(source, target);
|
|
||||||
assertTrue(list.isEmpty());
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
void testFindDifferences_4() {
|
|
||||||
final OaBrokerMainEntity source = new OaBrokerMainEntity();
|
|
||||||
final OaBrokerMainEntity target = new OaBrokerMainEntity();
|
|
||||||
source.getCreators().add(new OaBrokerAuthor("Claudio Atzori", "0000-0001-9613-6639"));
|
|
||||||
target.getCreators().add(new OaBrokerAuthor("Claudio Atzori", "0000-0001-9613-6639"));
|
|
||||||
|
|
||||||
final List<OaBrokerAuthor> list = this.matcher.findDifferences(source, target);
|
|
||||||
assertTrue(list.isEmpty());
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
|
@ -2,31 +2,27 @@
|
||||||
package eu.dnetlib.dhp.broker.oa.util;
|
package eu.dnetlib.dhp.broker.oa.util;
|
||||||
|
|
||||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||||
import static org.junit.jupiter.api.Assertions.assertNull;
|
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
import org.junit.jupiter.api.BeforeEach;
|
import org.junit.jupiter.api.BeforeEach;
|
||||||
import org.junit.jupiter.api.Test;
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
import eu.dnetlib.broker.objects.OaBrokerMainEntity;
|
|
||||||
import eu.dnetlib.broker.objects.OaBrokerTypedValue;
|
import eu.dnetlib.broker.objects.OaBrokerTypedValue;
|
||||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.Author;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.Instance;
|
import eu.dnetlib.dhp.schema.oaf.Instance;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Qualifier;
|
import eu.dnetlib.dhp.schema.oaf.Qualifier;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||||
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
||||||
|
|
||||||
public class ConversionUtilsTest {
|
class ConversionUtilsTest {
|
||||||
|
|
||||||
@BeforeEach
|
@BeforeEach
|
||||||
public void setUp() throws Exception {}
|
void setUp() throws Exception {
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testAllResultPids() {
|
void testAllResultPids() {
|
||||||
final Qualifier qf = new Qualifier();
|
final Qualifier qf = new Qualifier();
|
||||||
qf.setClassid("test");
|
qf.setClassid("test");
|
||||||
qf.setClassname("test");
|
qf.setClassname("test");
|
||||||
|
@ -95,42 +91,4 @@ public class ConversionUtilsTest {
|
||||||
assertEquals(6, list.size());
|
assertEquals(6, list.size());
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testOafResultToBrokerResult() {
|
|
||||||
|
|
||||||
final Author a1 = createAuthor("Michele Artini", "0000-0002-4406-428X");
|
|
||||||
final Author a2 = createAuthor("Claudio Atzori", "http://orcid.org/0000-0001-9613-6639");
|
|
||||||
final Author a3 = createAuthor("Alessia Bardi", null);
|
|
||||||
|
|
||||||
final Result r = new Result();
|
|
||||||
r.setAuthor(Arrays.asList(a1, a2, a3));
|
|
||||||
|
|
||||||
final OaBrokerMainEntity br = ConversionUtils.oafResultToBrokerResult(r);
|
|
||||||
|
|
||||||
assertEquals(3, br.getCreators().size());
|
|
||||||
assertEquals("0000-0002-4406-428X", br.getCreators().get(0).getOrcid());
|
|
||||||
assertEquals("0000-0001-9613-6639", br.getCreators().get(1).getOrcid());
|
|
||||||
assertNull(br.getCreators().get(2).getOrcid());
|
|
||||||
}
|
|
||||||
|
|
||||||
private Author createAuthor(final String name, final String orcid) {
|
|
||||||
|
|
||||||
final Author a = new Author();
|
|
||||||
a.setFullname("Michele Artini");
|
|
||||||
|
|
||||||
if (orcid != null) {
|
|
||||||
final Qualifier q = new Qualifier();
|
|
||||||
q.setClassid(ModelConstants.ORCID);
|
|
||||||
q.setClassname(ModelConstants.ORCID);
|
|
||||||
q.setSchemeid("dnet:pids");
|
|
||||||
q.setSchemename("dnet:pids");
|
|
||||||
|
|
||||||
final StructuredProperty pid = new StructuredProperty();
|
|
||||||
pid.setQualifier(q);
|
|
||||||
pid.setValue(orcid);
|
|
||||||
|
|
||||||
a.setPid(Arrays.asList(pid));
|
|
||||||
}
|
|
||||||
return a;
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -53,24 +53,10 @@
|
||||||
<artifactId>dhp-pace-core</artifactId>
|
<artifactId>dhp-pace-core</artifactId>
|
||||||
<version>${project.version}</version>
|
<version>${project.version}</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.commons</groupId>
|
<groupId>org.apache.commons</groupId>
|
||||||
<artifactId>commons-lang3</artifactId>
|
<artifactId>commons-lang3</artifactId>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.scala-lang.modules</groupId>
|
|
||||||
<artifactId>scala-java8-compat_${scala.binary.version}</artifactId>
|
|
||||||
<version>1.0.2</version>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.scala-lang.modules</groupId>
|
|
||||||
<artifactId>scala-collection-compat_${scala.binary.version}</artifactId>
|
|
||||||
<version>2.11.0</version>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.spark</groupId>
|
<groupId>org.apache.spark</groupId>
|
||||||
<artifactId>spark-core_${scala.binary.version}</artifactId>
|
<artifactId>spark-core_${scala.binary.version}</artifactId>
|
||||||
|
@ -79,16 +65,10 @@
|
||||||
<groupId>org.apache.spark</groupId>
|
<groupId>org.apache.spark</groupId>
|
||||||
<artifactId>spark-sql_${scala.binary.version}</artifactId>
|
<artifactId>spark-sql_${scala.binary.version}</artifactId>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.spark</groupId>
|
<groupId>org.apache.spark</groupId>
|
||||||
<artifactId>spark-graphx_${scala.binary.version}</artifactId>
|
<artifactId>spark-graphx_${scala.binary.version}</artifactId>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>com.arakelian</groupId>
|
|
||||||
<artifactId>java-jq</artifactId>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>dom4j</groupId>
|
<groupId>dom4j</groupId>
|
||||||
<artifactId>dom4j</artifactId>
|
<artifactId>dom4j</artifactId>
|
||||||
|
@ -101,10 +81,6 @@
|
||||||
<groupId>com.fasterxml.jackson.core</groupId>
|
<groupId>com.fasterxml.jackson.core</groupId>
|
||||||
<artifactId>jackson-databind</artifactId>
|
<artifactId>jackson-databind</artifactId>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
|
||||||
<groupId>com.fasterxml.jackson.core</groupId>
|
|
||||||
<artifactId>jackson-core</artifactId>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.httpcomponents</groupId>
|
<groupId>org.apache.httpcomponents</groupId>
|
||||||
<artifactId>httpclient</artifactId>
|
<artifactId>httpclient</artifactId>
|
||||||
|
|
|
@ -42,6 +42,7 @@ import eu.dnetlib.dhp.utils.ISLookupClientFactory;
|
||||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
|
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
|
||||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
||||||
import eu.dnetlib.pace.config.DedupConfig;
|
import eu.dnetlib.pace.config.DedupConfig;
|
||||||
|
import eu.dnetlib.pace.util.SparkCompatUtils;
|
||||||
import scala.Tuple3;
|
import scala.Tuple3;
|
||||||
import scala.collection.JavaConversions;
|
import scala.collection.JavaConversions;
|
||||||
|
|
||||||
|
@ -148,8 +149,7 @@ public class SparkCreateMergeRels extends AbstractSparkAction {
|
||||||
Dataset<Row> pivotHistory = spark
|
Dataset<Row> pivotHistory = spark
|
||||||
.createDataset(
|
.createDataset(
|
||||||
Collections.emptyList(),
|
Collections.emptyList(),
|
||||||
RowEncoder
|
SparkCompatUtils.encoderFor(StructType.fromDDL("id STRING, lastUsage STRING")));
|
||||||
.apply(StructType.fromDDL("id STRING, lastUsage STRING")));
|
|
||||||
|
|
||||||
if (StringUtils.isNotBlank(pivotHistoryDatabase)) {
|
if (StringUtils.isNotBlank(pivotHistoryDatabase)) {
|
||||||
pivotHistory = spark
|
pivotHistory = spark
|
||||||
|
|
|
@ -8,7 +8,6 @@ import org.apache.spark.SparkConf;
|
||||||
import org.apache.spark.api.java.function.MapFunction;
|
import org.apache.spark.api.java.function.MapFunction;
|
||||||
import org.apache.spark.api.java.function.ReduceFunction;
|
import org.apache.spark.api.java.function.ReduceFunction;
|
||||||
import org.apache.spark.sql.*;
|
import org.apache.spark.sql.*;
|
||||||
import org.apache.spark.sql.catalyst.encoders.RowEncoder;
|
|
||||||
import org.apache.spark.sql.types.StructType;
|
import org.apache.spark.sql.types.StructType;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
@ -23,6 +22,7 @@ import eu.dnetlib.dhp.schema.oaf.Relation;
|
||||||
import eu.dnetlib.dhp.schema.oaf.utils.MergeUtils;
|
import eu.dnetlib.dhp.schema.oaf.utils.MergeUtils;
|
||||||
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
|
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
|
||||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
||||||
|
import eu.dnetlib.pace.util.SparkCompatUtils;
|
||||||
import scala.Tuple2;
|
import scala.Tuple2;
|
||||||
import scala.Tuple3;
|
import scala.Tuple3;
|
||||||
|
|
||||||
|
@ -145,7 +145,7 @@ public class SparkPropagateRelation extends AbstractSparkAction {
|
||||||
StructType idsSchema = StructType
|
StructType idsSchema = StructType
|
||||||
.fromDDL("`id` STRING, `dataInfo` STRUCT<`deletedbyinference`:BOOLEAN,`invisible`:BOOLEAN>");
|
.fromDDL("`id` STRING, `dataInfo` STRUCT<`deletedbyinference`:BOOLEAN,`invisible`:BOOLEAN>");
|
||||||
|
|
||||||
Dataset<Row> allIds = spark.emptyDataset(RowEncoder.apply(idsSchema));
|
Dataset<Row> allIds = spark.emptyDataset(SparkCompatUtils.encoderFor(idsSchema));
|
||||||
|
|
||||||
for (EntityType entityType : ModelSupport.entityTypes.keySet()) {
|
for (EntityType entityType : ModelSupport.entityTypes.keySet()) {
|
||||||
String entityPath = graphBasePath + '/' + entityType.name();
|
String entityPath = graphBasePath + '/' + entityType.name();
|
||||||
|
|
File diff suppressed because one or more lines are too long
|
@ -73,6 +73,12 @@
|
||||||
"name": "Irish Nephrology Society",
|
"name": "Irish Nephrology Society",
|
||||||
"synonym": []
|
"synonym": []
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"id": "100011062",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/100011062",
|
||||||
|
"name": "Asian Spinal Cord Network",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"id": "100011096",
|
"id": "100011096",
|
||||||
"uri": "http://dx.doi.org/10.13039/100011096",
|
"uri": "http://dx.doi.org/10.13039/100011096",
|
||||||
|
@ -217,6 +223,12 @@
|
||||||
"name": "Global Brain Health Institute",
|
"name": "Global Brain Health Institute",
|
||||||
"synonym": []
|
"synonym": []
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"id": "100015776",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/100015776",
|
||||||
|
"name": "Health and Social Care Board",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"id": "100015992",
|
"id": "100015992",
|
||||||
"uri": "http://dx.doi.org/10.13039/100015992",
|
"uri": "http://dx.doi.org/10.13039/100015992",
|
||||||
|
@ -391,6 +403,18 @@
|
||||||
"name": "Irish Hospice Foundation",
|
"name": "Irish Hospice Foundation",
|
||||||
"synonym": []
|
"synonym": []
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"id": "501100001596",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/501100001596",
|
||||||
|
"name": "Irish Research Council for Science, Engineering and Technology",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "501100001597",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/501100001597",
|
||||||
|
"name": "Irish Research Council for the Humanities and Social Sciences",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"id": "501100001598",
|
"id": "501100001598",
|
||||||
"uri": "http://dx.doi.org/10.13039/501100001598",
|
"uri": "http://dx.doi.org/10.13039/501100001598",
|
||||||
|
@ -491,7 +515,7 @@
|
||||||
"id": "501100002081",
|
"id": "501100002081",
|
||||||
"uri": "http://dx.doi.org/10.13039/501100002081",
|
"uri": "http://dx.doi.org/10.13039/501100002081",
|
||||||
"name": "Irish Research Council",
|
"name": "Irish Research Council",
|
||||||
"synonym": ["501100001596", "501100001597"]
|
"synonym": []
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"id": "501100002736",
|
"id": "501100002736",
|
||||||
|
|
|
@ -560,15 +560,7 @@ case object Crossref2Oaf {
|
||||||
"10.13039/501100000266" | "10.13039/501100006041" | "10.13039/501100000265" | "10.13039/501100000270" |
|
"10.13039/501100000266" | "10.13039/501100006041" | "10.13039/501100000265" | "10.13039/501100000270" |
|
||||||
"10.13039/501100013589" | "10.13039/501100000271" =>
|
"10.13039/501100013589" | "10.13039/501100000271" =>
|
||||||
generateSimpleRelationFromAward(funder, "ukri________", a => a)
|
generateSimpleRelationFromAward(funder, "ukri________", a => a)
|
||||||
//HFRI
|
|
||||||
case "10.13039/501100013209" =>
|
|
||||||
generateSimpleRelationFromAward(funder, "hfri________", a => a)
|
|
||||||
val targetId = getProjectId("hfri________", "1e5e62235d094afd01cd56e65112fc63")
|
|
||||||
queue += generateRelation(sourceId, targetId, ModelConstants.IS_PRODUCED_BY)
|
|
||||||
queue += generateRelation(targetId, sourceId, ModelConstants.PRODUCES)
|
|
||||||
//ERASMUS+
|
|
||||||
case "10.13039/501100010790" =>
|
|
||||||
generateSimpleRelationFromAward(funder, "erasmusplus_", a => a)
|
|
||||||
case _ => logger.debug("no match for " + funder.DOI.get)
|
case _ => logger.debug("no match for " + funder.DOI.get)
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -53,8 +53,6 @@ public class Constraints implements Serializable {
|
||||||
|
|
||||||
for (Constraint sc : constraint) {
|
for (Constraint sc : constraint) {
|
||||||
boolean verified = false;
|
boolean verified = false;
|
||||||
if (!param.containsKey(sc.getField()))
|
|
||||||
return false;
|
|
||||||
for (String value : param.get(sc.getField())) {
|
for (String value : param.get(sc.getField())) {
|
||||||
if (sc.verifyCriteria(value.trim())) {
|
if (sc.verifyCriteria(value.trim())) {
|
||||||
verified = true;
|
verified = true;
|
||||||
|
|
|
@ -14,7 +14,6 @@ import org.apache.spark.SparkConf;
|
||||||
import org.apache.spark.api.java.function.MapFunction;
|
import org.apache.spark.api.java.function.MapFunction;
|
||||||
import org.apache.spark.sql.Dataset;
|
import org.apache.spark.sql.Dataset;
|
||||||
import org.apache.spark.sql.Encoders;
|
import org.apache.spark.sql.Encoders;
|
||||||
import org.apache.spark.sql.Row;
|
|
||||||
import org.apache.spark.sql.SaveMode;
|
import org.apache.spark.sql.SaveMode;
|
||||||
import org.apache.spark.sql.SparkSession;
|
import org.apache.spark.sql.SparkSession;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
|
@ -85,26 +84,19 @@ public class SparkCountryPropagationJob {
|
||||||
Dataset<R> res = readPath(spark, sourcePath, resultClazz);
|
Dataset<R> res = readPath(spark, sourcePath, resultClazz);
|
||||||
|
|
||||||
log.info("Reading prepared info: {}", preparedInfoPath);
|
log.info("Reading prepared info: {}", preparedInfoPath);
|
||||||
final Dataset<Row> preparedInfoRaw = spark
|
Dataset<ResultCountrySet> prepared = spark
|
||||||
.read()
|
.read()
|
||||||
.json(preparedInfoPath);
|
.json(preparedInfoPath)
|
||||||
|
.as(Encoders.bean(ResultCountrySet.class));
|
||||||
|
|
||||||
|
res
|
||||||
|
.joinWith(prepared, res.col("id").equalTo(prepared.col("resultId")), "left_outer")
|
||||||
|
.map(getCountryMergeFn(), Encoders.bean(resultClazz))
|
||||||
|
.write()
|
||||||
|
.option("compression", "gzip")
|
||||||
|
.mode(SaveMode.Overwrite)
|
||||||
|
.json(outputPath);
|
||||||
|
|
||||||
if (!preparedInfoRaw.isEmpty()) {
|
|
||||||
final Dataset<ResultCountrySet> prepared = preparedInfoRaw.as(Encoders.bean(ResultCountrySet.class));
|
|
||||||
res
|
|
||||||
.joinWith(prepared, res.col("id").equalTo(prepared.col("resultId")), "left_outer")
|
|
||||||
.map(getCountryMergeFn(), Encoders.bean(resultClazz))
|
|
||||||
.write()
|
|
||||||
.option("compression", "gzip")
|
|
||||||
.mode(SaveMode.Overwrite)
|
|
||||||
.json(outputPath);
|
|
||||||
} else {
|
|
||||||
res
|
|
||||||
.write()
|
|
||||||
.option("compression", "gzip")
|
|
||||||
.mode(SaveMode.Overwrite)
|
|
||||||
.json(outputPath);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private static <R extends Result> MapFunction<Tuple2<R, ResultCountrySet>, R> getCountryMergeFn() {
|
private static <R extends Result> MapFunction<Tuple2<R, ResultCountrySet>, R> getCountryMergeFn() {
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -147,7 +147,6 @@ public class CleanGraphSparkJob {
|
||||||
.map((MapFunction<T, T>) GraphCleaningFunctions::fixVocabularyNames, Encoders.bean(clazz))
|
.map((MapFunction<T, T>) GraphCleaningFunctions::fixVocabularyNames, Encoders.bean(clazz))
|
||||||
.map((MapFunction<T, T>) value -> OafCleaner.apply(value, mapping), Encoders.bean(clazz))
|
.map((MapFunction<T, T>) value -> OafCleaner.apply(value, mapping), Encoders.bean(clazz))
|
||||||
.map((MapFunction<T, T>) value -> GraphCleaningFunctions.cleanup(value, vocs), Encoders.bean(clazz))
|
.map((MapFunction<T, T>) value -> GraphCleaningFunctions.cleanup(value, vocs), Encoders.bean(clazz))
|
||||||
.map((MapFunction<T, T>) GraphCleaningFunctions::dedicatedUglyHacks, Encoders.bean(clazz))
|
|
||||||
.filter((FilterFunction<T>) GraphCleaningFunctions::filter);
|
.filter((FilterFunction<T>) GraphCleaningFunctions::filter);
|
||||||
|
|
||||||
// read the master-duplicate tuples
|
// read the master-duplicate tuples
|
||||||
|
|
|
@ -85,7 +85,7 @@
|
||||||
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
|
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
|
||||||
</kill>
|
</kill>
|
||||||
|
|
||||||
<fork name="fork_downloads_csv">
|
<fork name="fork_downloads_csv">
|
||||||
<path start="download_gold"/>
|
<path start="download_gold"/>
|
||||||
<path start="download_doaj_json"/>
|
<path start="download_doaj_json"/>
|
||||||
</fork>
|
</fork>
|
||||||
|
@ -223,13 +223,11 @@
|
||||||
--executor-memory=${sparkExecutorMemory}
|
--executor-memory=${sparkExecutorMemory}
|
||||||
--executor-cores=${sparkExecutorCores}
|
--executor-cores=${sparkExecutorCores}
|
||||||
--driver-memory=${sparkDriverMemory}
|
--driver-memory=${sparkDriverMemory}
|
||||||
--conf spark.executor.memoryOverhead=${sparkExecutorMemory}
|
|
||||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||||
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
||||||
--conf spark.sql.shuffle.partitions=15000
|
|
||||||
</spark-opts>
|
</spark-opts>
|
||||||
<arg>--hostedByMapPath</arg><arg>${hostedByMapPath}</arg>
|
<arg>--hostedByMapPath</arg><arg>${hostedByMapPath}</arg>
|
||||||
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo</arg>
|
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo</arg>
|
||||||
|
@ -255,13 +253,11 @@
|
||||||
--executor-memory=${sparkExecutorMemory}
|
--executor-memory=${sparkExecutorMemory}
|
||||||
--executor-cores=${sparkExecutorCores}
|
--executor-cores=${sparkExecutorCores}
|
||||||
--driver-memory=${sparkDriverMemory}
|
--driver-memory=${sparkDriverMemory}
|
||||||
--conf spark.executor.memoryOverhead=${sparkExecutorMemory}
|
|
||||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||||
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
||||||
--conf spark.sql.shuffle.partitions=15000
|
|
||||||
</spark-opts>
|
</spark-opts>
|
||||||
<arg>--outputPath</arg><arg>${outputPath}/publication</arg>
|
<arg>--outputPath</arg><arg>${outputPath}/publication</arg>
|
||||||
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo</arg>
|
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo</arg>
|
||||||
|
@ -282,7 +278,6 @@
|
||||||
--executor-memory=${sparkExecutorMemory}
|
--executor-memory=${sparkExecutorMemory}
|
||||||
--executor-cores=${sparkExecutorCores}
|
--executor-cores=${sparkExecutorCores}
|
||||||
--driver-memory=${sparkDriverMemory}
|
--driver-memory=${sparkDriverMemory}
|
||||||
--conf spark.executor.memoryOverhead=${sparkExecutorMemory}
|
|
||||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||||
|
|
|
@ -25,8 +25,7 @@ SELECT
|
||||||
o.country || '@@@dnet:countries' AS country,
|
o.country || '@@@dnet:countries' AS country,
|
||||||
array[]::text[] AS alternativenames,
|
array[]::text[] AS alternativenames,
|
||||||
'sysimport:crosswalk:entityregistry@@@dnet:provenance_actions' AS provenanceaction,
|
'sysimport:crosswalk:entityregistry@@@dnet:provenance_actions' AS provenanceaction,
|
||||||
array_remove(array_agg(DISTINCT i.pid || '###' || i.issuertype || '@@@' || i.issuertype), NULL) AS pid,
|
array_remove(array_agg(DISTINCT i.pid || '###' || i.issuertype || '@@@' || i.issuertype), NULL) AS pid
|
||||||
'Unknown' AS typology
|
|
||||||
FROM dsm_organizations o
|
FROM dsm_organizations o
|
||||||
LEFT OUTER JOIN dsm_services d ON (d.id = o.collectedfrom)
|
LEFT OUTER JOIN dsm_services d ON (d.id = o.collectedfrom)
|
||||||
LEFT OUTER JOIN dsm_organizationpids p ON (p.organization = o.id)
|
LEFT OUTER JOIN dsm_organizationpids p ON (p.organization = o.id)
|
||||||
|
|
|
@ -0,0 +1,5 @@
|
||||||
|
[
|
||||||
|
{"paramName":"mt", "paramLongName":"master", "paramDescription": "should be local or yarn", "paramRequired": false},
|
||||||
|
{"paramName":"s", "paramLongName":"sourcePath", "paramDescription": "the source Path", "paramRequired": true},
|
||||||
|
{"paramName":"t", "paramLongName":"targetPath", "paramDescription": "the path of the scholix dump", "paramRequired": true}
|
||||||
|
]
|
|
@ -0,0 +1,166 @@
|
||||||
|
{
|
||||||
|
"cites":{
|
||||||
|
"original":"Cites",
|
||||||
|
"inverse":"IsCitedBy"
|
||||||
|
},
|
||||||
|
"compiles":{
|
||||||
|
"original":"Compiles",
|
||||||
|
"inverse":"IsCompiledBy"
|
||||||
|
},
|
||||||
|
"continues":{
|
||||||
|
"original":"Continues",
|
||||||
|
"inverse":"IsContinuedBy"
|
||||||
|
},
|
||||||
|
"derives":{
|
||||||
|
"original":"IsSourceOf",
|
||||||
|
"inverse":"IsDerivedFrom"
|
||||||
|
},
|
||||||
|
"describes":{
|
||||||
|
"original":"Describes",
|
||||||
|
"inverse":"IsDescribedBy"
|
||||||
|
},
|
||||||
|
"documents":{
|
||||||
|
"original":"Documents",
|
||||||
|
"inverse":"IsDocumentedBy"
|
||||||
|
},
|
||||||
|
"hasmetadata":{
|
||||||
|
"original":"HasMetadata",
|
||||||
|
"inverse":"IsMetadataOf"
|
||||||
|
},
|
||||||
|
"hasassociationwith":{
|
||||||
|
"original":"HasAssociationWith",
|
||||||
|
"inverse":"HasAssociationWith"
|
||||||
|
},
|
||||||
|
"haspart":{
|
||||||
|
"original":"HasPart",
|
||||||
|
"inverse":"IsPartOf"
|
||||||
|
},
|
||||||
|
"hasversion":{
|
||||||
|
"original":"HasVersion",
|
||||||
|
"inverse":"IsVersionOf"
|
||||||
|
},
|
||||||
|
"iscitedby":{
|
||||||
|
"original":"IsCitedBy",
|
||||||
|
"inverse":"Cites"
|
||||||
|
},
|
||||||
|
"iscompiledby":{
|
||||||
|
"original":"IsCompiledBy",
|
||||||
|
"inverse":"Compiles"
|
||||||
|
},
|
||||||
|
"iscontinuedby":{
|
||||||
|
"original":"IsContinuedBy",
|
||||||
|
"inverse":"Continues"
|
||||||
|
},
|
||||||
|
"isderivedfrom":{
|
||||||
|
"original":"IsDerivedFrom",
|
||||||
|
"inverse":"IsSourceOf"
|
||||||
|
},
|
||||||
|
"isdescribedby":{
|
||||||
|
"original":"IsDescribedBy",
|
||||||
|
"inverse":"Describes"
|
||||||
|
},
|
||||||
|
"isdocumentedby":{
|
||||||
|
"original":"IsDocumentedBy",
|
||||||
|
"inverse":"Documents"
|
||||||
|
},
|
||||||
|
"isidenticalto":{
|
||||||
|
"original":"IsIdenticalTo",
|
||||||
|
"inverse":"IsIdenticalTo"
|
||||||
|
},
|
||||||
|
"ismetadatafor":{
|
||||||
|
"original":"IsMetadataFor",
|
||||||
|
"inverse":"IsMetadataOf"
|
||||||
|
},
|
||||||
|
"ismetadataof":{
|
||||||
|
"original":"IsMetadataOf",
|
||||||
|
"inverse":"IsMetadataFor"
|
||||||
|
},
|
||||||
|
"isnewversionof":{
|
||||||
|
"original":"IsNewVersionOf",
|
||||||
|
"inverse":"IsPreviousVersionOf"
|
||||||
|
},
|
||||||
|
"isobsoletedby":{
|
||||||
|
"original":"IsObsoletedBy",
|
||||||
|
"inverse":"Obsoletes"
|
||||||
|
},
|
||||||
|
"isoriginalformof":{
|
||||||
|
"original":"IsOriginalFormOf",
|
||||||
|
"inverse":"IsVariantFormOf"
|
||||||
|
},
|
||||||
|
"ispartof":{
|
||||||
|
"original":"IsPartOf",
|
||||||
|
"inverse":"HasPart"
|
||||||
|
},
|
||||||
|
"ispreviousversionof":{
|
||||||
|
"original":"IsPreviousVersionOf",
|
||||||
|
"inverse":"IsNewVersionOf"
|
||||||
|
},
|
||||||
|
"isreferencedby":{
|
||||||
|
"original":"IsReferencedBy",
|
||||||
|
"inverse":"References"
|
||||||
|
},
|
||||||
|
"isrelatedto":{
|
||||||
|
"original":"IsRelatedTo",
|
||||||
|
"inverse":"IsRelatedTo"
|
||||||
|
},
|
||||||
|
"isrequiredby":{
|
||||||
|
"original":"IsRequiredBy",
|
||||||
|
"inverse":"Requires"
|
||||||
|
},
|
||||||
|
"isreviewedby":{
|
||||||
|
"original":"IsReviewedBy",
|
||||||
|
"inverse":"Reviews"
|
||||||
|
},
|
||||||
|
"issourceof":{
|
||||||
|
"original":"IsSourceOf",
|
||||||
|
"inverse":"IsDerivedFrom"
|
||||||
|
},
|
||||||
|
"issupplementedby":{
|
||||||
|
"original":"IsSupplementedBy",
|
||||||
|
"inverse":"IsSupplementTo"
|
||||||
|
},
|
||||||
|
"issupplementto":{
|
||||||
|
"original":"IsSupplementTo",
|
||||||
|
"inverse":"IsSupplementedBy"
|
||||||
|
},
|
||||||
|
"isvariantformof":{
|
||||||
|
"original":"IsVariantFormOf",
|
||||||
|
"inverse":"IsOriginalFormOf"
|
||||||
|
},
|
||||||
|
"isversionof":{
|
||||||
|
"original":"IsVersionOf",
|
||||||
|
"inverse":"HasVersion"
|
||||||
|
},
|
||||||
|
"obsoletes":{
|
||||||
|
"original":"Obsoletes",
|
||||||
|
"inverse":"IsObsoletedBy"
|
||||||
|
},
|
||||||
|
"references":{
|
||||||
|
"original":"References",
|
||||||
|
"inverse":"IsReferencedBy"
|
||||||
|
},
|
||||||
|
"requires":{
|
||||||
|
"original":"Requires",
|
||||||
|
"inverse":"IsRequiredBy"
|
||||||
|
},
|
||||||
|
"related":{
|
||||||
|
"original":"IsRelatedTo",
|
||||||
|
"inverse":"IsRelatedTo"
|
||||||
|
},
|
||||||
|
"reviews":{
|
||||||
|
"original":"Reviews",
|
||||||
|
"inverse":"IsReviewedBy"
|
||||||
|
},
|
||||||
|
"unknown":{
|
||||||
|
"original":"Unknown",
|
||||||
|
"inverse":"Unknown"
|
||||||
|
},
|
||||||
|
"isamongtopnsimilardocuments": {
|
||||||
|
"original": "IsAmongTopNSimilarDocuments",
|
||||||
|
"inverse": "HasAmongTopNSimilarDocuments"
|
||||||
|
},
|
||||||
|
"hasamongtopnsimilardocuments": {
|
||||||
|
"original": "HasAmongTopNSimilarDocuments",
|
||||||
|
"inverse": "IsAmongTopNSimilarDocuments"
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,258 @@
|
||||||
|
package eu.dnetlib.dhp.sx.graph
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.databind.ObjectMapper
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.{KeyValue, Result, StructuredProperty}
|
||||||
|
import eu.dnetlib.dhp.schema.sx.scholix.{
|
||||||
|
Scholix,
|
||||||
|
ScholixCollectedFrom,
|
||||||
|
ScholixEntityId,
|
||||||
|
ScholixIdentifier,
|
||||||
|
ScholixRelationship,
|
||||||
|
ScholixResource
|
||||||
|
}
|
||||||
|
import org.json4s
|
||||||
|
import org.json4s.DefaultFormats
|
||||||
|
import org.json4s.jackson.JsonMethods.parse
|
||||||
|
|
||||||
|
import scala.collection.JavaConverters._
|
||||||
|
import scala.io.Source
|
||||||
|
|
||||||
|
case class RelationInfo(
|
||||||
|
source: String,
|
||||||
|
target: String,
|
||||||
|
relclass: String,
|
||||||
|
id: String,
|
||||||
|
collectedfrom: Seq[RelKeyValue]
|
||||||
|
) {}
|
||||||
|
case class RelKeyValue(key: String, value: String) {}
|
||||||
|
|
||||||
|
object ScholexplorerUtils {
|
||||||
|
|
||||||
|
val OPENAIRE_IDENTIFIER_SCHEMA: String = "OpenAIRE Identifier"
|
||||||
|
val mapper = new ObjectMapper()
|
||||||
|
|
||||||
|
case class RelationVocabulary(original: String, inverse: String) {}
|
||||||
|
|
||||||
|
val relations: Map[String, RelationVocabulary] = {
|
||||||
|
val input = Source
|
||||||
|
.fromInputStream(
|
||||||
|
getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/relation/relations.json")
|
||||||
|
)
|
||||||
|
.mkString
|
||||||
|
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
|
||||||
|
|
||||||
|
lazy val json: json4s.JValue = parse(input)
|
||||||
|
|
||||||
|
json.extract[Map[String, RelationVocabulary]]
|
||||||
|
}
|
||||||
|
|
||||||
|
def invRel(rel: String): String = {
|
||||||
|
val semanticRelation = relations.getOrElse(rel.toLowerCase, null)
|
||||||
|
if (semanticRelation != null)
|
||||||
|
semanticRelation.inverse
|
||||||
|
else
|
||||||
|
null
|
||||||
|
}
|
||||||
|
|
||||||
|
def generateDatasourceOpenAIREURLS(id: String): String = {
|
||||||
|
if (id != null && id.length > 12)
|
||||||
|
s"https://explore.openaire.eu/search/dataprovider?datasourceId=${id.substring(3)}"
|
||||||
|
else
|
||||||
|
null
|
||||||
|
}
|
||||||
|
|
||||||
|
def findURLForPID(
|
||||||
|
pidValue: List[StructuredProperty],
|
||||||
|
urls: List[String]
|
||||||
|
): List[(StructuredProperty, String)] = {
|
||||||
|
pidValue.map { p =>
|
||||||
|
val pv = p.getValue
|
||||||
|
|
||||||
|
val r = urls.find(u => u.toLowerCase.contains(pv.toLowerCase))
|
||||||
|
(p, r.orNull)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
def extractTypedIdentifierFromInstance(r: Result): List[ScholixIdentifier] = {
|
||||||
|
if (r.getInstance() == null || r.getInstance().isEmpty)
|
||||||
|
return List()
|
||||||
|
r.getInstance()
|
||||||
|
.asScala
|
||||||
|
.filter(i => i.getUrl != null && !i.getUrl.isEmpty)
|
||||||
|
.filter(i => i.getPid != null && i.getUrl != null)
|
||||||
|
.flatMap(i => findURLForPID(i.getPid.asScala.toList, i.getUrl.asScala.toList))
|
||||||
|
.map(i => new ScholixIdentifier(i._1.getValue, i._1.getQualifier.getClassid, i._2))
|
||||||
|
.distinct
|
||||||
|
.toList
|
||||||
|
}
|
||||||
|
|
||||||
|
def generateScholixResourceFromResult(result: Result): ScholixResource = {
|
||||||
|
|
||||||
|
if (result.getInstance() == null || result.getInstance().size() == 0)
|
||||||
|
return null
|
||||||
|
|
||||||
|
if (result.getPid == null || result.getPid.isEmpty)
|
||||||
|
return null
|
||||||
|
|
||||||
|
val r = new ScholixResource
|
||||||
|
r.setDnetIdentifier(result.getId)
|
||||||
|
|
||||||
|
val persistentIdentifiers: List[ScholixIdentifier] = extractTypedIdentifierFromInstance(result)
|
||||||
|
if (persistentIdentifiers.isEmpty)
|
||||||
|
return null
|
||||||
|
|
||||||
|
r.setIdentifier(persistentIdentifiers.asJava)
|
||||||
|
|
||||||
|
r.setObjectType(result.getResulttype.getClassid)
|
||||||
|
|
||||||
|
r.setObjectSubType(
|
||||||
|
result
|
||||||
|
.getInstance()
|
||||||
|
.asScala
|
||||||
|
.filter(i => i != null && i.getInstancetype != null)
|
||||||
|
.map(i => i.getInstancetype.getClassname)
|
||||||
|
.distinct
|
||||||
|
.head
|
||||||
|
)
|
||||||
|
|
||||||
|
if (result.getTitle != null && result.getTitle.asScala.nonEmpty) {
|
||||||
|
val titles: List[String] = result.getTitle.asScala.map(t => t.getValue).toList
|
||||||
|
if (titles.nonEmpty)
|
||||||
|
r.setTitle(titles.head)
|
||||||
|
else
|
||||||
|
return null
|
||||||
|
}
|
||||||
|
if (result.getAuthor != null && !result.getAuthor.isEmpty) {
|
||||||
|
val authors: List[ScholixEntityId] =
|
||||||
|
result.getAuthor.asScala
|
||||||
|
.map(a => {
|
||||||
|
val entity = new ScholixEntityId()
|
||||||
|
entity.setName(a.getFullname)
|
||||||
|
if (a.getPid != null && a.getPid.size() > 0)
|
||||||
|
entity.setIdentifiers(
|
||||||
|
a.getPid.asScala
|
||||||
|
.map(sp => {
|
||||||
|
val id = new ScholixIdentifier()
|
||||||
|
id.setIdentifier(sp.getValue)
|
||||||
|
id.setSchema(sp.getQualifier.getClassid)
|
||||||
|
id
|
||||||
|
})
|
||||||
|
.take(3)
|
||||||
|
.toList
|
||||||
|
.asJava
|
||||||
|
)
|
||||||
|
entity
|
||||||
|
})
|
||||||
|
.toList
|
||||||
|
if (authors.nonEmpty)
|
||||||
|
r.setCreator(authors.asJava)
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
val dt: List[String] = result
|
||||||
|
.getInstance()
|
||||||
|
.asScala
|
||||||
|
.filter(i => i.getDateofacceptance != null)
|
||||||
|
.map(i => i.getDateofacceptance.getValue)
|
||||||
|
.toList
|
||||||
|
if (dt.nonEmpty)
|
||||||
|
r.setPublicationDate(dt.distinct.head)
|
||||||
|
|
||||||
|
r.setPublisher(
|
||||||
|
result
|
||||||
|
.getInstance()
|
||||||
|
.asScala
|
||||||
|
.map(i => i.getHostedby)
|
||||||
|
.filter(h => !"unknown".equalsIgnoreCase(h.getValue))
|
||||||
|
.map(h => {
|
||||||
|
val eid = new ScholixEntityId()
|
||||||
|
eid.setName(h.getValue)
|
||||||
|
val id = new ScholixIdentifier()
|
||||||
|
id.setIdentifier(h.getKey)
|
||||||
|
id.setSchema(OPENAIRE_IDENTIFIER_SCHEMA)
|
||||||
|
id.setUrl(generateDatasourceOpenAIREURLS(h.getKey))
|
||||||
|
eid.setIdentifiers(List(id).asJava)
|
||||||
|
eid
|
||||||
|
})
|
||||||
|
.distinct
|
||||||
|
.asJava
|
||||||
|
)
|
||||||
|
|
||||||
|
r.setCollectedFrom(
|
||||||
|
result.getCollectedfrom.asScala
|
||||||
|
.map(cf => {
|
||||||
|
val scf = new ScholixCollectedFrom()
|
||||||
|
scf.setProvisionMode("collected")
|
||||||
|
scf.setCompletionStatus("complete")
|
||||||
|
val eid = new ScholixEntityId()
|
||||||
|
eid.setName(cf.getValue)
|
||||||
|
val id = new ScholixIdentifier()
|
||||||
|
id.setIdentifier(cf.getKey)
|
||||||
|
id.setSchema(OPENAIRE_IDENTIFIER_SCHEMA)
|
||||||
|
id.setUrl(generateDatasourceOpenAIREURLS(cf.getKey))
|
||||||
|
eid.setIdentifiers(List(id).asJava)
|
||||||
|
scf.setProvider(eid)
|
||||||
|
scf
|
||||||
|
})
|
||||||
|
.asJava
|
||||||
|
)
|
||||||
|
|
||||||
|
r
|
||||||
|
}
|
||||||
|
|
||||||
|
def generateScholix(relation: RelationInfo, source: ScholixResource): Scholix = {
|
||||||
|
val s: Scholix = new Scholix
|
||||||
|
s.setSource(source)
|
||||||
|
if (relation.collectedfrom != null && relation.collectedfrom.nonEmpty)
|
||||||
|
s.setLinkprovider(
|
||||||
|
relation.collectedfrom
|
||||||
|
.map(cf => {
|
||||||
|
val eid = new ScholixEntityId()
|
||||||
|
eid.setName(cf.value)
|
||||||
|
val id = new ScholixIdentifier()
|
||||||
|
id.setIdentifier(cf.key)
|
||||||
|
id.setSchema(OPENAIRE_IDENTIFIER_SCHEMA)
|
||||||
|
id.setUrl(generateDatasourceOpenAIREURLS(cf.key))
|
||||||
|
eid.setIdentifiers(List(id).asJava)
|
||||||
|
eid
|
||||||
|
})
|
||||||
|
.toList
|
||||||
|
.asJava
|
||||||
|
)
|
||||||
|
else {
|
||||||
|
val eid = new ScholixEntityId()
|
||||||
|
eid.setName("OpenAIRE")
|
||||||
|
val id = new ScholixIdentifier()
|
||||||
|
id.setIdentifier("10|infrastruct_::f66f1bd369679b5b077dcdf006089556")
|
||||||
|
id.setSchema(OPENAIRE_IDENTIFIER_SCHEMA)
|
||||||
|
id.setUrl(generateDatasourceOpenAIREURLS(id.getIdentifier))
|
||||||
|
eid.setIdentifiers(List(id).asJava)
|
||||||
|
s.setLinkprovider(List(eid).asJava)
|
||||||
|
}
|
||||||
|
s.setIdentifier(relation.id)
|
||||||
|
val semanticRelation = relations.getOrElse(relation.relclass.toLowerCase, null)
|
||||||
|
if (semanticRelation == null)
|
||||||
|
return null
|
||||||
|
s.setRelationship(
|
||||||
|
new ScholixRelationship(semanticRelation.original, "datacite", semanticRelation.inverse)
|
||||||
|
)
|
||||||
|
s.setPublicationDate(source.getPublicationDate)
|
||||||
|
s.setPublisher(source.getPublisher)
|
||||||
|
val mockTarget = new ScholixResource
|
||||||
|
mockTarget.setDnetIdentifier(relation.target)
|
||||||
|
s.setTarget(mockTarget)
|
||||||
|
s
|
||||||
|
}
|
||||||
|
|
||||||
|
def updateTarget(s: Scholix, t: ScholixResource): String = {
|
||||||
|
|
||||||
|
s.setTarget(t)
|
||||||
|
val spublishers: Seq[ScholixEntityId] =
|
||||||
|
if (s.getPublisher != null && !s.getPublisher.isEmpty) s.getPublisher.asScala else List()
|
||||||
|
val tpublishers: Seq[ScholixEntityId] =
|
||||||
|
if (t.getPublisher != null && !t.getPublisher.isEmpty) t.getPublisher.asScala else List()
|
||||||
|
val mergedPublishers = spublishers.union(tpublishers).distinct.take(10).toList
|
||||||
|
s.setPublisher(mergedPublishers.asJava)
|
||||||
|
mapper.writeValueAsString(s)
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,141 @@
|
||||||
|
package eu.dnetlib.dhp.sx.graph
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.application.AbstractScalaApplication
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.{
|
||||||
|
KeyValue,
|
||||||
|
OtherResearchProduct,
|
||||||
|
Publication,
|
||||||
|
Relation,
|
||||||
|
Result,
|
||||||
|
Software,
|
||||||
|
Dataset => OafDataset
|
||||||
|
}
|
||||||
|
import eu.dnetlib.dhp.schema.sx.scholix.{Scholix, ScholixResource}
|
||||||
|
import org.apache.spark.sql.functions.{col, concat, expr, first, md5}
|
||||||
|
import org.apache.spark.sql.types.StructType
|
||||||
|
import org.apache.spark.sql._
|
||||||
|
import org.slf4j.{Logger, LoggerFactory}
|
||||||
|
|
||||||
|
class SparkCreateScholexplorerDump(propertyPath: String, args: Array[String], log: Logger)
|
||||||
|
extends AbstractScalaApplication(propertyPath, args, log: Logger) {
|
||||||
|
|
||||||
|
/** Here all the spark applications runs this method
|
||||||
|
* where the whole logic of the spark node is defined
|
||||||
|
*/
|
||||||
|
override def run(): Unit = {
|
||||||
|
val sourcePath = parser.get("sourcePath")
|
||||||
|
log.info("sourcePath: {}", sourcePath)
|
||||||
|
val targetPath = parser.get("targetPath")
|
||||||
|
log.info("targetPath: {}", targetPath)
|
||||||
|
generateBidirectionalRelations(sourcePath, targetPath, spark)
|
||||||
|
generateScholixResource(sourcePath, targetPath, spark)
|
||||||
|
generateScholix(targetPath, spark)
|
||||||
|
}
|
||||||
|
|
||||||
|
def generateScholixResource(inputPath: String, outputPath: String, spark: SparkSession): Unit = {
|
||||||
|
val entityMap: Map[String, StructType] = Map(
|
||||||
|
"publication" -> Encoders.bean(classOf[Publication]).schema,
|
||||||
|
"dataset" -> Encoders.bean(classOf[OafDataset]).schema,
|
||||||
|
"software" -> Encoders.bean(classOf[Software]).schema,
|
||||||
|
"otherresearchproduct" -> Encoders.bean(classOf[OtherResearchProduct]).schema
|
||||||
|
)
|
||||||
|
|
||||||
|
implicit val scholixResourceEncoder: Encoder[ScholixResource] = Encoders.bean(classOf[ScholixResource])
|
||||||
|
implicit val resultEncoder: Encoder[Result] = Encoders.bean(classOf[Result])
|
||||||
|
|
||||||
|
val resDs = spark.emptyDataset[ScholixResource]
|
||||||
|
val scholixResourceDS = entityMap.foldLeft[Dataset[ScholixResource]](resDs)((res, item) => {
|
||||||
|
println(s"adding ${item._1}")
|
||||||
|
res.union(
|
||||||
|
spark.read
|
||||||
|
.schema(item._2)
|
||||||
|
.json(s"$inputPath/${item._1}")
|
||||||
|
.as[Result]
|
||||||
|
.map(r => ScholexplorerUtils.generateScholixResourceFromResult(r))
|
||||||
|
.filter(s => s != null)
|
||||||
|
)
|
||||||
|
})
|
||||||
|
scholixResourceDS.write.mode(SaveMode.Overwrite).save(s"$outputPath/resource")
|
||||||
|
}
|
||||||
|
|
||||||
|
def generateBidirectionalRelations(inputPath: String, otuputPath: String, spark: SparkSession): Unit = {
|
||||||
|
val relSchema = Encoders.bean(classOf[Relation]).schema
|
||||||
|
|
||||||
|
val relDF = spark.read
|
||||||
|
.schema(relSchema)
|
||||||
|
.json(s"$inputPath/relation")
|
||||||
|
.where(
|
||||||
|
"datainfo.deletedbyinference is false and source like '50%' and target like '50%' " +
|
||||||
|
"and relClass <> 'merges' and relClass <> 'isMergedIn'"
|
||||||
|
)
|
||||||
|
.select("source", "target", "collectedfrom", "relClass")
|
||||||
|
|
||||||
|
def invRel: String => String = { s =>
|
||||||
|
ScholexplorerUtils.invRel(s)
|
||||||
|
}
|
||||||
|
|
||||||
|
import org.apache.spark.sql.functions.udf
|
||||||
|
val inverseRelationUDF = udf(invRel)
|
||||||
|
val inverseRelation = relDF.select(
|
||||||
|
col("target").alias("source"),
|
||||||
|
col("source").alias("target"),
|
||||||
|
col("collectedfrom"),
|
||||||
|
inverseRelationUDF(col("relClass")).alias("relClass")
|
||||||
|
)
|
||||||
|
|
||||||
|
val bidRel = inverseRelation
|
||||||
|
.union(relDF)
|
||||||
|
.withColumn("id", md5(concat(col("source"), col("relClass"), col("target"))))
|
||||||
|
.withColumn("cf", expr("transform(collectedfrom, x -> struct(x.key, x.value))"))
|
||||||
|
.drop("collectedfrom")
|
||||||
|
.withColumnRenamed("cf", "collectedfrom")
|
||||||
|
.groupBy(col("id"))
|
||||||
|
.agg(
|
||||||
|
first("source").alias("source"),
|
||||||
|
first("target").alias("target"),
|
||||||
|
first("relClass").alias("relClass"),
|
||||||
|
first("collectedfrom").alias("collectedfrom")
|
||||||
|
)
|
||||||
|
|
||||||
|
bidRel.write.mode(SaveMode.Overwrite).save(s"$otuputPath/relation")
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
def generateScholix(outputPath: String, spark: SparkSession): Unit = {
|
||||||
|
implicit val scholixResourceEncoder: Encoder[ScholixResource] = Encoders.bean(classOf[ScholixResource])
|
||||||
|
implicit val scholixEncoder: Encoder[Scholix] = Encoders.kryo(classOf[Scholix])
|
||||||
|
|
||||||
|
import spark.implicits._
|
||||||
|
val relations = spark.read.load(s"$outputPath/relation").as[RelationInfo]
|
||||||
|
val resource = spark.read.load(s"$outputPath/resource").as[ScholixResource]
|
||||||
|
|
||||||
|
val scholix_one_verse = relations
|
||||||
|
.joinWith(resource, relations("source") === resource("dnetIdentifier"), "inner")
|
||||||
|
.map(res => ScholexplorerUtils.generateScholix(res._1, res._2))
|
||||||
|
.map(s => (s.getIdentifier, s))(Encoders.tuple(Encoders.STRING, Encoders.kryo(classOf[Scholix])))
|
||||||
|
|
||||||
|
val resourceTarget = relations
|
||||||
|
.joinWith(resource, relations("target") === resource("dnetIdentifier"), "inner")
|
||||||
|
.map(res => (res._1.id, res._2))(Encoders.tuple(Encoders.STRING, Encoders.kryo(classOf[ScholixResource])))
|
||||||
|
|
||||||
|
scholix_one_verse
|
||||||
|
.joinWith(resourceTarget, scholix_one_verse("_1") === resourceTarget("_1"), "inner")
|
||||||
|
.map(k => ScholexplorerUtils.updateTarget(k._1._2, k._2._2))
|
||||||
|
.write
|
||||||
|
.mode(SaveMode.Overwrite)
|
||||||
|
.option("compression", "gzip")
|
||||||
|
.text(s"$outputPath/scholix")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
object SparkCreateScholexplorerDump {
|
||||||
|
val logger: Logger = LoggerFactory.getLogger(SparkCreateScholexplorerDump.getClass)
|
||||||
|
|
||||||
|
def main(args: Array[String]): Unit = {
|
||||||
|
new SparkCreateScholexplorerDump(
|
||||||
|
log = logger,
|
||||||
|
args = args,
|
||||||
|
propertyPath = "/eu/dnetlib/dhp/sx/create_scholix_dump_params.json"
|
||||||
|
).initialize().run()
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,26 @@
|
||||||
|
package eu.dnetlib.dhp.sx.graph.scholix
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.schema.sx.scholix.ScholixResource
|
||||||
|
import eu.dnetlib.dhp.sx.graph.SparkCreateScholexplorerDump
|
||||||
|
import org.apache.spark.SparkConf
|
||||||
|
import org.apache.spark.sql.{Encoder, Encoders, SparkSession}
|
||||||
|
import org.junit.jupiter.api.Test
|
||||||
|
import org.objenesis.strategy.StdInstantiatorStrategy
|
||||||
|
|
||||||
|
class ScholixGenerationTest {
|
||||||
|
|
||||||
|
@Test
|
||||||
|
def generateScholix(): Unit = {
|
||||||
|
|
||||||
|
val spark: SparkSession = SparkSession.builder().master("local[*]").getOrCreate()
|
||||||
|
val app = new SparkCreateScholexplorerDump(null, null, null)
|
||||||
|
// app.generateScholixResource("/home/sandro/Downloads/scholix_sample/", "/home/sandro/Downloads/scholix/", spark)
|
||||||
|
// app.generateBidirectionalRelations(
|
||||||
|
// "/home/sandro/Downloads/scholix_sample/",
|
||||||
|
// "/home/sandro/Downloads/scholix/",
|
||||||
|
// spark
|
||||||
|
// )
|
||||||
|
app.generateScholix("/home/sandro/Downloads/scholix/", spark)
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue