[eoscDump] ack to add not deduplicated datasources + oaffulltext at the level of the result

This commit is contained in:
Miriam Baglioni 2023-07-10 18:42:28 +02:00
parent ca94995e77
commit 831a4611d3
7 changed files with 198 additions and 84 deletions

View File

@ -142,6 +142,16 @@ public class Result implements Serializable {
@JsonSchema(description = "The set of relations associated to this result")
private List<Relation> relations;
/**
 * Full-text values attached to this result. Stays {@code null} unless a value is
 * explicitly set through {@link #setFulltext(List)}.
 */
private List<String> fulltext;
/**
 * Returns the full-text values attached to this result.
 *
 * @return the stored list, or {@code null} when none has been set
 */
public List<String> getFulltext() {
return fulltext;
}
/**
 * Sets the full-text values attached to this result. The list is stored by
 * reference; no defensive copy is made.
 *
 * @param fulltext the list of full-text values to store
 */
public void setFulltext(List<String> fulltext) {
this.fulltext = fulltext;
}
/**
 * Returns the value of the {@code lastupdatetimestamp} field.
 *
 * @return the stored timestamp as a {@link Long} (a boxed value, so it may be {@code null})
 */
public Long getLastupdatetimestamp() {
return lastupdatetimestamp;
}

View File

@ -60,6 +60,7 @@ public class ResultMapper implements Serializable {
mapSubject(out, input);
out.setType(input.getResulttype().getClassid());
mapContext(communityMap, out, input);
mapFulltext(out, input);
} catch (ClassCastException cce) {
return null;
}
@ -69,6 +70,11 @@ public class ResultMapper implements Serializable {
}
/**
 * Copies the full-text values of the OAF result into the dump result.
 *
 * <p>The target is left untouched when the input carries no full-text entries
 * (the list is {@code null} or empty); otherwise each entry is reduced to its
 * {@code getValue()} and the resulting list is stored on {@code out}.
 *
 * @param out the dump result being populated
 * @param input the OAF result the full-text entries are read from
 */
private static void mapFulltext(Result out, eu.dnetlib.dhp.schema.oaf.Result input) {
	// A plain null check replaces the Optional.ofNullable(..).isPresent() wrapper:
	// Optional is meant for return values, not as a substitute for "!= null".
	if (input.getFulltext() == null || input.getFulltext().isEmpty()) {
		return;
	}
	out.setFulltext(input.getFulltext().stream().map(ft -> ft.getValue()).collect(Collectors.toList()));
}
private static void mapContext(Map<String, String> communityMap, Result out,
eu.dnetlib.dhp.schema.oaf.Result input) {
Set<String> communities = communityMap.keySet();

View File

@ -91,9 +91,9 @@ public class SendToZenodoHDFS implements Serializable {
zenodoApiClient.sendMretadata(metadata);
}
if (Boolean.TRUE.equals(publish)) {
zenodoApiClient.publish();
}
// if (Boolean.TRUE.equals(publish)) {
// zenodoApiClient.publish();
// }
}
}

View File

@ -77,10 +77,23 @@ public class SelectEoscResultsJobStep1 implements Serializable {
.readPath(spark, inputPath, inputClazz)
.filter(
(FilterFunction<R>) r -> !r.getDataInfo().getDeletedbyinference() && !r.getDataInfo().getInvisible()
&& (r.getContext().stream().anyMatch(c -> c.getId().equals("eosc")) || r
.getCollectedfrom()
&& (r.getContext().stream().anyMatch(c -> c.getId().equals("eosc")) ||
r
.getCollectedfrom()
.stream()
.anyMatch(cf -> cf.getValue().equalsIgnoreCase("B2FIND")))
||
r.getInstance().stream().anyMatch(i -> i.getHostedby().getValue().equalsIgnoreCase("ARCHE")) ||
r
.getInstance()
.stream()
.anyMatch(cf -> cf.getValue().equalsIgnoreCase("B2FIND"))))
.anyMatch(i -> i.getHostedby().getValue().equalsIgnoreCase("LINDAT/CLARIN repository"))
||
r
.getInstance()
.stream()
.anyMatch(
i -> i.getHostedby().getValue().equalsIgnoreCase("Publications at Bielefeld University")))
.map(
(MapFunction<R, Result>) r -> (Result) ResultMapper

View File

@ -101,49 +101,81 @@ public class ZenodoAPIClient implements Serializable {
return responseCode;
}
public int uploadIS2(InputStream is, String fileName) throws IOException {
// public int newDeposition() throws IOException {
// String json = "{}";
// OkHttpClient httpClient = new OkHttpClient.Builder().connectTimeout(600, TimeUnit.SECONDS).build();
//
// RequestBody body = RequestBody.create(json, MEDIA_TYPE_JSON);
//
// Request request = new Request.Builder()
// .url(urlString)
// .addHeader("Content-Type", "application/json") // add request headers
// .addHeader("Authorization", "Bearer " + access_token)
// .post(body)
// .build();
//
// try (Response response = httpClient.newCall(request).execute()) {
//
// if (!response.isSuccessful())
// throw new IOException("Unexpected code " + response + response.body().string());
//
// // Get response body
// json = response.body().string();
//
// eu.dnetlib.dhp.common.api.zenodo.ZenodoModel newSubmission = new Gson()
// .fromJson(json, eu.dnetlib.dhp.common.api.zenodo.ZenodoModel.class);
// this.bucket = newSubmission.getLinks().getBucket();
// this.deposition_id = newSubmission.getId();
//
// return response.code();
//
// }
//
// }
final String crlf = "\r\n";
final String twoHyphens = "--";
final String boundary = "*****";
// public int uploadIS2(InputStream is, String fileName) throws IOException {
//
// final String crlf = "\r\n";
// final String twoHyphens = "--";
// final String boundary = "*****";
//
// HttpPut put = new HttpPut(bucket + "/" + fileName);
//
// put.addHeader(HttpHeaders.CONTENT_TYPE, "application/zip");
// put.addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token);
//
// put.setEntity(new InputStreamEntity(is));
//
// int statusCode;
// try (CloseableHttpClient client = HttpClients.createDefault()) {
// CloseableHttpResponse response = client.execute(put);
// statusCode = response.getStatusLine().getStatusCode();
//
// }
//
// if (!checkOKStatus(statusCode)) {
// throw new IOException("Unexpected code " + statusCode);
// }
//
// return statusCode;
// }
HttpPut put = new HttpPut(bucket + "/" + fileName);
put.addHeader(HttpHeaders.CONTENT_TYPE, "application/zip");
put.addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token);
put.setEntity(new InputStreamEntity(is));
int statusCode;
try (CloseableHttpClient client = HttpClients.createDefault()) {
CloseableHttpResponse response = client.execute(put);
statusCode = response.getStatusLine().getStatusCode();
}
if (!checkOKStatus(statusCode)) {
throw new IOException("Unexpected code " + statusCode);
}
return statusCode;
}
public int publish() throws IOException {
String json = "{}";
HttpPost post = new HttpPost(urlString + "/" + deposition_id + "/actions/publish");
post.addHeader(HttpHeaders.CONTENT_TYPE, "application/json");
post.addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token);
post.setEntity(new StringEntity(json));
int statusCode;
try (CloseableHttpClient client = HttpClients.createDefault()) {
CloseableHttpResponse response = client.execute(post);
statusCode = response.getStatusLine().getStatusCode();
}
if (!checkOKStatus(statusCode)) {
throw new IOException("Unexpected code " + statusCode);
}
return statusCode;
}
// public int publish() throws IOException {
// String json = "{}";
// HttpPost post = new HttpPost(urlString + "/" + deposition_id + "/actions/publish");
// post.addHeader(HttpHeaders.CONTENT_TYPE, "application/json");
// post.addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token);
// post.setEntity(new StringEntity(json));
// int statusCode;
// try (CloseableHttpClient client = HttpClients.createDefault()) {
// CloseableHttpResponse response = client.execute(post);
// statusCode = response.getStatusLine().getStatusCode();
// }
// if (!checkOKStatus(statusCode)) {
// throw new IOException("Unexpected code " + statusCode);
// }
// return statusCode;
// }
/**
* Upload files in Zenodo.
@ -266,38 +298,65 @@ public class ZenodoAPIClient implements Serializable {
* @throws IOException
* @throws MissingConceptDoiException
*/
// public int newVersion(String concept_rec_id) throws Exception, MissingConceptDoiException {
// setDepositionId(concept_rec_id, 1);
// String json = "{}";
//
// URL url = new URL(urlString + "/" + deposition_id + "/actions/newversion");
// HttpURLConnection conn = (HttpURLConnection) url.openConnection();
//
// conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token);
// conn.setDoOutput(true);
// conn.setRequestMethod("POST");
//
// try (OutputStream os = conn.getOutputStream()) {
// byte[] input = json.getBytes("utf-8");
// os.write(input, 0, input.length);
//
// }
//
// String body = getBody(conn);
//
// int responseCode = conn.getResponseCode();
//
// conn.disconnect();
// if (!checkOKStatus(responseCode))
// throw new IOException("Unexpected code " + responseCode + body);
//
// ZenodoModel zenodoModel = new Gson().fromJson(body, ZenodoModel.class);
// String latest_draft = zenodoModel.getLinks().getLatest_draft();
// deposition_id = latest_draft.substring(latest_draft.lastIndexOf("/") + 1);
// bucket = getBucket(latest_draft);
//
// return responseCode;
//
// }
public int newVersion(String concept_rec_id) throws Exception, MissingConceptDoiException {
setDepositionId(concept_rec_id, 1);
String json = "{}";
URL url = new URL(urlString + "/" + deposition_id + "/actions/newversion");
HttpURLConnection conn = (HttpURLConnection) url.openConnection();
OkHttpClient httpClient = new OkHttpClient.Builder().connectTimeout(600, TimeUnit.SECONDS).build();
conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token);
conn.setDoOutput(true);
conn.setRequestMethod("POST");
RequestBody body = RequestBody.create(json, MEDIA_TYPE_JSON);
try (OutputStream os = conn.getOutputStream()) {
byte[] input = json.getBytes("utf-8");
os.write(input, 0, input.length);
Request request = new Request.Builder()
.url(urlString + "/" + deposition_id + "/actions/newversion")
.addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token)
.post(body)
.build();
try (Response response = httpClient.newCall(request).execute()) {
if (!response.isSuccessful())
throw new IOException("Unexpected code " + response + response.body().string());
ZenodoModel zenodoModel = new Gson().fromJson(response.body().string(), ZenodoModel.class);
String latest_draft = zenodoModel.getLinks().getLatest_draft();
deposition_id = latest_draft.substring(latest_draft.lastIndexOf("/") + 1);
bucket = getBucket(latest_draft);
return response.code();
}
String body = getBody(conn);
int responseCode = conn.getResponseCode();
conn.disconnect();
if (!checkOKStatus(responseCode))
throw new IOException("Unexpected code " + responseCode + body);
ZenodoModel zenodoModel = new Gson().fromJson(body, ZenodoModel.class);
String latest_draft = zenodoModel.getLinks().getLatest_draft();
deposition_id = latest_draft.substring(latest_draft.lastIndexOf("/") + 1);
bucket = getBucket(latest_draft);
return responseCode;
}
/**
@ -361,20 +420,46 @@ public class ZenodoAPIClient implements Serializable {
}
private String getPrevDepositions(String page) throws Exception {
// private String getPrevDepositions(String page) throws Exception {
//
// HttpGet get = new HttpGet(urlString);
// URI uri = new URIBuilder(get.getURI()).addParameter("page", page).build();
//
// get.setURI(uri);
//
// get.addHeader(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString());
// get.addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token);
// try (CloseableHttpClient client = HttpClients.createDefault()) {
// CloseableHttpResponse response = client.execute(get);
// final String body = EntityUtils.toString(response.getEntity(), StandardCharsets.UTF_8);
// return body;
// }
// }
HttpGet get = new HttpGet(urlString);
URI uri = new URIBuilder(get.getURI()).addParameter("page", page).build();
private String getPrevDepositions(String page) throws IOException {
get.setURI(uri);
OkHttpClient httpClient = new OkHttpClient.Builder().connectTimeout(600, TimeUnit.SECONDS).build();
HttpUrl.Builder urlBuilder = HttpUrl.parse(urlString).newBuilder();
urlBuilder.addQueryParameter("page", page);
String url = urlBuilder.build().toString();
Request request = new Request.Builder()
.url(url)
.addHeader(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString()) // add request headers
.addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token)
.get()
.build();
try (Response response = httpClient.newCall(request).execute()) {
if (!response.isSuccessful())
throw new IOException("Unexpected code " + response + response.body().string());
return response.body().string();
get.addHeader(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString());
get.addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token);
try (CloseableHttpClient client = HttpClients.createDefault()) {
CloseableHttpResponse response = client.execute(get);
final String body = EntityUtils.toString(response.getEntity(), StandardCharsets.UTF_8);
return body;
}
}
private String getBucket(String inputUurl) throws IOException {

View File

@ -85,7 +85,7 @@
</property>
</configuration>
</global>
<start to="send_zenodo"/>
<start to="save_community_map"/>
<kill name="Kill">
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
</kill>

View File

@ -85,7 +85,7 @@ public class ZenodoUploadTest {
System.out.println(client.sendMretadata(metadata));
System.out.println(client.publish());
// System.out.println(client.publish());
}
@ -136,7 +136,7 @@ public class ZenodoUploadTest {
}
System.out.println(client.publish());
// System.out.println(client.publish());
}