[eoscDump] hack to add not deduplicated datasources + oaffulltext at the level of the result

This commit is contained in:
Miriam Baglioni 2023-07-10 18:42:28 +02:00
parent ca94995e77
commit 831a4611d3
7 changed files with 198 additions and 84 deletions

View File

@ -142,6 +142,16 @@ public class Result implements Serializable {
@JsonSchema(description = "The set of relations associated to this result") @JsonSchema(description = "The set of relations associated to this result")
private List<Relation> relations; private List<Relation> relations;
private List<String> fulltext;
/**
 * Returns the fulltext values attached to this result.
 *
 * @return the list previously passed to {@link #setFulltext(List)}, or {@code null} when it was
 *         never set (the backing field has no initializer)
 */
public List<String> getFulltext() {
return fulltext;
}
/**
 * Replaces the fulltext values attached to this result.
 *
 * @param fulltext the new list of fulltext values; stored by reference, no copy is made
 */
public void setFulltext(List<String> fulltext) {
this.fulltext = fulltext;
}
public Long getLastupdatetimestamp() { public Long getLastupdatetimestamp() {
return lastupdatetimestamp; return lastupdatetimestamp;
} }

View File

@ -60,6 +60,7 @@ public class ResultMapper implements Serializable {
mapSubject(out, input); mapSubject(out, input);
out.setType(input.getResulttype().getClassid()); out.setType(input.getResulttype().getClassid());
mapContext(communityMap, out, input); mapContext(communityMap, out, input);
mapFulltext(out, input);
} catch (ClassCastException cce) { } catch (ClassCastException cce) {
return null; return null;
} }
@ -69,6 +70,11 @@ public class ResultMapper implements Serializable {
} }
/**
 * Copies the fulltext values of the OAF result onto the dumped result.
 *
 * When the input carries no fulltext (the list is {@code null} or empty) nothing is set, so
 * {@code out} keeps whatever fulltext it already had.
 *
 * @param out the dump result receiving the fulltext values
 * @param input the OAF result the fulltext entries are read from
 */
private static void mapFulltext(Result out, eu.dnetlib.dhp.schema.oaf.Result input) {
	Optional
		.ofNullable(input.getFulltext())
		// an empty list is treated exactly like a missing one
		.filter(fulltexts -> !fulltexts.isEmpty())
		.map(fulltexts -> fulltexts.stream().map(entry -> entry.getValue()).collect(Collectors.toList()))
		.ifPresent(out::setFulltext);
}
private static void mapContext(Map<String, String> communityMap, Result out, private static void mapContext(Map<String, String> communityMap, Result out,
eu.dnetlib.dhp.schema.oaf.Result input) { eu.dnetlib.dhp.schema.oaf.Result input) {
Set<String> communities = communityMap.keySet(); Set<String> communities = communityMap.keySet();

View File

@ -91,9 +91,9 @@ public class SendToZenodoHDFS implements Serializable {
zenodoApiClient.sendMretadata(metadata); zenodoApiClient.sendMretadata(metadata);
} }
if (Boolean.TRUE.equals(publish)) { // if (Boolean.TRUE.equals(publish)) {
zenodoApiClient.publish(); // zenodoApiClient.publish();
} // }
} }
} }

View File

@ -77,10 +77,23 @@ public class SelectEoscResultsJobStep1 implements Serializable {
.readPath(spark, inputPath, inputClazz) .readPath(spark, inputPath, inputClazz)
.filter( .filter(
(FilterFunction<R>) r -> !r.getDataInfo().getDeletedbyinference() && !r.getDataInfo().getInvisible() (FilterFunction<R>) r -> !r.getDataInfo().getDeletedbyinference() && !r.getDataInfo().getInvisible()
&& (r.getContext().stream().anyMatch(c -> c.getId().equals("eosc")) || r && (r.getContext().stream().anyMatch(c -> c.getId().equals("eosc")) ||
.getCollectedfrom() r
.getCollectedfrom()
.stream()
.anyMatch(cf -> cf.getValue().equalsIgnoreCase("B2FIND")))
||
r.getInstance().stream().anyMatch(i -> i.getHostedby().getValue().equalsIgnoreCase("ARCHE")) ||
r
.getInstance()
.stream() .stream()
.anyMatch(cf -> cf.getValue().equalsIgnoreCase("B2FIND")))) .anyMatch(i -> i.getHostedby().getValue().equalsIgnoreCase("LINDAT/CLARIN repository"))
||
r
.getInstance()
.stream()
.anyMatch(
i -> i.getHostedby().getValue().equalsIgnoreCase("Publications at Bielefeld University")))
.map( .map(
(MapFunction<R, Result>) r -> (Result) ResultMapper (MapFunction<R, Result>) r -> (Result) ResultMapper

View File

@ -101,49 +101,81 @@ public class ZenodoAPIClient implements Serializable {
return responseCode; return responseCode;
} }
public int uploadIS2(InputStream is, String fileName) throws IOException { // public int newDeposition() throws IOException {
// String json = "{}";
// OkHttpClient httpClient = new OkHttpClient.Builder().connectTimeout(600, TimeUnit.SECONDS).build();
//
// RequestBody body = RequestBody.create(json, MEDIA_TYPE_JSON);
//
// Request request = new Request.Builder()
// .url(urlString)
// .addHeader("Content-Type", "application/json") // add request headers
// .addHeader("Authorization", "Bearer " + access_token)
// .post(body)
// .build();
//
// try (Response response = httpClient.newCall(request).execute()) {
//
// if (!response.isSuccessful())
// throw new IOException("Unexpected code " + response + response.body().string());
//
// // Get response body
// json = response.body().string();
//
// eu.dnetlib.dhp.common.api.zenodo.ZenodoModel newSubmission = new Gson()
// .fromJson(json, eu.dnetlib.dhp.common.api.zenodo.ZenodoModel.class);
// this.bucket = newSubmission.getLinks().getBucket();
// this.deposition_id = newSubmission.getId();
//
// return response.code();
//
// }
//
// }
final String crlf = "\r\n"; // public int uploadIS2(InputStream is, String fileName) throws IOException {
final String twoHyphens = "--"; //
final String boundary = "*****"; // final String crlf = "\r\n";
// final String twoHyphens = "--";
// final String boundary = "*****";
//
// HttpPut put = new HttpPut(bucket + "/" + fileName);
//
// put.addHeader(HttpHeaders.CONTENT_TYPE, "application/zip");
// put.addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token);
//
// put.setEntity(new InputStreamEntity(is));
//
// int statusCode;
// try (CloseableHttpClient client = HttpClients.createDefault()) {
// CloseableHttpResponse response = client.execute(put);
// statusCode = response.getStatusLine().getStatusCode();
//
// }
//
// if (!checkOKStatus(statusCode)) {
// throw new IOException("Unexpected code " + statusCode);
// }
//
// return statusCode;
// }
HttpPut put = new HttpPut(bucket + "/" + fileName); // public int publish() throws IOException {
// String json = "{}";
put.addHeader(HttpHeaders.CONTENT_TYPE, "application/zip"); // HttpPost post = new HttpPost(urlString + "/" + deposition_id + "/actions/publish");
put.addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token); // post.addHeader(HttpHeaders.CONTENT_TYPE, "application/json");
// post.addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token);
put.setEntity(new InputStreamEntity(is)); // post.setEntity(new StringEntity(json));
// int statusCode;
int statusCode; // try (CloseableHttpClient client = HttpClients.createDefault()) {
try (CloseableHttpClient client = HttpClients.createDefault()) { // CloseableHttpResponse response = client.execute(post);
CloseableHttpResponse response = client.execute(put); // statusCode = response.getStatusLine().getStatusCode();
statusCode = response.getStatusLine().getStatusCode(); // }
// if (!checkOKStatus(statusCode)) {
} // throw new IOException("Unexpected code " + statusCode);
// }
if (!checkOKStatus(statusCode)) { // return statusCode;
throw new IOException("Unexpected code " + statusCode); // }
}
return statusCode;
}
public int publish() throws IOException {
String json = "{}";
HttpPost post = new HttpPost(urlString + "/" + deposition_id + "/actions/publish");
post.addHeader(HttpHeaders.CONTENT_TYPE, "application/json");
post.addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token);
post.setEntity(new StringEntity(json));
int statusCode;
try (CloseableHttpClient client = HttpClients.createDefault()) {
CloseableHttpResponse response = client.execute(post);
statusCode = response.getStatusLine().getStatusCode();
}
if (!checkOKStatus(statusCode)) {
throw new IOException("Unexpected code " + statusCode);
}
return statusCode;
}
/** /**
* Upload files in Zenodo. * Upload files in Zenodo.
@ -266,38 +298,65 @@ public class ZenodoAPIClient implements Serializable {
* @throws IOException * @throws IOException
* @throws MissingConceptDoiException * @throws MissingConceptDoiException
*/ */
// public int newVersion(String concept_rec_id) throws Exception, MissingConceptDoiException {
// setDepositionId(concept_rec_id, 1);
// String json = "{}";
//
// URL url = new URL(urlString + "/" + deposition_id + "/actions/newversion");
// HttpURLConnection conn = (HttpURLConnection) url.openConnection();
//
// conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token);
// conn.setDoOutput(true);
// conn.setRequestMethod("POST");
//
// try (OutputStream os = conn.getOutputStream()) {
// byte[] input = json.getBytes("utf-8");
// os.write(input, 0, input.length);
//
// }
//
// String body = getBody(conn);
//
// int responseCode = conn.getResponseCode();
//
// conn.disconnect();
// if (!checkOKStatus(responseCode))
// throw new IOException("Unexpected code " + responseCode + body);
//
// ZenodoModel zenodoModel = new Gson().fromJson(body, ZenodoModel.class);
// String latest_draft = zenodoModel.getLinks().getLatest_draft();
// deposition_id = latest_draft.substring(latest_draft.lastIndexOf("/") + 1);
// bucket = getBucket(latest_draft);
//
// return responseCode;
//
// }
public int newVersion(String concept_rec_id) throws Exception, MissingConceptDoiException { public int newVersion(String concept_rec_id) throws Exception, MissingConceptDoiException {
setDepositionId(concept_rec_id, 1); setDepositionId(concept_rec_id, 1);
String json = "{}"; String json = "{}";
URL url = new URL(urlString + "/" + deposition_id + "/actions/newversion"); OkHttpClient httpClient = new OkHttpClient.Builder().connectTimeout(600, TimeUnit.SECONDS).build();
HttpURLConnection conn = (HttpURLConnection) url.openConnection();
conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token); RequestBody body = RequestBody.create(json, MEDIA_TYPE_JSON);
conn.setDoOutput(true);
conn.setRequestMethod("POST");
try (OutputStream os = conn.getOutputStream()) { Request request = new Request.Builder()
byte[] input = json.getBytes("utf-8"); .url(urlString + "/" + deposition_id + "/actions/newversion")
os.write(input, 0, input.length); .addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token)
.post(body)
.build();
try (Response response = httpClient.newCall(request).execute()) {
if (!response.isSuccessful())
throw new IOException("Unexpected code " + response + response.body().string());
ZenodoModel zenodoModel = new Gson().fromJson(response.body().string(), ZenodoModel.class);
String latest_draft = zenodoModel.getLinks().getLatest_draft();
deposition_id = latest_draft.substring(latest_draft.lastIndexOf("/") + 1);
bucket = getBucket(latest_draft);
return response.code();
} }
String body = getBody(conn);
int responseCode = conn.getResponseCode();
conn.disconnect();
if (!checkOKStatus(responseCode))
throw new IOException("Unexpected code " + responseCode + body);
ZenodoModel zenodoModel = new Gson().fromJson(body, ZenodoModel.class);
String latest_draft = zenodoModel.getLinks().getLatest_draft();
deposition_id = latest_draft.substring(latest_draft.lastIndexOf("/") + 1);
bucket = getBucket(latest_draft);
return responseCode;
} }
/** /**
@ -361,20 +420,46 @@ public class ZenodoAPIClient implements Serializable {
} }
private String getPrevDepositions(String page) throws Exception { // private String getPrevDepositions(String page) throws Exception {
//
// HttpGet get = new HttpGet(urlString);
// URI uri = new URIBuilder(get.getURI()).addParameter("page", page).build();
//
// get.setURI(uri);
//
// get.addHeader(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString());
// get.addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token);
// try (CloseableHttpClient client = HttpClients.createDefault()) {
// CloseableHttpResponse response = client.execute(get);
// final String body = EntityUtils.toString(response.getEntity(), StandardCharsets.UTF_8);
// return body;
// }
// }
HttpGet get = new HttpGet(urlString); private String getPrevDepositions(String page) throws IOException {
URI uri = new URIBuilder(get.getURI()).addParameter("page", page).build();
get.setURI(uri); OkHttpClient httpClient = new OkHttpClient.Builder().connectTimeout(600, TimeUnit.SECONDS).build();
HttpUrl.Builder urlBuilder = HttpUrl.parse(urlString).newBuilder();
urlBuilder.addQueryParameter("page", page);
String url = urlBuilder.build().toString();
Request request = new Request.Builder()
.url(url)
.addHeader(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString()) // add request headers
.addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token)
.get()
.build();
try (Response response = httpClient.newCall(request).execute()) {
if (!response.isSuccessful())
throw new IOException("Unexpected code " + response + response.body().string());
return response.body().string();
get.addHeader(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString());
get.addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token);
try (CloseableHttpClient client = HttpClients.createDefault()) {
CloseableHttpResponse response = client.execute(get);
final String body = EntityUtils.toString(response.getEntity(), StandardCharsets.UTF_8);
return body;
} }
} }
private String getBucket(String inputUurl) throws IOException { private String getBucket(String inputUurl) throws IOException {

View File

@ -85,7 +85,7 @@
</property> </property>
</configuration> </configuration>
</global> </global>
<start to="send_zenodo"/> <start to="save_community_map"/>
<kill name="Kill"> <kill name="Kill">
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message> <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
</kill> </kill>

View File

@ -85,7 +85,7 @@ public class ZenodoUploadTest {
System.out.println(client.sendMretadata(metadata)); System.out.println(client.sendMretadata(metadata));
System.out.println(client.publish()); // System.out.println(client.publish());
} }
@ -136,7 +136,7 @@ public class ZenodoUploadTest {
} }
System.out.println(client.publish()); // System.out.println(client.publish());
} }