From e9f24df21cce089aec7945b2f50f20cbe1acc062 Mon Sep 17 00:00:00 2001 From: Serafeim Chatzopoulos Date: Tue, 3 Oct 2023 20:57:57 +0300 Subject: [PATCH] Move SWH API Key from constants to workflow param --- .../java/eu/dnetlib/dhp/swh/ArchiveRepositoryURLs.java | 5 ++++- .../dhp/swh/CollectLastVisitRepositoryData.java | 5 ++++- .../java/eu/dnetlib/dhp/swh/utils/SWHConnection.java | 6 ++++-- .../java/eu/dnetlib/dhp/swh/utils/SWHConstants.java | 2 -- .../dnetlib/dhp/swh/input_archive_repository_urls.json | 6 ++++++ .../swh/input_collect_last_visit_repository_data.json | 6 ++++++ .../main/resources/eu/dnetlib/dhp/swh/job.properties | 2 ++ .../eu/dnetlib/dhp/swh/oozie_app/workflow.xml | 10 ++++++++++ .../java/eu/dnetlib/dhp/swh/SWHConnectionTest.java | 4 ++-- 9 files changed, 38 insertions(+), 8 deletions(-) diff --git a/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/ArchiveRepositoryURLs.java b/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/ArchiveRepositoryURLs.java index f02861953..baa510346 100644 --- a/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/ArchiveRepositoryURLs.java +++ b/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/ArchiveRepositoryURLs.java @@ -63,9 +63,12 @@ public class ArchiveRepositoryURLs { final Integer archiveThresholdInDays = Integer.parseInt(argumentParser.get("archiveThresholdInDays")); log.info("archiveThresholdInDays: {}", archiveThresholdInDays); + final String apiAccessToken = argumentParser.get("apiAccessToken"); + log.info("apiAccessToken: {}", apiAccessToken); + final HttpClientParams clientParams = SWHUtils.getClientParams(argumentParser); - swhConnection = new SWHConnection(clientParams); + swhConnection = new SWHConnection(clientParams, apiAccessToken); final FileSystem fs = FileSystem.get(getHadoopConfiguration(hdfsuri)); diff --git a/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/CollectLastVisitRepositoryData.java b/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/CollectLastVisitRepositoryData.java index ce1b0bb46..ebb9176ff 100644 --- a/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/CollectLastVisitRepositoryData.java +++ b/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/CollectLastVisitRepositoryData.java @@ -55,9 +55,12 @@ public class CollectLastVisitRepositoryData { final String outputPath = argumentParser.get("lastVisitsPath"); log.info("outputPath: {}", outputPath); + final String apiAccessToken = argumentParser.get("apiAccessToken"); + log.info("apiAccessToken: {}", apiAccessToken); + final HttpClientParams clientParams = SWHUtils.getClientParams(argumentParser); - swhConnection = new SWHConnection(clientParams); + swhConnection = new SWHConnection(clientParams, apiAccessToken); final FileSystem fs = FileSystem.get(getHadoopConfiguration(hdfsuri)); diff --git a/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/utils/SWHConnection.java b/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/utils/SWHConnection.java index 9c145fc19..80249e816 100644 --- a/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/utils/SWHConnection.java +++ b/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/utils/SWHConnection.java @@ -14,13 +14,15 @@ public class SWHConnection { HttpConnector2 conn; - public SWHConnection(HttpClientParams clientParams) { + public SWHConnection(HttpClientParams clientParams, String accessToken) { // set custom headers Map headers = new HashMap() { { put(HttpHeaders.ACCEPT, "application/json"); - put(HttpHeaders.AUTHORIZATION, String.format("Bearer %s", SWHConstants.ACCESS_TOKEN)); + if (accessToken != null) { + put(HttpHeaders.AUTHORIZATION, String.format("Bearer %s", accessToken)); + } } }; diff --git a/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/utils/SWHConstants.java b/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/utils/SWHConstants.java index efd0e708b..eae839cfd 100644 --- a/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/utils/SWHConstants.java +++ b/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/utils/SWHConstants.java @@ -6,8 +6,6 @@ public class SWHConstants { public static final String SWH_ARCHIVE_URL = "https://archive.softwareheritage.org/api/1/origin/save/%s/url/%s/"; - public static final String ACCESS_TOKEN = "eyJhbGciOiJIUzI1NiIsInR5cCIgOiAiSldUIiwia2lkIiA6ICJhMTMxYTQ1My1hM2IyLTQwMTUtODQ2Ny05MzAyZjk3MTFkOGEifQ.eyJpYXQiOjE2OTQ2MzYwMjAsImp0aSI6IjkwZjdkNTNjLTQ5YTktNGFiMy1hY2E0LTcwMTViMjEyZTNjNiIsImlzcyI6Imh0dHBzOi8vYXV0aC5zb2Z0d2FyZWhlcml0YWdlLm9yZy9hdXRoL3JlYWxtcy9Tb2Z0d2FyZUhlcml0YWdlIiwiYXVkIjoiaHR0cHM6Ly9hdXRoLnNvZnR3YXJlaGVyaXRhZ2Uub3JnL2F1dGgvcmVhbG1zL1NvZnR3YXJlSGVyaXRhZ2UiLCJzdWIiOiIzMTY5OWZkNC0xNmE0LTQxOWItYTdhMi00NjI5MDY4ZjI3OWEiLCJ0eXAiOiJPZmZsaW5lIiwiYXpwIjoic3doLXdlYiIsInNlc3Npb25fc3RhdGUiOiIzMjYzMzEwMS00ZDRkLTQwMjItODU2NC1iMzNlMTJiNTE3ZDkiLCJzY29wZSI6Im9wZW5pZCBvZmZsaW5lX2FjY2VzcyBwcm9maWxlIGVtYWlsIn0.XHj1VIZu1dZ4Ej32-oU84mFmaox9cLNjXosNxwZM0Xs"; - public static final String DEFAULT_VISIT_TYPE = "git"; public static final String VISIT_STATUS_NOT_FOUND = "not_found"; diff --git a/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/input_archive_repository_urls.json b/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/input_archive_repository_urls.json index ce80d6f4a..e8671f71b 100644 --- a/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/input_archive_repository_urls.json +++ b/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/input_archive_repository_urls.json @@ -46,5 +46,11 @@ "paramLongName": "archiveThresholdInDays", "paramDescription": "the thershold (in days) required to issue an archive request", "paramRequired": false + }, + { + "paramName": "aat", + "paramLongName": "apiAccessToken", + "paramDescription": "the API access token of the SWH API", + "paramRequired": false } ] \ No newline at end of file diff --git a/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/input_collect_last_visit_repository_data.json b/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/input_collect_last_visit_repository_data.json index 8bf41f0ae..662582dfe 100644 --- a/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/input_collect_last_visit_repository_data.json +++ b/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/input_collect_last_visit_repository_data.json @@ -40,5 +40,11 @@ "paramLongName": "requestMethod", "paramDescription": "the method of the requests to perform", "paramRequired": false + }, + { + "paramName": "aat", + "paramLongName": "apiAccessToken", + "paramDescription": "the API access token of the SWH API", + "paramRequired": false } ] \ No newline at end of file diff --git a/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/job.properties b/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/job.properties index 114181944..35c068286 100644 --- a/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/job.properties +++ b/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/job.properties @@ -8,6 +8,8 @@ archiveRequestsPath=${workingDir}/3_archive_requests.seq actionsetsPath=${workingDir}/4_actionsets graphPath=/tmp/prod_provision/graph/18_graph_blacklisted +apiAccessToken=eyJhbGciOiJIUzI1NiIsInR5cCIgOiAiSldUIiwia2lkIiA6ICJhMTMxYTQ1My1hM2IyLTQwMTUtODQ2Ny05MzAyZjk3MTFkOGEifQ.eyJpYXQiOjE2OTQ2MzYwMjAsImp0aSI6IjkwZjdkNTNjLTQ5YTktNGFiMy1hY2E0LTcwMTViMjEyZTNjNiIsImlzcyI6Imh0dHBzOi8vYXV0aC5zb2Z0d2FyZWhlcml0YWdlLm9yZy9hdXRoL3JlYWxtcy9Tb2Z0d2FyZUhlcml0YWdlIiwiYXVkIjoiaHR0cHM6Ly9hdXRoLnNvZnR3YXJlaGVyaXRhZ2Uub3JnL2F1dGgvcmVhbG1zL1NvZnR3YXJlSGVyaXRhZ2UiLCJzdWIiOiIzMTY5OWZkNC0xNmE0LTQxOWItYTdhMi00NjI5MDY4ZjI3OWEiLCJ0eXAiOiJPZmZsaW5lIiwiYXpwIjoic3doLXdlYiIsInNlc3Npb25fc3RhdGUiOiIzMjYzMzEwMS00ZDRkLTQwMjItODU2NC1iMzNlMTJiNTE3ZDkiLCJzY29wZSI6Im9wZW5pZCBvZmZsaW5lX2FjY2VzcyBwcm9maWxlIGVtYWlsIn0.XHj1VIZu1dZ4Ej32-oU84mFmaox9cLNjXosNxwZM0Xs + maxNumberOfRetry=2 retryDelay=1 requestDelay=100 diff --git a/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/oozie_app/workflow.xml b/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/oozie_app/workflow.xml index c69ecd74d..64dc0d2aa 100644 --- a/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/oozie_app/workflow.xml @@ -38,6 +38,10 @@ requestDelay Delay between API requests (in ms) + + apiAccessToken + The API Key of the SWH API + softwareLimit Limit on the number of repo URLs to use (Optional); for debug purposes @@ -61,6 +65,10 @@ actionsetsPath ${actionsetsPath} + + apiAccessToken + ${apiAccessToken} + @@ -117,6 +125,7 @@ --requestDelay${requestDelay} --retryDelay${retryDelay} --requestMethodGET + --apiAccessToken${apiAccessToken} @@ -136,6 +145,7 @@ --requestDelay${requestDelay} --retryDelay${retryDelay} --requestMethodPOST + --apiAccessToken${apiAccessToken} diff --git a/dhp-workflows/dhp-swh/src/test/java/eu/dnetlib/dhp/swh/SWHConnectionTest.java b/dhp-workflows/dhp-swh/src/test/java/eu/dnetlib/dhp/swh/SWHConnectionTest.java index 28210f1b3..b19e0e7ac 100644 --- a/dhp-workflows/dhp-swh/src/test/java/eu/dnetlib/dhp/swh/SWHConnectionTest.java +++ b/dhp-workflows/dhp-swh/src/test/java/eu/dnetlib/dhp/swh/SWHConnectionTest.java @@ -25,7 +25,7 @@ public class SWHConnectionTest { HttpClientParams clientParams = new HttpClientParams(); clientParams.setRequestMethod("GET"); - SWHConnection swhConnection = new SWHConnection(clientParams); + SWHConnection swhConnection = new SWHConnection(clientParams, null); String repoUrl = "https://github.com/stanford-futuredata/FAST"; URL url = new URL(String.format(SWHConstants.SWH_LATEST_VISIT_URL, repoUrl)); @@ -43,7 +43,7 @@ public class SWHConnectionTest { HttpClientParams clientParams = new HttpClientParams(); clientParams.setRequestMethod("POST"); - SWHConnection swhConnection = new SWHConnection(clientParams); + SWHConnection swhConnection = new SWHConnection(clientParams, null); String repoUrl = "https://github.com/stanford-futuredata/FAST"; URL url = new URL(String.format(SWHConstants.SWH_ARCHIVE_URL, SWHConstants.DEFAULT_VISIT_TYPE, repoUrl));