dnet-hadoop/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidAuthors.j...

31 lines
1.4 KiB
Plaintext

diff a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidAuthors.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidAuthors.java (rejected hunks)
@@ -31,7 +32,6 @@ public class SparkDownloadOrcidAuthors {
static Logger logger = LoggerFactory.getLogger(SparkDownloadOrcidAuthors.class);
static final String DATE_FORMAT = "yyyy-MM-dd HH:mm:ss";
- static String lastUpdate;
public static void main(String[] args) throws Exception {
@@ -54,14 +54,18 @@ public class SparkDownloadOrcidAuthors {
final String token = parser.get("token");
final String lambdaFileName = parser.get("lambdaFileName");
logger.info("lambdaFileName: {}", lambdaFileName);
-
- lastUpdate = HDFSUtil.readFromTextFile(workingPath.concat("last_update.txt"));
+ final String hdfsServerUri = parser.get("hdfsServerUri");
SparkConf conf = new SparkConf();
runWithSparkSession(
conf,
isSparkSessionManaged,
spark -> {
+ String lastUpdate = HDFSUtil.readFromTextFile(hdfsServerUri, workingPath, "last_update.txt");
+ logger.info("lastUpdate: {}", lastUpdate);
+ if (StringUtils.isBlank(lastUpdate)) {
+ throw new RuntimeException("last update info not found");
+ }
JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
LongAccumulator parsedRecordsAcc = spark.sparkContext().longAccumulator("parsed_records");