diff --git a/dhp-workflows/dhp-doiboost/pom.xml b/dhp-workflows/dhp-doiboost/pom.xml
index 06f57de6b..01ac60b3a 100644
--- a/dhp-workflows/dhp-doiboost/pom.xml
+++ b/dhp-workflows/dhp-doiboost/pom.xml
@@ -4,7 +4,7 @@
     <parent>
        <artifactId>dhp-workflows</artifactId>
        <groupId>eu.dnetlib.dhp</groupId>
-       <version>1.1.7-SNAPSHOT</version>
+       <version>1.2.1-SNAPSHOT</version>
     </parent>
 
     <modelVersion>4.0.0</modelVersion>
diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/OrcidDownloader.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/OrcidDownloader.java
index dad670941..f2251da2c 100644
--- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/OrcidDownloader.java
+++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/OrcidDownloader.java
@@ -57,6 +57,7 @@ public class OrcidDownloader extends OrcidDSManager {
 		} catch (Throwable e) {
 			Log.warn("Downloading " + orcidId, e.getMessage());
+
 		}
 		return new String("");
 	}
 
@@ -147,6 +148,9 @@ public class OrcidDownloader extends OrcidDSManager {
 								+ downloadedRecordsCounter
 								+ " saved: "
 								+ savedRecordsCounter);
+					if (parsedRecordsCounter > REQ_MAX_TEST) {
+						break;
+					}
 				}
 			}
 			long endDownload = System.currentTimeMillis();
@@ -194,7 +198,6 @@ public class OrcidDownloader extends OrcidDSManager {
 			Log.warn("[" + orcidId + "] Parsing date: ", e.getMessage());
 			return true;
 		}
-		return modifiedDateDt.after(lastUpdateDt);
 	}
 }
diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/mag/MAGMappingTest.scala b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/mag/MAGMappingTest.scala
index ce26071b9..f60e10cf5 100644
--- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/mag/MAGMappingTest.scala
+++ b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/mag/MAGMappingTest.scala
@@ -16,7 +16,7 @@ class MAGMappingTest {
 
 	val mapper = new ObjectMapper()
 
-	@Test
+	//@Test
 	def testMAGCSV(): Unit = {
 
 		val conf: SparkConf = new SparkConf()
@@ -31,7 +31,7 @@ class MAGMappingTest {
 		import spark.implicits._
 		val d: Dataset[Papers] = spark.read.load("/data/doiboost/mag/datasets/Papers").as[Papers]
 		logger.info(s"Total number of element: ${d.where(col("Doi").isNotNull).count()}")
-		implicit val mapEncoder = org.apache.spark.sql.Encoders.kryo[Papers]
+		//implicit val mapEncoder = org.apache.spark.sql.Encoders.bean[Papers]
 		val result: RDD[Papers] = d.where(col("Doi").isNotNull).rdd.map { p: Papers => Tuple2(p.Doi, p) }.reduceByKey {case (p1:Papers, p2:Papers) =>
 			var r = if (p1==null) p2 else p1
 			if (p1!=null && p2!=null )
 				if (p1.CreatedDate.before(p2.CreatedDate))