forked from D-Net/dnet-hadoop
merged
This commit is contained in:
commit
f53e42bda7
|
@ -4,7 +4,7 @@
|
|||
<parent>
|
||||
<artifactId>dhp-workflows</artifactId>
|
||||
<groupId>eu.dnetlib.dhp</groupId>
|
||||
<version>1.1.7-SNAPSHOT</version>
|
||||
<version>1.2.1-SNAPSHOT</version>
|
||||
</parent>
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
|
||||
|
|
|
@ -57,6 +57,7 @@ public class OrcidDownloader extends OrcidDSManager {
|
|||
|
||||
} catch (Throwable e) {
|
||||
Log.warn("Downloading " + orcidId, e.getMessage());
|
||||
|
||||
}
|
||||
return new String("");
|
||||
}
|
||||
|
@ -147,6 +148,9 @@ public class OrcidDownloader extends OrcidDSManager {
|
|||
+ downloadedRecordsCounter
|
||||
+ " saved: "
|
||||
+ savedRecordsCounter);
|
||||
if (parsedRecordsCounter > REQ_MAX_TEST) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
long endDownload = System.currentTimeMillis();
|
||||
|
@ -194,7 +198,6 @@ public class OrcidDownloader extends OrcidDSManager {
|
|||
Log.warn("[" + orcidId + "] Parsing date: ", e.getMessage());
|
||||
return true;
|
||||
}
|
||||
|
||||
return modifiedDateDt.after(lastUpdateDt);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -16,7 +16,7 @@ class MAGMappingTest {
|
|||
val mapper = new ObjectMapper()
|
||||
|
||||
|
||||
@Test
|
||||
//@Test
|
||||
def testMAGCSV(): Unit = {
|
||||
|
||||
val conf: SparkConf = new SparkConf()
|
||||
|
@ -31,7 +31,7 @@ class MAGMappingTest {
|
|||
import spark.implicits._
|
||||
val d: Dataset[Papers] = spark.read.load("/data/doiboost/mag/datasets/Papers").as[Papers]
|
||||
logger.info(s"Total number of element: ${d.where(col("Doi").isNotNull).count()}")
|
||||
implicit val mapEncoder = org.apache.spark.sql.Encoders.kryo[Papers]
|
||||
//implicit val mapEncoder = org.apache.spark.sql.Encoders.bean[Papers]
|
||||
val result: RDD[Papers] = d.where(col("Doi").isNotNull).rdd.map { p: Papers => Tuple2(p.Doi, p) }.reduceByKey {case (p1:Papers, p2:Papers) =>
|
||||
var r = if (p1==null) p2 else p1
|
||||
if (p1!=null && p2!=null ) if (p1.CreatedDate.before(p2.CreatedDate))
|
||||
|
|
Loading…
Reference in New Issue