forked from antonis.lempesis/dnet-hadoop
[ORCID-no-doi] integrating PR#98 D-Net/dnet-hadoop#98
This commit is contained in:
parent
ee34cc51c3
commit
e686b8de8d
|
@@ -1,31 +0,0 @@
|
|||
diff a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Relation.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Relation.java (rejected hunks)
|
||||
@@ -1,8 +1,6 @@
|
||||
|
||||
package eu.dnetlib.dhp.schema.oaf;
|
||||
|
||||
-import eu.dnetlib.dhp.schema.common.ModelSupport;
|
||||
-
|
||||
import static com.google.common.base.Preconditions.checkArgument;
|
||||
|
||||
import java.text.ParseException;
|
||||
@@ -10,6 +8,8 @@ import java.util.*;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
+import eu.dnetlib.dhp.schema.common.ModelSupport;
|
||||
+
|
||||
/**
|
||||
* Relation models any edge between two nodes in the OpenAIRE graph. It has a source id and a target id pointing to
|
||||
* graph node identifiers and it is further characterised by the semantic of the link through the fields relType,
|
||||
@@ -137,7 +137,10 @@ public class Relation extends Oaf {
|
||||
try {
|
||||
setValidationDate(ModelSupport.oldest(getValidationDate(), r.getValidationDate()));
|
||||
} catch (ParseException e) {
|
||||
- throw new IllegalArgumentException(String.format("invalid validation date format in relation [s:%s, t:%s]: %s", getSource(), getTarget(), getValidationDate()));
|
||||
+ throw new IllegalArgumentException(String
|
||||
+ .format(
|
||||
+ "invalid validation date format in relation [s:%s, t:%s]: %s", getSource(), getTarget(),
|
||||
+ getValidationDate()));
|
||||
}
|
||||
|
||||
super.mergeFrom(r);
|
|
@@ -1,30 +0,0 @@
|
|||
diff a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidAuthors.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidAuthors.java (rejected hunks)
|
||||
@@ -31,7 +32,6 @@ public class SparkDownloadOrcidAuthors {
|
||||
|
||||
static Logger logger = LoggerFactory.getLogger(SparkDownloadOrcidAuthors.class);
|
||||
static final String DATE_FORMAT = "yyyy-MM-dd HH:mm:ss";
|
||||
- static String lastUpdate;
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
|
||||
@@ -54,14 +54,18 @@ public class SparkDownloadOrcidAuthors {
|
||||
final String token = parser.get("token");
|
||||
final String lambdaFileName = parser.get("lambdaFileName");
|
||||
logger.info("lambdaFileName: {}", lambdaFileName);
|
||||
-
|
||||
- lastUpdate = HDFSUtil.readFromTextFile(workingPath.concat("last_update.txt"));
|
||||
+ final String hdfsServerUri = parser.get("hdfsServerUri");
|
||||
|
||||
SparkConf conf = new SparkConf();
|
||||
runWithSparkSession(
|
||||
conf,
|
||||
isSparkSessionManaged,
|
||||
spark -> {
|
||||
+ String lastUpdate = HDFSUtil.readFromTextFile(hdfsServerUri, workingPath, "last_update.txt");
|
||||
+ logger.info("lastUpdate: ", lastUpdate);
|
||||
+ if (StringUtils.isBlank(lastUpdate)) {
|
||||
+ throw new RuntimeException("last update info not found");
|
||||
+ }
|
||||
JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||
|
||||
LongAccumulator parsedRecordsAcc = spark.sparkContext().longAccumulator("parsed_records");
|
|
@@ -1,77 +0,0 @@
|
|||
diff a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf.java (rejected hunks)
|
||||
@@ -30,11 +30,11 @@ public class PublicationToOaf implements Serializable {
|
||||
|
||||
static Logger logger = LoggerFactory.getLogger(PublicationToOaf.class);
|
||||
|
||||
- public static final String ORCID = "ORCID";
|
||||
- public static final String ORCID_PID_TYPE_CLASSNAME = "Open Researcher and Contributor ID";
|
||||
public final static String orcidPREFIX = "orcid_______";
|
||||
public static final String OPENAIRE_PREFIX = "openaire____";
|
||||
public static final String SEPARATOR = "::";
|
||||
+ public static final String DEACTIVATED_NAME = "Given Names Deactivated";
|
||||
+ public static final String DEACTIVATED_SURNAME = "Family Name Deactivated";
|
||||
|
||||
private String dateOfCollection = "";
|
||||
private final LongAccumulator parsedPublications;
|
||||
@@ -72,13 +81,18 @@ public class PublicationToOaf implements Serializable {
|
||||
this.errorsNotFoundAuthors = null;
|
||||
this.errorsInvalidType = null;
|
||||
this.otherTypeFound = null;
|
||||
+ this.deactivatedAcc = null;
|
||||
+ this.titleNotProvidedAcc = null;
|
||||
+ this.noUrlAcc = null;
|
||||
this.dateOfCollection = null;
|
||||
}
|
||||
|
||||
private static Map<String, Pair<String, String>> datasources = new HashMap<String, Pair<String, String>>() {
|
||||
|
||||
{
|
||||
- put(ORCID.toLowerCase(), new Pair<>(ORCID, OPENAIRE_PREFIX + SEPARATOR + "orcid"));
|
||||
+ put(
|
||||
+ ModelConstants.ORCID,
|
||||
+ new Pair<>(ModelConstants.ORCID.toUpperCase(), OPENAIRE_PREFIX + SEPARATOR + "orcid"));
|
||||
|
||||
}
|
||||
};
|
||||
@@ -183,6 +197,12 @@ public class PublicationToOaf implements Serializable {
|
||||
}
|
||||
return null;
|
||||
}
|
||||
+ if (titles.stream().filter(t -> (t != null && t.equals("Title Not Supplied"))).count() > 0) {
|
||||
+ if (titleNotProvidedAcc != null) {
|
||||
+ titleNotProvidedAcc.add(1);
|
||||
+ }
|
||||
+ return null;
|
||||
+ }
|
||||
Qualifier q = mapQualifier("main title", "main title", "dnet:dataCite_title", "dnet:dataCite_title");
|
||||
publication
|
||||
.setTitle(
|
||||
@@ -527,24 +562,21 @@ public class PublicationToOaf implements Serializable {
|
||||
|
||||
private KeyValue createCollectedFrom() {
|
||||
KeyValue cf = new KeyValue();
|
||||
- cf.setValue(ORCID);
|
||||
+ cf.setValue(ModelConstants.ORCID.toUpperCase());
|
||||
cf.setKey("10|" + OPENAIRE_PREFIX + SEPARATOR + "806360c771262b4d6770e7cdf04b5c5a");
|
||||
return cf;
|
||||
}
|
||||
|
||||
private KeyValue createHostedBy() {
|
||||
- KeyValue hb = new KeyValue();
|
||||
- hb.setValue("Unknown Repository");
|
||||
- hb.setKey("10|" + OPENAIRE_PREFIX + SEPARATOR + "55045bd2a65019fd8e6741a755395c8c");
|
||||
- return hb;
|
||||
+ return ModelConstants.UNKNOWN_REPOSITORY;
|
||||
}
|
||||
|
||||
private StructuredProperty mapAuthorId(String orcidId) {
|
||||
final StructuredProperty sp = new StructuredProperty();
|
||||
sp.setValue(orcidId);
|
||||
final Qualifier q = new Qualifier();
|
||||
- q.setClassid(ORCID.toLowerCase());
|
||||
- q.setClassname(ORCID_PID_TYPE_CLASSNAME);
|
||||
+ q.setClassid(ModelConstants.ORCID);
|
||||
+ q.setClassname(ModelConstants.ORCID_CLASSNAME);
|
||||
q.setSchemeid(ModelConstants.DNET_PID_TYPES);
|
||||
q.setSchemename(ModelConstants.DNET_PID_TYPES);
|
||||
sp.setQualifier(q);
|
Loading…
Reference in New Issue