forked from D-Net/dnet-hadoop
[ORCID-no-doi] integrating PR#98 D-Net/dnet-hadoop#98
parent ee34cc51c3
commit e686b8de8d
@@ -1,31 +0,0 @@
diff a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Relation.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Relation.java (rejected hunks)
@@ -1,8 +1,6 @@
 package eu.dnetlib.dhp.schema.oaf;

-import eu.dnetlib.dhp.schema.common.ModelSupport;
-
 import static com.google.common.base.Preconditions.checkArgument;

 import java.text.ParseException;
@@ -10,6 +8,8 @@ import java.util.*;
 import java.util.stream.Collectors;
 import java.util.stream.Stream;

+import eu.dnetlib.dhp.schema.common.ModelSupport;
+
 /**
  * Relation models any edge between two nodes in the OpenAIRE graph. It has a source id and a target id pointing to
  * graph node identifiers and it is further characterised by the semantic of the link through the fields relType,
@@ -137,7 +137,10 @@ public class Relation extends Oaf {
         try {
             setValidationDate(ModelSupport.oldest(getValidationDate(), r.getValidationDate()));
         } catch (ParseException e) {
-            throw new IllegalArgumentException(String.format("invalid validation date format in relation [s:%s, t:%s]: %s", getSource(), getTarget(), getValidationDate()));
+            throw new IllegalArgumentException(String
+                .format(
+                    "invalid validation date format in relation [s:%s, t:%s]: %s", getSource(), getTarget(),
+                    getValidationDate()));
         }

         super.mergeFrom(r);
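For context, the merge logic above keeps the earliest of the two validation dates. A minimal sketch of what a helper like ModelSupport.oldest is expected to do, given its usage here; the date pattern and null handling are assumptions for illustration, not the actual dhp-schemas implementation:

import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;

public class ModelSupportSketch {

    // Returns the earlier of two date strings; throws ParseException on malformed input.
    // The "yyyy-MM-dd" pattern and the null handling are assumptions, not the real code.
    public static String oldest(String dateA, String dateB) throws ParseException {
        if (dateA == null) {
            return dateB;
        }
        if (dateB == null) {
            return dateA;
        }
        final SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
        final Date a = sdf.parse(dateA);
        final Date b = sdf.parse(dateB);
        return a.before(b) ? dateA : dateB;
    }
}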
@@ -1,30 +0,0 @@
diff a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidAuthors.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidAuthors.java (rejected hunks)
@@ -31,7 +32,6 @@ public class SparkDownloadOrcidAuthors {

     static Logger logger = LoggerFactory.getLogger(SparkDownloadOrcidAuthors.class);
     static final String DATE_FORMAT = "yyyy-MM-dd HH:mm:ss";
-    static String lastUpdate;

     public static void main(String[] args) throws Exception {

@@ -54,14 +54,18 @@ public class SparkDownloadOrcidAuthors {
         final String token = parser.get("token");
         final String lambdaFileName = parser.get("lambdaFileName");
         logger.info("lambdaFileName: {}", lambdaFileName);
-
-        lastUpdate = HDFSUtil.readFromTextFile(workingPath.concat("last_update.txt"));
+        final String hdfsServerUri = parser.get("hdfsServerUri");

         SparkConf conf = new SparkConf();
         runWithSparkSession(
             conf,
             isSparkSessionManaged,
             spark -> {
+                String lastUpdate = HDFSUtil.readFromTextFile(hdfsServerUri, workingPath, "last_update.txt");
+                logger.info("lastUpdate: ", lastUpdate);
+                if (StringUtils.isBlank(lastUpdate)) {
+                    throw new RuntimeException("last update info not found");
+                }
                 JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());

                 LongAccumulator parsedRecordsAcc = spark.sparkContext().longAccumulator("parsed_records");
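The three-argument HDFSUtil.readFromTextFile(hdfsServerUri, workingPath, fileName) introduced above replaces the old single-argument call, making the HDFS endpoint explicit instead of relying on the process-wide default filesystem. A minimal sketch of such a helper using the standard Hadoop FileSystem API; the actual dhp-doiboost implementation may differ:

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URI;
import java.nio.charset.StandardCharsets;
import java.util.stream.Collectors;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class HDFSUtilSketch {

    // Reads <workingPath>/<fileName> from the given HDFS endpoint into a String.
    public static String readFromTextFile(String hdfsServerUri, String workingPath, String fileName)
        throws IOException {
        final FileSystem fs = FileSystem.get(URI.create(hdfsServerUri), new Configuration());
        try (BufferedReader br = new BufferedReader(
            new InputStreamReader(fs.open(new Path(workingPath, fileName)), StandardCharsets.UTF_8))) {
            return br.lines().collect(Collectors.joining(System.lineSeparator()));
        }
    }
}

Note, in passing, that the logger.info("lastUpdate: ", lastUpdate) call in the hunk is missing its "{}" placeholder, so SLF4J never renders the value; it is the blank-value guard below it that actually fails fast.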
@@ -1,77 +0,0 @@
diff a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf.java (rejected hunks)
@@ -30,11 +30,11 @@ public class PublicationToOaf implements Serializable {

     static Logger logger = LoggerFactory.getLogger(PublicationToOaf.class);

-    public static final String ORCID = "ORCID";
-    public static final String ORCID_PID_TYPE_CLASSNAME = "Open Researcher and Contributor ID";
     public final static String orcidPREFIX = "orcid_______";
     public static final String OPENAIRE_PREFIX = "openaire____";
     public static final String SEPARATOR = "::";
+    public static final String DEACTIVATED_NAME = "Given Names Deactivated";
+    public static final String DEACTIVATED_SURNAME = "Family Name Deactivated";

     private String dateOfCollection = "";
     private final LongAccumulator parsedPublications;
@@ -72,13 +81,18 @@ public class PublicationToOaf implements Serializable {
         this.errorsNotFoundAuthors = null;
         this.errorsInvalidType = null;
         this.otherTypeFound = null;
+        this.deactivatedAcc = null;
+        this.titleNotProvidedAcc = null;
+        this.noUrlAcc = null;
         this.dateOfCollection = null;
     }

     private static Map<String, Pair<String, String>> datasources = new HashMap<String, Pair<String, String>>() {

         {
-            put(ORCID.toLowerCase(), new Pair<>(ORCID, OPENAIRE_PREFIX + SEPARATOR + "orcid"));
+            put(
+                ModelConstants.ORCID,
+                new Pair<>(ModelConstants.ORCID.toUpperCase(), OPENAIRE_PREFIX + SEPARATOR + "orcid"));

         }
     };
@@ -183,6 +197,12 @@ public class PublicationToOaf implements Serializable {
             }
             return null;
         }
+        if (titles.stream().filter(t -> (t != null && t.equals("Title Not Supplied"))).count() > 0) {
+            if (titleNotProvidedAcc != null) {
+                titleNotProvidedAcc.add(1);
+            }
+            return null;
+        }
         Qualifier q = mapQualifier("main title", "main title", "dnet:dataCite_title", "dnet:dataCite_title");
         publication
             .setTitle(
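The filter(...).count() > 0 guard above can be expressed more directly with anyMatch; an equivalent sketch of the same check:

// Equivalent to the filter(...).count() > 0 test above: skip records whose
// title is the ORCID placeholder "Title Not Supplied".
if (titles.stream().anyMatch(t -> "Title Not Supplied".equals(t))) {
    if (titleNotProvidedAcc != null) {
        titleNotProvidedAcc.add(1);
    }
    return null;
}

Calling equals on the literal also covers the null check, so the explicit t != null test becomes unnecessary.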
@@ -527,24 +562,21 @@ public class PublicationToOaf implements Serializable {

     private KeyValue createCollectedFrom() {
         KeyValue cf = new KeyValue();
-        cf.setValue(ORCID);
+        cf.setValue(ModelConstants.ORCID.toUpperCase());
         cf.setKey("10|" + OPENAIRE_PREFIX + SEPARATOR + "806360c771262b4d6770e7cdf04b5c5a");
         return cf;
     }

     private KeyValue createHostedBy() {
-        KeyValue hb = new KeyValue();
-        hb.setValue("Unknown Repository");
-        hb.setKey("10|" + OPENAIRE_PREFIX + SEPARATOR + "55045bd2a65019fd8e6741a755395c8c");
-        return hb;
+        return ModelConstants.UNKNOWN_REPOSITORY;
     }

     private StructuredProperty mapAuthorId(String orcidId) {
         final StructuredProperty sp = new StructuredProperty();
         sp.setValue(orcidId);
         final Qualifier q = new Qualifier();
-        q.setClassid(ORCID.toLowerCase());
-        q.setClassname(ORCID_PID_TYPE_CLASSNAME);
+        q.setClassid(ModelConstants.ORCID);
+        q.setClassname(ModelConstants.ORCID_CLASSNAME);
         q.setSchemeid(ModelConstants.DNET_PID_TYPES);
         q.setSchemename(ModelConstants.DNET_PID_TYPES);
         sp.setQualifier(q);
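The body removed from createHostedBy shows what ModelConstants.UNKNOWN_REPOSITORY is expected to encapsulate; a sketch reconstructed from those removed lines (the real constant is defined in eu.dnetlib.dhp.schema.common.ModelConstants, so names here are illustrative):

import eu.dnetlib.dhp.schema.oaf.KeyValue;

public class UnknownRepositorySketch {

    private static final String OPENAIRE_PREFIX = "openaire____";
    private static final String SEPARATOR = "::";

    // Reconstructed from the KeyValue that the hunk removes from createHostedBy().
    public static final KeyValue UNKNOWN_REPOSITORY = unknownRepository();

    private static KeyValue unknownRepository() {
        final KeyValue hb = new KeyValue();
        hb.setValue("Unknown Repository");
        hb.setKey("10|" + OPENAIRE_PREFIX + SEPARATOR + "55045bd2a65019fd8e6741a755395c8c");
        return hb;
    }
}

Centralising the constant this way keeps the repository identifier in one place instead of duplicating the key string in every mapper.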