orcid-no-doi #98

Closed
enrico.ottonello wants to merge 34 commits from orcid-no-doi into stable_ids
3 changed files with 28 additions and 3 deletions
Showing only changes of commit bd3b16402b - Show all commits

View File

@ -8,7 +8,6 @@ import java.util.List;
import java.util.Objects;
import java.util.Optional;
import eu.dnetlib.doiboost.orcid.util.HDFSUtil;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.io.Text;
@ -38,6 +37,7 @@ import eu.dnetlib.dhp.schema.orcid.AuthorSummary;
import eu.dnetlib.dhp.schema.orcid.Work;
import eu.dnetlib.dhp.schema.orcid.WorkDetail;
import eu.dnetlib.doiboost.orcid.json.JsonHelper;
import eu.dnetlib.doiboost.orcid.util.HDFSUtil;
import eu.dnetlib.doiboost.orcidnodoi.oaf.PublicationToOaf;
import eu.dnetlib.doiboost.orcidnodoi.similarity.AuthorMatcher;
import scala.Tuple2;
@ -137,6 +137,8 @@ public class SparkGenEnrichedOrcidWorks {
.sparkContext()
.longAccumulator("errorsNotFoundAuthors");
final LongAccumulator errorsInvalidType = spark.sparkContext().longAccumulator("errorsInvalidType");
final LongAccumulator otherTypeFound = spark.sparkContext().longAccumulator("otherTypeFound");
final PublicationToOaf publicationToOaf = new PublicationToOaf(
parsedPublications,
enrichedPublications,
@ -144,7 +146,8 @@ public class SparkGenEnrichedOrcidWorks {
errorsInvalidTitle,
errorsNotFoundAuthors,
errorsInvalidType,
dateOfCollection);
otherTypeFound,
dateOfCollection);
JavaRDD<Publication> oafPublicationRDD = enrichedWorksRDD
.map(
e -> {
@ -173,6 +176,7 @@ public class SparkGenEnrichedOrcidWorks {
logger.info("errorsInvalidTitle: " + errorsInvalidTitle.value().toString());
logger.info("errorsNotFoundAuthors: " + errorsNotFoundAuthors.value().toString());
logger.info("errorsInvalidType: " + errorsInvalidType.value().toString());
logger.info("otherTypeFound: " + otherTypeFound.value().toString());
});
}
}

View File

@ -43,6 +43,7 @@ public class PublicationToOaf implements Serializable {
private final LongAccumulator errorsInvalidTitle;
private final LongAccumulator errorsNotFoundAuthors;
private final LongAccumulator errorsInvalidType;
private final LongAccumulator otherTypeFound;
public PublicationToOaf(
LongAccumulator parsedPublications,
@ -51,6 +52,7 @@ public class PublicationToOaf implements Serializable {
LongAccumulator errorsInvalidTitle,
LongAccumulator errorsNotFoundAuthors,
LongAccumulator errorsInvalidType,
LongAccumulator otherTypeFound,
String dateOfCollection) {
this.parsedPublications = parsedPublications;
this.enrichedPublications = enrichedPublications;
@ -58,6 +60,7 @@ public class PublicationToOaf implements Serializable {
this.errorsInvalidTitle = errorsInvalidTitle;
this.errorsNotFoundAuthors = errorsNotFoundAuthors;
this.errorsInvalidType = errorsInvalidType;
this.otherTypeFound = otherTypeFound;
this.dateOfCollection = dateOfCollection;
}
@ -68,6 +71,8 @@ public class PublicationToOaf implements Serializable {
this.errorsInvalidTitle = null;
this.errorsNotFoundAuthors = null;
this.errorsInvalidType = null;
this.otherTypeFound = null;
this.dateOfCollection = null;
}
private static Map<String, Pair<String, String>> datasources = new HashMap<String, Pair<String, String>>() {
@ -221,6 +226,14 @@ public class PublicationToOaf implements Serializable {
final String typeValue = typologiesMapping.get(type).get("value");
cobjValue = typologiesMapping.get(type).get("cobj");
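// this dataset must contain only publications: works whose type maps to cobj "0020" ("Other ORP type") are counted in otherTypeFound and skipped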
if (cobjValue.equals("0020")) {
if (otherTypeFound != null) {
otherTypeFound.add(1);
}
return null;
}
final Instance instance = new Instance();
// Adding hostedby

View File

@ -31,5 +31,13 @@
"annotation": {"cobj":"0018", "value": "Annotation"},
"physical-object": {"cobj":"0028", "value": "PhysicalObject"},
"preprint": {"cobj":"0016", "value": "Preprint"},
"software": {"cobj":"0029", "value": "Software"}
"software": {"cobj":"0029", "value": "Software"},
"journal-issue": {"cobj":"0001", "value": "Article"},
"translation": {"cobj":"0038", "value": "Other literature type"},
"artistic-performance": {"cobj":"0020", "value": "Other ORP type"},
"online-resource": {"cobj":"0020", "value": "Other ORP type"},
"registered-copyright": {"cobj":"0020", "value": "Other ORP type"},
"trademark": {"cobj":"0020", "value": "Other ORP type"},
"invention": {"cobj":"0020", "value": "Other ORP type"},
"spin-off-company": {"cobj":"0020", "value": "Other ORP type"}
}