forked from D-Net/dnet-hadoop
added result typologies
This commit is contained in:
parent
ca1800510a
commit
bd3b16402b
|
@ -8,7 +8,6 @@ import java.util.List;
|
|||
import java.util.Objects;
|
||||
import java.util.Optional;
|
||||
|
||||
import eu.dnetlib.doiboost.orcid.util.HDFSUtil;
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.apache.hadoop.io.Text;
|
||||
|
@ -38,6 +37,7 @@ import eu.dnetlib.dhp.schema.orcid.AuthorSummary;
|
|||
import eu.dnetlib.dhp.schema.orcid.Work;
|
||||
import eu.dnetlib.dhp.schema.orcid.WorkDetail;
|
||||
import eu.dnetlib.doiboost.orcid.json.JsonHelper;
|
||||
import eu.dnetlib.doiboost.orcid.util.HDFSUtil;
|
||||
import eu.dnetlib.doiboost.orcidnodoi.oaf.PublicationToOaf;
|
||||
import eu.dnetlib.doiboost.orcidnodoi.similarity.AuthorMatcher;
|
||||
import scala.Tuple2;
|
||||
|
@ -137,6 +137,8 @@ public class SparkGenEnrichedOrcidWorks {
|
|||
.sparkContext()
|
||||
.longAccumulator("errorsNotFoundAuthors");
|
||||
final LongAccumulator errorsInvalidType = spark.sparkContext().longAccumulator("errorsInvalidType");
|
||||
final LongAccumulator otherTypeFound = spark.sparkContext().longAccumulator("otherTypeFound");
|
||||
|
||||
final PublicationToOaf publicationToOaf = new PublicationToOaf(
|
||||
parsedPublications,
|
||||
enrichedPublications,
|
||||
|
@ -144,7 +146,8 @@ public class SparkGenEnrichedOrcidWorks {
|
|||
errorsInvalidTitle,
|
||||
errorsNotFoundAuthors,
|
||||
errorsInvalidType,
|
||||
dateOfCollection);
|
||||
otherTypeFound,
|
||||
dateOfCollection);
|
||||
JavaRDD<Publication> oafPublicationRDD = enrichedWorksRDD
|
||||
.map(
|
||||
e -> {
|
||||
|
@ -173,6 +176,7 @@ public class SparkGenEnrichedOrcidWorks {
|
|||
logger.info("errorsInvalidTitle: " + errorsInvalidTitle.value().toString());
|
||||
logger.info("errorsNotFoundAuthors: " + errorsNotFoundAuthors.value().toString());
|
||||
logger.info("errorsInvalidType: " + errorsInvalidType.value().toString());
|
||||
logger.info("otherTypeFound: " + otherTypeFound.value().toString());
|
||||
});
|
||||
}
|
||||
}
|
||||
|
|
|
@ -43,6 +43,7 @@ public class PublicationToOaf implements Serializable {
|
|||
private final LongAccumulator errorsInvalidTitle;
|
||||
private final LongAccumulator errorsNotFoundAuthors;
|
||||
private final LongAccumulator errorsInvalidType;
|
||||
private final LongAccumulator otherTypeFound;
|
||||
|
||||
public PublicationToOaf(
|
||||
LongAccumulator parsedPublications,
|
||||
|
@ -51,6 +52,7 @@ public class PublicationToOaf implements Serializable {
|
|||
LongAccumulator errorsInvalidTitle,
|
||||
LongAccumulator errorsNotFoundAuthors,
|
||||
LongAccumulator errorsInvalidType,
|
||||
LongAccumulator otherTypeFound,
|
||||
String dateOfCollection) {
|
||||
this.parsedPublications = parsedPublications;
|
||||
this.enrichedPublications = enrichedPublications;
|
||||
|
@ -58,6 +60,7 @@ public class PublicationToOaf implements Serializable {
|
|||
this.errorsInvalidTitle = errorsInvalidTitle;
|
||||
this.errorsNotFoundAuthors = errorsNotFoundAuthors;
|
||||
this.errorsInvalidType = errorsInvalidType;
|
||||
this.otherTypeFound = otherTypeFound;
|
||||
this.dateOfCollection = dateOfCollection;
|
||||
}
|
||||
|
||||
|
@ -68,6 +71,8 @@ public class PublicationToOaf implements Serializable {
|
|||
this.errorsInvalidTitle = null;
|
||||
this.errorsNotFoundAuthors = null;
|
||||
this.errorsInvalidType = null;
|
||||
this.otherTypeFound = null;
|
||||
this.dateOfCollection = null;
|
||||
}
|
||||
|
||||
private static Map<String, Pair<String, String>> datasources = new HashMap<String, Pair<String, String>>() {
|
||||
|
@ -221,6 +226,14 @@ public class PublicationToOaf implements Serializable {
|
|||
|
||||
final String typeValue = typologiesMapping.get(type).get("value");
|
||||
cobjValue = typologiesMapping.get(type).get("cobj");
|
||||
// this dataset must contain only publications: skip works whose type maps to cobj "0020" ("Other ORP type")
|
||||
if (cobjValue.equals("0020")) {
|
||||
if (otherTypeFound != null) {
|
||||
otherTypeFound.add(1);
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
final Instance instance = new Instance();
|
||||
|
||||
// Adding hostedby
|
||||
|
|
|
@ -31,5 +31,13 @@
|
|||
"annotation": {"cobj":"0018", "value": "Annotation"},
|
||||
"physical-object": {"cobj":"0028", "value": "PhysicalObject"},
|
||||
"preprint": {"cobj":"0016", "value": "Preprint"},
|
||||
"software": {"cobj":"0029", "value": "Software"}
|
||||
"software": {"cobj":"0029", "value": "Software"},
|
||||
"journal-issue": {"cobj":"0001", "value": "Article"},
|
||||
"translation": {"cobj":"0038", "value": "Other literature type"},
|
||||
"artistic-performance": {"cobj":"0020", "value": "Other ORP type"},
|
||||
"online-resource": {"cobj":"0020", "value": "Other ORP type"},
|
||||
"registered-copyright": {"cobj":"0020", "value": "Other ORP type"},
|
||||
"trademark": {"cobj":"0020", "value": "Other ORP type"},
|
||||
"invention": {"cobj":"0020", "value": "Other ORP type"},
|
||||
"spin-off-company": {"cobj":"0020", "value": "Other ORP type"}
|
||||
}
|
Loading…
Reference in New Issue