forked from D-Net/dnet-hadoop
added result typologies
This commit is contained in:
parent
ca1800510a
commit
bd3b16402b
|
@ -8,7 +8,6 @@ import java.util.List;
|
||||||
import java.util.Objects;
|
import java.util.Objects;
|
||||||
import java.util.Optional;
|
import java.util.Optional;
|
||||||
|
|
||||||
import eu.dnetlib.doiboost.orcid.util.HDFSUtil;
|
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
import org.apache.hadoop.io.Text;
|
import org.apache.hadoop.io.Text;
|
||||||
|
@ -38,6 +37,7 @@ import eu.dnetlib.dhp.schema.orcid.AuthorSummary;
|
||||||
import eu.dnetlib.dhp.schema.orcid.Work;
|
import eu.dnetlib.dhp.schema.orcid.Work;
|
||||||
import eu.dnetlib.dhp.schema.orcid.WorkDetail;
|
import eu.dnetlib.dhp.schema.orcid.WorkDetail;
|
||||||
import eu.dnetlib.doiboost.orcid.json.JsonHelper;
|
import eu.dnetlib.doiboost.orcid.json.JsonHelper;
|
||||||
|
import eu.dnetlib.doiboost.orcid.util.HDFSUtil;
|
||||||
import eu.dnetlib.doiboost.orcidnodoi.oaf.PublicationToOaf;
|
import eu.dnetlib.doiboost.orcidnodoi.oaf.PublicationToOaf;
|
||||||
import eu.dnetlib.doiboost.orcidnodoi.similarity.AuthorMatcher;
|
import eu.dnetlib.doiboost.orcidnodoi.similarity.AuthorMatcher;
|
||||||
import scala.Tuple2;
|
import scala.Tuple2;
|
||||||
|
@ -137,6 +137,8 @@ public class SparkGenEnrichedOrcidWorks {
|
||||||
.sparkContext()
|
.sparkContext()
|
||||||
.longAccumulator("errorsNotFoundAuthors");
|
.longAccumulator("errorsNotFoundAuthors");
|
||||||
final LongAccumulator errorsInvalidType = spark.sparkContext().longAccumulator("errorsInvalidType");
|
final LongAccumulator errorsInvalidType = spark.sparkContext().longAccumulator("errorsInvalidType");
|
||||||
|
final LongAccumulator otherTypeFound = spark.sparkContext().longAccumulator("otherTypeFound");
|
||||||
|
|
||||||
final PublicationToOaf publicationToOaf = new PublicationToOaf(
|
final PublicationToOaf publicationToOaf = new PublicationToOaf(
|
||||||
parsedPublications,
|
parsedPublications,
|
||||||
enrichedPublications,
|
enrichedPublications,
|
||||||
|
@ -144,6 +146,7 @@ public class SparkGenEnrichedOrcidWorks {
|
||||||
errorsInvalidTitle,
|
errorsInvalidTitle,
|
||||||
errorsNotFoundAuthors,
|
errorsNotFoundAuthors,
|
||||||
errorsInvalidType,
|
errorsInvalidType,
|
||||||
|
otherTypeFound,
|
||||||
dateOfCollection);
|
dateOfCollection);
|
||||||
JavaRDD<Publication> oafPublicationRDD = enrichedWorksRDD
|
JavaRDD<Publication> oafPublicationRDD = enrichedWorksRDD
|
||||||
.map(
|
.map(
|
||||||
|
@ -173,6 +176,7 @@ public class SparkGenEnrichedOrcidWorks {
|
||||||
logger.info("errorsInvalidTitle: " + errorsInvalidTitle.value().toString());
|
logger.info("errorsInvalidTitle: " + errorsInvalidTitle.value().toString());
|
||||||
logger.info("errorsNotFoundAuthors: " + errorsNotFoundAuthors.value().toString());
|
logger.info("errorsNotFoundAuthors: " + errorsNotFoundAuthors.value().toString());
|
||||||
logger.info("errorsInvalidType: " + errorsInvalidType.value().toString());
|
logger.info("errorsInvalidType: " + errorsInvalidType.value().toString());
|
||||||
|
logger.info("otherTypeFound: " + otherTypeFound.value().toString());
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -43,6 +43,7 @@ public class PublicationToOaf implements Serializable {
|
||||||
private final LongAccumulator errorsInvalidTitle;
|
private final LongAccumulator errorsInvalidTitle;
|
||||||
private final LongAccumulator errorsNotFoundAuthors;
|
private final LongAccumulator errorsNotFoundAuthors;
|
||||||
private final LongAccumulator errorsInvalidType;
|
private final LongAccumulator errorsInvalidType;
|
||||||
|
private final LongAccumulator otherTypeFound;
|
||||||
|
|
||||||
public PublicationToOaf(
|
public PublicationToOaf(
|
||||||
LongAccumulator parsedPublications,
|
LongAccumulator parsedPublications,
|
||||||
|
@ -51,6 +52,7 @@ public class PublicationToOaf implements Serializable {
|
||||||
LongAccumulator errorsInvalidTitle,
|
LongAccumulator errorsInvalidTitle,
|
||||||
LongAccumulator errorsNotFoundAuthors,
|
LongAccumulator errorsNotFoundAuthors,
|
||||||
LongAccumulator errorsInvalidType,
|
LongAccumulator errorsInvalidType,
|
||||||
|
LongAccumulator otherTypeFound,
|
||||||
String dateOfCollection) {
|
String dateOfCollection) {
|
||||||
this.parsedPublications = parsedPublications;
|
this.parsedPublications = parsedPublications;
|
||||||
this.enrichedPublications = enrichedPublications;
|
this.enrichedPublications = enrichedPublications;
|
||||||
|
@ -58,6 +60,7 @@ public class PublicationToOaf implements Serializable {
|
||||||
this.errorsInvalidTitle = errorsInvalidTitle;
|
this.errorsInvalidTitle = errorsInvalidTitle;
|
||||||
this.errorsNotFoundAuthors = errorsNotFoundAuthors;
|
this.errorsNotFoundAuthors = errorsNotFoundAuthors;
|
||||||
this.errorsInvalidType = errorsInvalidType;
|
this.errorsInvalidType = errorsInvalidType;
|
||||||
|
this.otherTypeFound = otherTypeFound;
|
||||||
this.dateOfCollection = dateOfCollection;
|
this.dateOfCollection = dateOfCollection;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -68,6 +71,8 @@ public class PublicationToOaf implements Serializable {
|
||||||
this.errorsInvalidTitle = null;
|
this.errorsInvalidTitle = null;
|
||||||
this.errorsNotFoundAuthors = null;
|
this.errorsNotFoundAuthors = null;
|
||||||
this.errorsInvalidType = null;
|
this.errorsInvalidType = null;
|
||||||
|
this.otherTypeFound = null;
|
||||||
|
this.dateOfCollection = null;
|
||||||
}
|
}
|
||||||
|
|
||||||
private static Map<String, Pair<String, String>> datasources = new HashMap<String, Pair<String, String>>() {
|
private static Map<String, Pair<String, String>> datasources = new HashMap<String, Pair<String, String>>() {
|
||||||
|
@ -221,6 +226,14 @@ public class PublicationToOaf implements Serializable {
|
||||||
|
|
||||||
final String typeValue = typologiesMapping.get(type).get("value");
|
final String typeValue = typologiesMapping.get(type).get("value");
|
||||||
cobjValue = typologiesMapping.get(type).get("cobj");
|
cobjValue = typologiesMapping.get(type).get("cobj");
|
||||||
|
// this dataset must contain only publications, so works mapped to cobj "0020" are counted and discarded
|
||||||
|
if (cobjValue.equals("0020")) {
|
||||||
|
if (otherTypeFound != null) {
|
||||||
|
otherTypeFound.add(1);
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
final Instance instance = new Instance();
|
final Instance instance = new Instance();
|
||||||
|
|
||||||
// Adding hostedby
|
// Adding hostedby
|
||||||
|
|
|
@ -31,5 +31,13 @@
|
||||||
"annotation": {"cobj":"0018", "value": "Annotation"},
|
"annotation": {"cobj":"0018", "value": "Annotation"},
|
||||||
"physical-object": {"cobj":"0028", "value": "PhysicalObject"},
|
"physical-object": {"cobj":"0028", "value": "PhysicalObject"},
|
||||||
"preprint": {"cobj":"0016", "value": "Preprint"},
|
"preprint": {"cobj":"0016", "value": "Preprint"},
|
||||||
"software": {"cobj":"0029", "value": "Software"}
|
"software": {"cobj":"0029", "value": "Software"},
|
||||||
|
"journal-issue": {"cobj":"0001", "value": "Article"},
|
||||||
|
"translation": {"cobj":"0038", "value": "Other literature type"},
|
||||||
|
"artistic-performance": {"cobj":"0020", "value": "Other ORP type"},
|
||||||
|
"online-resource": {"cobj":"0020", "value": "Other ORP type"},
|
||||||
|
"registered-copyright": {"cobj":"0020", "value": "Other ORP type"},
|
||||||
|
"trademark": {"cobj":"0020", "value": "Other ORP type"},
|
||||||
|
"invention": {"cobj":"0020", "value": "Other ORP type"},
|
||||||
|
"spin-off-company": {"cobj":"0020", "value": "Other ORP type"}
|
||||||
}
|
}
|
Loading…
Reference in New Issue