forked from D-Net/dnet-hadoop
merge with upstream
This commit is contained in:
commit 5dccbe13db

ModelConstants.java (new file):
@@ -0,0 +1,40 @@
+package eu.dnetlib.dhp.schema.common;
+
+import eu.dnetlib.dhp.schema.oaf.Qualifier;
+
+public class ModelConstants {
+
+    public static final String DNET_RESULT_TYPOLOGIES = "dnet:result_typologies";
+
+    public static final String DATASET_RESULTTYPE_CLASSID = "dataset";
+    public static final String PUBLICATION_RESULTTYPE_CLASSID = "publication";
+    public static final String SOFTWARE_RESULTTYPE_CLASSID = "software";
+    public static final String ORP_RESULTTYPE_CLASSID = "other";
+
+    public static Qualifier PUBLICATION_DEFAULT_RESULTTYPE = new Qualifier();
+    public static Qualifier DATASET_DEFAULT_RESULTTYPE = new Qualifier();
+    public static Qualifier SOFTWARE_DEFAULT_RESULTTYPE = new Qualifier();
+    public static Qualifier ORP_DEFAULT_RESULTTYPE = new Qualifier();
+
+    static {
+        PUBLICATION_DEFAULT_RESULTTYPE.setClassid(PUBLICATION_RESULTTYPE_CLASSID);
+        PUBLICATION_DEFAULT_RESULTTYPE.setClassname(PUBLICATION_RESULTTYPE_CLASSID);
+        PUBLICATION_DEFAULT_RESULTTYPE.setSchemeid(DNET_RESULT_TYPOLOGIES);
+        PUBLICATION_DEFAULT_RESULTTYPE.setSchemename(DNET_RESULT_TYPOLOGIES);
+
+        DATASET_DEFAULT_RESULTTYPE.setClassid(DATASET_RESULTTYPE_CLASSID);
+        DATASET_DEFAULT_RESULTTYPE.setClassname(DATASET_RESULTTYPE_CLASSID);
+        DATASET_DEFAULT_RESULTTYPE.setSchemeid(DNET_RESULT_TYPOLOGIES);
+        DATASET_DEFAULT_RESULTTYPE.setSchemename(DNET_RESULT_TYPOLOGIES);
+
+        SOFTWARE_DEFAULT_RESULTTYPE.setClassid(SOFTWARE_RESULTTYPE_CLASSID);
+        SOFTWARE_DEFAULT_RESULTTYPE.setClassname(SOFTWARE_RESULTTYPE_CLASSID);
+        SOFTWARE_DEFAULT_RESULTTYPE.setSchemeid(DNET_RESULT_TYPOLOGIES);
+        SOFTWARE_DEFAULT_RESULTTYPE.setSchemename(DNET_RESULT_TYPOLOGIES);
+
+        ORP_DEFAULT_RESULTTYPE.setClassid(ORP_RESULTTYPE_CLASSID);
+        ORP_DEFAULT_RESULTTYPE.setClassname(ORP_RESULTTYPE_CLASSID);
+        ORP_DEFAULT_RESULTTYPE.setSchemeid(DNET_RESULT_TYPOLOGIES);
+        ORP_DEFAULT_RESULTTYPE.setSchemename(DNET_RESULT_TYPOLOGIES);
+    }
+}
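
The constants above are consumed by the Result subclasses further down in this diff. A minimal sketch of the expected effect (illustrative only, not part of the commit; it assumes bean-style getters matching the setters shown above):

    // Hedged sketch: a freshly constructed Publication should carry the default result type.
    Publication p = new Publication();
    Qualifier q = p.getResulttype();
    assert ModelConstants.PUBLICATION_RESULTTYPE_CLASSID.equals(q.getClassid()); // "publication"
    assert ModelConstants.DNET_RESULT_TYPOLOGIES.equals(q.getSchemeid());        // "dnet:result_typologies"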

ModelSupport.java:
@@ -3,6 +3,8 @@ package eu.dnetlib.dhp.schema.common;

 import com.google.common.collect.Maps;
 import eu.dnetlib.dhp.schema.oaf.*;
 import java.util.Map;
+import java.util.Optional;
+import java.util.function.Function;

 /** Oaf model utility methods. */
 public class ModelSupport {

@@ -146,4 +148,66 @@ public class ModelSupport {
                 entityMapping.get(EntityType.valueOf(sourceType)).name(),
                 entityMapping.get(EntityType.valueOf(targetType)).name());
     }
+
+    public static <T extends Oaf> Function<T, String> idFn() {
+        return x -> {
+            if (isSubClass(x, Relation.class)) {
+                return idFnForRelation(x);
+            }
+            return idFnForOafEntity(x);
+        };
+    }
+
+    private static <T extends Oaf> String idFnForRelation(T t) {
+        Relation r = (Relation) t;
+        return Optional.ofNullable(r.getSource())
+                .map(source -> Optional.ofNullable(r.getTarget())
+                        .map(target -> Optional.ofNullable(r.getRelType())
+                                .map(relType -> Optional.ofNullable(r.getSubRelType())
+                                        .map(subRelType -> Optional.ofNullable(r.getRelClass())
+                                                .map(relClass -> String.join(source, target, relType, subRelType, relClass))
+                                                .orElse(String.join(source, target, relType, subRelType)))
+                                        .orElse(String.join(source, target, relType)))
+                                .orElse(String.join(source, target)))
+                        .orElse(source))
+                .orElse(null);
+    }
+
+    private static <T extends Oaf> String idFnForOafEntity(T t) {
+        return ((OafEntity) t).getId();
+    }
 }
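
A quick way to exercise the new identifier helper (a hedged sketch, not part of the commit; it assumes the usual bean setters on Relation and OafEntity, and note that the first argument of String.join is the delimiter, so the relation key is the remaining fields joined with the source id as separator):

    Relation rel = new Relation();
    rel.setSource("10|openaire____::1234");          // made-up identifiers
    rel.setTarget("50|doajarticles::5678");
    rel.setRelType("resultProject");
    rel.setSubRelType("outcome");
    rel.setRelClass("isProducedBy");
    String relKey = ModelSupport.<Relation>idFn().apply(rel);

    Publication pub = new Publication();
    pub.setId("50|doi_________::abc");
    String entityKey = ModelSupport.<Publication>idFn().apply(pub); // falls back to OafEntity.getId()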

Dataset.java:
@@ -1,8 +1,8 @@
 package eu.dnetlib.dhp.schema.oaf;

+import eu.dnetlib.dhp.schema.common.ModelConstants;
 import java.io.Serializable;
 import java.util.List;
-import java.util.Objects;

 public class Dataset extends Result implements Serializable {

@@ -20,6 +20,10 @@ public class Dataset extends Result implements Serializable {

     private List<GeoLocation> geolocation;

+    public Dataset() {
+        setResulttype(ModelConstants.DATASET_DEFAULT_RESULTTYPE);
+    }
+
     public Field<String> getStoragedate() {
         return storagedate;
     }

@@ -111,32 +115,4 @@ public class Dataset extends Result implements Serializable {

         mergeOAFDataInfo(d);
     }
-
-    @Override
-    public boolean equals(Object o) {
-        if (this == o) return true;
-        if (o == null || getClass() != o.getClass()) return false;
-        if (!super.equals(o)) return false;
-        Dataset dataset = (Dataset) o;
-        return Objects.equals(storagedate, dataset.storagedate)
-                && Objects.equals(device, dataset.device)
-                && Objects.equals(size, dataset.size)
-                && Objects.equals(version, dataset.version)
-                && Objects.equals(lastmetadataupdate, dataset.lastmetadataupdate)
-                && Objects.equals(metadataversionnumber, dataset.metadataversionnumber)
-                && Objects.equals(geolocation, dataset.geolocation);
-    }
-
-    @Override
-    public int hashCode() {
-        return Objects.hash(super.hashCode(), storagedate, device, size, version,
-                lastmetadataupdate, metadataversionnumber, geolocation);
-    }
 }

Datasource.java:
@@ -2,7 +2,6 @@ package eu.dnetlib.dhp.schema.oaf;

 import java.io.Serializable;
 import java.util.List;
-import java.util.Objects;

 public class Datasource extends OafEntity implements Serializable {

@@ -512,88 +511,4 @@ public class Datasource extends OafEntity implements Serializable {

         mergeOAFDataInfo(e);
     }
-
-    @Override
-    public boolean equals(Object o) {
-        if (this == o) return true;
-        if (o == null || getClass() != o.getClass()) return false;
-        if (!super.equals(o)) return false;
-        Datasource that = (Datasource) o;
-        return Objects.equals(datasourcetype, that.datasourcetype)
-                && Objects.equals(openairecompatibility, that.openairecompatibility)
-                && Objects.equals(officialname, that.officialname)
-                && Objects.equals(englishname, that.englishname)
-                && Objects.equals(websiteurl, that.websiteurl)
-                && Objects.equals(logourl, that.logourl)
-                && Objects.equals(contactemail, that.contactemail)
-                && Objects.equals(namespaceprefix, that.namespaceprefix)
-                && Objects.equals(latitude, that.latitude)
-                && Objects.equals(longitude, that.longitude)
-                && Objects.equals(dateofvalidation, that.dateofvalidation)
-                && Objects.equals(description, that.description)
-                && Objects.equals(subjects, that.subjects)
-                && Objects.equals(odnumberofitems, that.odnumberofitems)
-                && Objects.equals(odnumberofitemsdate, that.odnumberofitemsdate)
-                && Objects.equals(odpolicies, that.odpolicies)
-                && Objects.equals(odlanguages, that.odlanguages)
-                && Objects.equals(odcontenttypes, that.odcontenttypes)
-                && Objects.equals(accessinfopackage, that.accessinfopackage)
-                && Objects.equals(releasestartdate, that.releasestartdate)
-                && Objects.equals(releaseenddate, that.releaseenddate)
-                && Objects.equals(missionstatementurl, that.missionstatementurl)
-                && Objects.equals(dataprovider, that.dataprovider)
-                && Objects.equals(serviceprovider, that.serviceprovider)
-                && Objects.equals(databaseaccesstype, that.databaseaccesstype)
-                && Objects.equals(datauploadtype, that.datauploadtype)
-                && Objects.equals(databaseaccessrestriction, that.databaseaccessrestriction)
-                && Objects.equals(datauploadrestriction, that.datauploadrestriction)
-                && Objects.equals(versioning, that.versioning)
-                && Objects.equals(citationguidelineurl, that.citationguidelineurl)
-                && Objects.equals(qualitymanagementkind, that.qualitymanagementkind)
-                && Objects.equals(pidsystems, that.pidsystems)
-                && Objects.equals(certificates, that.certificates)
-                && Objects.equals(policies, that.policies)
-                && Objects.equals(journal, that.journal);
-    }
-
-    @Override
-    public int hashCode() {
-        return Objects.hash(super.hashCode(), datasourcetype, openairecompatibility, officialname,
-                englishname, websiteurl, logourl, contactemail, namespaceprefix, latitude, longitude,
-                dateofvalidation, description, subjects, odnumberofitems, odnumberofitemsdate,
-                odpolicies, odlanguages, odcontenttypes, accessinfopackage, releasestartdate,
-                releaseenddate, missionstatementurl, dataprovider, serviceprovider, databaseaccesstype,
-                datauploadtype, databaseaccessrestriction, datauploadrestriction, versioning,
-                citationguidelineurl, qualitymanagementkind, pidsystems, certificates, policies, journal);
-    }
 }

OafEntity.java:
@@ -113,27 +113,11 @@ public abstract class OafEntity extends Oaf implements Serializable {
         if (o == null || getClass() != o.getClass()) return false;
         if (!super.equals(o)) return false;
         OafEntity oafEntity = (OafEntity) o;
-        return Objects.equals(id, oafEntity.id)
-                && Objects.equals(originalId, oafEntity.originalId)
-                && Objects.equals(collectedfrom, oafEntity.collectedfrom)
-                && Objects.equals(pid, oafEntity.pid)
-                && Objects.equals(dateofcollection, oafEntity.dateofcollection)
-                && Objects.equals(dateoftransformation, oafEntity.dateoftransformation)
-                && Objects.equals(extraInfo, oafEntity.extraInfo)
-                && Objects.equals(oaiprovenance, oafEntity.oaiprovenance);
+        return Objects.equals(id, oafEntity.id);
     }

     @Override
     public int hashCode() {
-        return Objects.hash(super.hashCode(), id, originalId, collectedfrom, pid,
-                dateofcollection, dateoftransformation, extraInfo, oaiprovenance);
+        return Objects.hash(super.hashCode(), id);
     }
 }

Organization.java:
@@ -2,7 +2,6 @@ package eu.dnetlib.dhp.schema.oaf;

 import java.io.Serializable;
 import java.util.List;
-import java.util.Objects;

 public class Organization extends OafEntity implements Serializable {

@@ -233,52 +232,4 @@ public class Organization extends OafEntity implements Serializable {
         country = o.getCountry() != null && compareTrust(this, e) < 0 ? o.getCountry() : country;
         mergeOAFDataInfo(o);
     }
-
-    @Override
-    public boolean equals(Object o) {
-        if (this == o) return true;
-        if (o == null || getClass() != o.getClass()) return false;
-        if (!super.equals(o)) return false;
-        Organization that = (Organization) o;
-        return Objects.equals(legalshortname, that.legalshortname)
-                && Objects.equals(legalname, that.legalname)
-                && Objects.equals(alternativeNames, that.alternativeNames)
-                && Objects.equals(websiteurl, that.websiteurl)
-                && Objects.equals(logourl, that.logourl)
-                && Objects.equals(eclegalbody, that.eclegalbody)
-                && Objects.equals(eclegalperson, that.eclegalperson)
-                && Objects.equals(ecnonprofit, that.ecnonprofit)
-                && Objects.equals(ecresearchorganization, that.ecresearchorganization)
-                && Objects.equals(echighereducation, that.echighereducation)
-                && Objects.equals(ecinternationalorganizationeurinterests, that.ecinternationalorganizationeurinterests)
-                && Objects.equals(ecinternationalorganization, that.ecinternationalorganization)
-                && Objects.equals(ecenterprise, that.ecenterprise)
-                && Objects.equals(ecsmevalidated, that.ecsmevalidated)
-                && Objects.equals(ecnutscode, that.ecnutscode)
-                && Objects.equals(country, that.country);
-    }
-
-    @Override
-    public int hashCode() {
-        return Objects.hash(super.hashCode(), legalshortname, legalname, alternativeNames, websiteurl,
-                logourl, eclegalbody, eclegalperson, ecnonprofit, ecresearchorganization, echighereducation,
-                ecinternationalorganizationeurinterests, ecinternationalorganization, ecenterprise,
-                ecsmevalidated, ecnutscode, country);
-    }
 }

OtherResearchProduct.java:
@@ -1,8 +1,8 @@
 package eu.dnetlib.dhp.schema.oaf;

+import eu.dnetlib.dhp.schema.common.ModelConstants;
 import java.io.Serializable;
 import java.util.List;
-import java.util.Objects;

 public class OtherResearchProduct extends Result implements Serializable {

@@ -12,6 +12,10 @@ public class OtherResearchProduct extends Result implements Serializable {

     private List<Field<String>> tool;

+    public OtherResearchProduct() {
+        setResulttype(ModelConstants.ORP_DEFAULT_RESULTTYPE);
+    }
+
     public List<Field<String>> getContactperson() {
         return contactperson;
     }

@@ -51,20 +55,4 @@ public class OtherResearchProduct extends Result implements Serializable {
         tool = mergeLists(tool, o.getTool());
         mergeOAFDataInfo(e);
     }
-
-    @Override
-    public boolean equals(Object o) {
-        if (this == o) return true;
-        if (o == null || getClass() != o.getClass()) return false;
-        if (!super.equals(o)) return false;
-        OtherResearchProduct that = (OtherResearchProduct) o;
-        return Objects.equals(contactperson, that.contactperson)
-                && Objects.equals(contactgroup, that.contactgroup)
-                && Objects.equals(tool, that.tool);
-    }
-
-    @Override
-    public int hashCode() {
-        return Objects.hash(super.hashCode(), contactperson, contactgroup, tool);
-    }
 }

Project.java:
@@ -2,7 +2,6 @@ package eu.dnetlib.dhp.schema.oaf;

 import java.io.Serializable;
 import java.util.List;
-import java.util.Objects;

 public class Project extends OafEntity implements Serializable {

@@ -352,70 +351,4 @@ public class Project extends OafEntity implements Serializable {
                         : fundedamount;
         mergeOAFDataInfo(e);
     }
-
-    @Override
-    public boolean equals(Object o) {
-        if (this == o) return true;
-        if (o == null || getClass() != o.getClass()) return false;
-        if (!super.equals(o)) return false;
-        Project project = (Project) o;
-        return Objects.equals(websiteurl, project.websiteurl)
-                && Objects.equals(code, project.code)
-                && Objects.equals(acronym, project.acronym)
-                && Objects.equals(title, project.title)
-                && Objects.equals(startdate, project.startdate)
-                && Objects.equals(enddate, project.enddate)
-                && Objects.equals(callidentifier, project.callidentifier)
-                && Objects.equals(keywords, project.keywords)
-                && Objects.equals(duration, project.duration)
-                && Objects.equals(ecsc39, project.ecsc39)
-                && Objects.equals(oamandatepublications, project.oamandatepublications)
-                && Objects.equals(ecarticle29_3, project.ecarticle29_3)
-                && Objects.equals(subjects, project.subjects)
-                && Objects.equals(fundingtree, project.fundingtree)
-                && Objects.equals(contracttype, project.contracttype)
-                && Objects.equals(optional1, project.optional1)
-                && Objects.equals(optional2, project.optional2)
-                && Objects.equals(jsonextrainfo, project.jsonextrainfo)
-                && Objects.equals(contactfullname, project.contactfullname)
-                && Objects.equals(contactfax, project.contactfax)
-                && Objects.equals(contactphone, project.contactphone)
-                && Objects.equals(contactemail, project.contactemail)
-                && Objects.equals(summary, project.summary)
-                && Objects.equals(currency, project.currency)
-                && Objects.equals(totalcost, project.totalcost)
-                && Objects.equals(fundedamount, project.fundedamount);
-    }
-
-    @Override
-    public int hashCode() {
-        return Objects.hash(super.hashCode(), websiteurl, code, acronym, title, startdate, enddate,
-                callidentifier, keywords, duration, ecsc39, oamandatepublications, ecarticle29_3,
-                subjects, fundingtree, contracttype, optional1, optional2, jsonextrainfo,
-                contactfullname, contactfax, contactphone, contactemail, summary, currency,
-                totalcost, fundedamount);
-    }
 }

Publication.java:
@@ -1,13 +1,17 @@
 package eu.dnetlib.dhp.schema.oaf;

+import eu.dnetlib.dhp.schema.common.ModelConstants;
 import java.io.Serializable;
-import java.util.Objects;

 public class Publication extends Result implements Serializable {

     // publication specific
     private Journal journal;

+    public Publication() {
+        setResulttype(ModelConstants.PUBLICATION_DEFAULT_RESULTTYPE);
+    }
+
     public Journal getJournal() {
         return journal;
     }

@@ -29,18 +33,4 @@ public class Publication extends Result implements Serializable {
         if (p.getJournal() != null && compareTrust(this, e) < 0) journal = p.getJournal();
         mergeOAFDataInfo(e);
     }
-
-    @Override
-    public boolean equals(Object o) {
-        if (this == o) return true;
-        if (o == null || getClass() != o.getClass()) return false;
-        if (!super.equals(o)) return false;
-        Publication that = (Publication) o;
-        return Objects.equals(journal, that.journal);
-    }
-
-    @Override
-    public int hashCode() {
-        return Objects.hash(super.hashCode(), journal);
-    }
 }

Result.java:
@@ -3,7 +3,6 @@ package eu.dnetlib.dhp.schema.oaf;
 import java.io.Serializable;
 import java.util.Comparator;
 import java.util.List;
-import java.util.Objects;

 public class Result extends OafEntity implements Serializable {

@@ -231,6 +230,9 @@ public class Result extends OafEntity implements Serializable {

         instance = mergeLists(instance, r.getInstance());

+        if (r.getBestaccessright() != null && compareTrust(this, r) < 0)
+            bestaccessright = r.getBestaccessright();
+
         if (r.getResulttype() != null && compareTrust(this, r) < 0) resulttype = r.getResulttype();

         if (r.getLanguage() != null && compareTrust(this, r) < 0) language = r.getLanguage();

@@ -286,60 +288,4 @@ public class Result extends OafEntity implements Serializable {
         }
         return a.size() > b.size() ? a : b;
     }
-
-    @Override
-    public boolean equals(Object o) {
-        if (this == o) return true;
-        if (o == null || getClass() != o.getClass()) return false;
-        if (!super.equals(o)) return false;
-        Result result = (Result) o;
-        return Objects.equals(author, result.author)
-                && Objects.equals(resulttype, result.resulttype)
-                && Objects.equals(language, result.language)
-                && Objects.equals(country, result.country)
-                && Objects.equals(subject, result.subject)
-                && Objects.equals(title, result.title)
-                && Objects.equals(relevantdate, result.relevantdate)
-                && Objects.equals(description, result.description)
-                && Objects.equals(dateofacceptance, result.dateofacceptance)
-                && Objects.equals(publisher, result.publisher)
-                && Objects.equals(embargoenddate, result.embargoenddate)
-                && Objects.equals(source, result.source)
-                && Objects.equals(fulltext, result.fulltext)
-                && Objects.equals(format, result.format)
-                && Objects.equals(contributor, result.contributor)
-                && Objects.equals(resourcetype, result.resourcetype)
-                && Objects.equals(coverage, result.coverage)
-                && Objects.equals(bestaccessright, result.bestaccessright)
-                && Objects.equals(context, result.context)
-                && Objects.equals(externalReference, result.externalReference)
-                && Objects.equals(instance, result.instance);
-    }
-
-    @Override
-    public int hashCode() {
-        return Objects.hash(super.hashCode(), author, resulttype, language, country, subject, title,
-                relevantdate, description, dateofacceptance, publisher, embargoenddate, source,
-                fulltext, format, contributor, resourcetype, coverage, bestaccessright, context,
-                externalReference, instance);
-    }
 }

Software.java:
@@ -1,8 +1,8 @@
 package eu.dnetlib.dhp.schema.oaf;

+import eu.dnetlib.dhp.schema.common.ModelConstants;
 import java.io.Serializable;
 import java.util.List;
-import java.util.Objects;

 public class Software extends Result implements Serializable {

@@ -14,6 +14,10 @@ public class Software extends Result implements Serializable {

     private Qualifier programmingLanguage;

+    public Software() {
+        setResulttype(ModelConstants.SOFTWARE_DEFAULT_RESULTTYPE);
+    }
+
     public List<Field<String>> getDocumentationUrl() {
         return documentationUrl;
     }

@@ -71,26 +75,4 @@ public class Software extends Result implements Serializable {

         mergeOAFDataInfo(e);
     }
-
-    @Override
-    public boolean equals(Object o) {
-        if (this == o) return true;
-        if (o == null || getClass() != o.getClass()) return false;
-        if (!super.equals(o)) return false;
-        Software software = (Software) o;
-        return Objects.equals(documentationUrl, software.documentationUrl)
-                && Objects.equals(license, software.license)
-                && Objects.equals(codeRepositoryUrl, software.codeRepositoryUrl)
-                && Objects.equals(programmingLanguage, software.programmingLanguage);
-    }
-
-    @Override
-    public int hashCode() {
-        return Objects.hash(super.hashCode(), documentationUrl, license, codeRepositoryUrl, programmingLanguage);
-    }
 }

PromoteActionPayloadForGraphTableJob.java:
@@ -166,10 +166,8 @@ public class PromoteActionPayloadForGraphTableJob {
                 actionPayloadClazz.getSimpleName(),
                 rowClazz.getSimpleName());

-        SerializableSupplier<Function<G, String>> rowIdFn =
-                PromoteActionPayloadForGraphTableJob::idFn;
-        SerializableSupplier<Function<A, String>> actionPayloadIdFn =
-                PromoteActionPayloadForGraphTableJob::idFn;
+        SerializableSupplier<Function<G, String>> rowIdFn = ModelSupport::idFn;
+        SerializableSupplier<Function<A, String>> actionPayloadIdFn = ModelSupport::idFn;
         SerializableSupplier<BiFunction<G, A, G>> mergeRowWithActionPayloadAndGetFn =
                 MergeAndGet.functionFor(strategy);
         SerializableSupplier<BiFunction<G, G, G>> mergeRowsAndGetFn =

@@ -192,68 +190,6 @@ public class PromoteActionPayloadForGraphTableJob {
                 joinedAndMerged, rowIdFn, mergeRowsAndGetFn, zeroFn, isNotZeroFn, rowClazz);
     }
-
-    private static <T extends Oaf> Function<T, String> idFn() {
-        return x -> {
-            if (isSubClass(x, Relation.class)) {
-                return idFnForRelation(x);
-            }
-            return idFnForOafEntity(x);
-        };
-    }
-
-    private static <T extends Oaf> String idFnForRelation(T t) {
-        Relation r = (Relation) t;
-        return Optional.ofNullable(r.getSource())
-                .map(source -> Optional.ofNullable(r.getTarget())
-                        .map(target -> Optional.ofNullable(r.getRelType())
-                                .map(relType -> Optional.ofNullable(r.getSubRelType())
-                                        .map(subRelType -> Optional.ofNullable(r.getRelClass())
-                                                .map(relClass -> String.join(source, target, relType, subRelType, relClass))
-                                                .orElse(String.join(source, target, relType, subRelType)))
-                                        .orElse(String.join(source, target, relType)))
-                                .orElse(String.join(source, target)))
-                        .orElse(source))
-                .orElse(null);
-    }
-
-    private static <T extends Oaf> String idFnForOafEntity(T t) {
-        return ((OafEntity) t).getId();
-    }

     private static <T extends Oaf> SerializableSupplier<T> zeroFn(Class<T> clazz) {
         switch (clazz.getCanonicalName()) {
             case "eu.dnetlib.dhp.schema.oaf.Dataset":
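
Both id suppliers now point at the shared ModelSupport helper; a hedged sketch of how such a serializable supplier is typically consumed inside a Spark job (illustrative only; the record variable is made up):

    // The supplier itself is what gets serialized with the task; the Function is
    // materialized on the executor when get() is called.
    SerializableSupplier<Function<Publication, String>> idSupplier = ModelSupport::idFn;
    Function<Publication, String> keyFn = idSupplier.get();
    String key = keyFn.apply(somePublication); // for an OafEntity this is just getId()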

DedupRecordFactory.java:
@@ -25,6 +25,7 @@ public class DedupRecordFactory {

     public static <T extends OafEntity> Dataset<T> createDedupRecord(
             final SparkSession spark,
+            final DataInfo dataInfo,
             final String mergeRelsInputPath,
             final String entitiesInputPath,
             final Class<T> clazz) {

@@ -67,41 +68,39 @@ public class DedupRecordFactory {
                         Encoders.STRING())
                 .mapGroups(
                         (MapGroupsFunction<String, Tuple2<String, T>, T>)
-                                (key, values) -> entityMerger(key, values, ts, clazz),
+                                (key, values) -> entityMerger(key, values, ts, dataInfo),
                         Encoders.bean(clazz));
     }

     private static <T extends OafEntity> T entityMerger(
-            String id, Iterator<Tuple2<String, T>> entities, long ts, Class<T> clazz) {
-        try {
-            T entity = clazz.newInstance();
-            entity.setId(id);
-            entity.setDataInfo(new DataInfo());
-            entity.getDataInfo().setTrust("0.9");
-            entity.setLastupdatetimestamp(ts);
-
-            final Collection<String> dates = Lists.newArrayList();
-            entities.forEachRemaining(
-                    t -> {
-                        T duplicate = t._2();
-                        entity.mergeFrom(duplicate);
-                        if (ModelSupport.isSubClass(duplicate, Result.class)) {
-                            Result r1 = (Result) duplicate;
-                            Result er = (Result) entity;
-                            er.setAuthor(DedupUtility.mergeAuthor(er.getAuthor(), r1.getAuthor()));
-
-                            if (er.getDateofacceptance() != null) {
-                                dates.add(r1.getDateofacceptance().getValue());
-                            }
-                        }
-                    });
-
-            if (ModelSupport.isSubClass(entity, Result.class)) {
-                ((Result) entity).setDateofacceptance(DatePicker.pick(dates));
-            }
-            return entity;
-        } catch (IllegalAccessException | InstantiationException e) {
-            throw new RuntimeException(e);
-        }
-    }
+            String id, Iterator<Tuple2<String, T>> entities, long ts, DataInfo dataInfo) {
+
+        T entity = entities.next()._2();
+
+        final Collection<String> dates = Lists.newArrayList();
+        entities.forEachRemaining(
+                t -> {
+                    T duplicate = t._2();
+                    entity.mergeFrom(duplicate);
+                    if (ModelSupport.isSubClass(duplicate, Result.class)) {
+                        Result r1 = (Result) duplicate;
+                        Result er = (Result) entity;
+                        er.setAuthor(DedupUtility.mergeAuthor(er.getAuthor(), r1.getAuthor()));
+
+                        if (r1.getDateofacceptance() != null) {
+                            dates.add(r1.getDateofacceptance().getValue());
+                        }
+                    }
+                });
+
+        if (ModelSupport.isSubClass(entity, Result.class)) {
+            ((Result) entity).setDateofacceptance(DatePicker.pick(dates));
+        }
+
+        entity.setId(id);
+        entity.setLastupdatetimestamp(ts);
+        entity.setDataInfo(dataInfo);
+
+        return entity;
+    }
 }
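
With this change the merged record starts from the first duplicate instead of a reflectively created instance, and the caller supplies the provenance. A rough invocation sketch (paths, the SparkSession and the DataInfo value are placeholders, not taken from the commit):

    Dataset<Publication> dedupRecords =
            DedupRecordFactory.createDedupRecord(
                    spark,                                      // active SparkSession
                    dataInfo,                                   // e.g. built by SparkCreateDedupRecord.getDataInfo(dedupConf)
                    "/working/actionset/publication_mergerel",  // hypothetical mergeRelsInputPath
                    "/graph/publication",                       // hypothetical entitiesInputPath
                    Publication.class);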

SparkCreateDedupRecord.java:
@@ -3,7 +3,9 @@ package eu.dnetlib.dhp.oa.dedup;
 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import eu.dnetlib.dhp.schema.common.EntityType;
 import eu.dnetlib.dhp.schema.common.ModelSupport;
+import eu.dnetlib.dhp.schema.oaf.DataInfo;
 import eu.dnetlib.dhp.schema.oaf.OafEntity;
+import eu.dnetlib.dhp.schema.oaf.Qualifier;
 import eu.dnetlib.dhp.utils.ISLookupClientFactory;
 import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
 import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;

@@ -21,6 +23,10 @@ public class SparkCreateDedupRecord extends AbstractSparkAction {

     private static final Logger log = LoggerFactory.getLogger(SparkCreateDedupRecord.class);

+    public static final String ROOT_TRUST = "0.8";
+    public static final String PROVENANCE_ACTION_CLASS = "sysimport:dedup";
+    public static final String PROVENANCE_ACTIONS = "dnet:provenanceActions";
+
     public SparkCreateDedupRecord(ArgumentApplicationParser parser, SparkSession spark) {
         super(parser, spark);
     }

@@ -67,13 +73,30 @@ public class SparkCreateDedupRecord extends AbstractSparkAction {
                     DedupUtility.createMergeRelPath(workingPath, actionSetId, subEntity);
             final String entityPath = DedupUtility.createEntityPath(graphBasePath, subEntity);

-            Class<OafEntity> clazz = ModelSupport.entityTypes.get(EntityType.valueOf(subEntity));
-            DedupRecordFactory.createDedupRecord(spark, mergeRelPath, entityPath, clazz)
+            final Class<OafEntity> clazz =
+                    ModelSupport.entityTypes.get(EntityType.valueOf(subEntity));
+            final DataInfo dataInfo = getDataInfo(dedupConf);
+            DedupRecordFactory.createDedupRecord(spark, dataInfo, mergeRelPath, entityPath, clazz)
                     .write()
                     .mode(SaveMode.Overwrite)
                     .option("compression", "gzip")
                     .json(outputPath);
         }
     }
+
+    private static DataInfo getDataInfo(DedupConfig dedupConf) {
+        DataInfo info = new DataInfo();
+        info.setDeletedbyinference(false);
+        info.setInferred(true);
+        info.setInvisible(false);
+        info.setTrust(ROOT_TRUST);
+        info.setInferenceprovenance(dedupConf.getWf().getConfigurationId());
+        Qualifier provenance = new Qualifier();
+        provenance.setClassid(PROVENANCE_ACTION_CLASS);
+        provenance.setClassname(PROVENANCE_ACTION_CLASS);
+        provenance.setSchemeid(PROVENANCE_ACTIONS);
+        provenance.setSchemename(PROVENANCE_ACTIONS);
+        info.setProvenanceaction(provenance);
+        return info;
+    }
 }

ConnectedComponent.java:
@@ -25,7 +25,7 @@ public class ConnectedComponent implements Serializable {
         if (docIds.size() > 1) {
             final String s = getMin();
             String prefix = s.split("\\|")[0];
-            ccId = prefix + "|dedup_______::" + DedupUtility.md5(s);
+            ccId = prefix + "|dedup_wf_001::" + DedupUtility.md5(s);
             return ccId;
         } else {
             return docIds.iterator().next();
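
For reference, the connected-component identifier now lives in the dedup_wf_001 namespace; a hedged illustration of the resulting shape (the input id is made up, DedupUtility.md5 is the helper referenced above):

    String min = "50|doi_________::abc123";   // hypothetical minimum docId in the component
    String prefix = min.split("\\|")[0];      // "50"
    String ccId = prefix + "|dedup_wf_001::" + DedupUtility.md5(min);
    // e.g. "50|dedup_wf_001::<32-char md5 hex of the minimum id>"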

SparkDedupTest.java:
@@ -57,7 +57,6 @@ public class SparkDedupTest implements Serializable {
                                 .toURI())
                         .toFile()
                         .getAbsolutePath();
-
         testOutputBasePath =
                 createTempDirectory(SparkDedupTest.class.getSimpleName() + "-")
                         .toAbsolutePath()

@@ -110,6 +109,22 @@ public class SparkDedupTest implements Serializable {
                         IOUtils.toString(
                                 SparkDedupTest.class.getResourceAsStream(
                                         "/eu/dnetlib/dhp/dedup/conf/sw.curr.conf.json")));
+
+        lenient()
+                .when(isLookUpService.getResourceProfileByQuery(Mockito.contains("dataset")))
+                .thenReturn(
+                        IOUtils.toString(
+                                SparkDedupTest.class.getResourceAsStream(
+                                        "/eu/dnetlib/dhp/dedup/conf/ds.curr.conf.json")));
+
+        lenient()
+                .when(
+                        isLookUpService.getResourceProfileByQuery(
+                                Mockito.contains("otherresearchproduct")))
+                .thenReturn(
+                        IOUtils.toString(
+                                SparkDedupTest.class.getResourceAsStream(
+                                        "/eu/dnetlib/dhp/dedup/conf/orp.curr.conf.json")));
     }

     @Test

@@ -144,9 +159,25 @@ public class SparkDedupTest implements Serializable {
                         .load(testOutputBasePath + "/" + testActionSetId + "/software_simrel")
                         .count();

+        long ds_simrel =
+                spark.read()
+                        .load(testOutputBasePath + "/" + testActionSetId + "/dataset_simrel")
+                        .count();
+
+        long orp_simrel =
+                spark.read()
+                        .load(testOutputBasePath + "/" + testActionSetId + "/otherresearchproduct_simrel")
+                        .count();
+
         assertEquals(3432, orgs_simrel);
         assertEquals(7260, pubs_simrel);
         assertEquals(344, sw_simrel);
+        assertEquals(458, ds_simrel);
+        assertEquals(6740, orp_simrel);
     }

     @Test

@@ -181,9 +212,25 @@ public class SparkDedupTest implements Serializable {
                        .load(testOutputBasePath + "/" + testActionSetId + "/software_mergerel")
                        .count();

+        long ds_mergerel =
+                spark.read()
+                        .load(testOutputBasePath + "/" + testActionSetId + "/dataset_mergerel")
+                        .count();
+
+        long orp_mergerel =
+                spark.read()
+                        .load(testOutputBasePath + "/" + testActionSetId + "/otherresearchproduct_mergerel")
+                        .count();
+
         assertEquals(1276, orgs_mergerel);
         assertEquals(1460, pubs_mergerel);
         assertEquals(288, sw_mergerel);
+        assertEquals(472, ds_mergerel);
+        assertEquals(714, orp_mergerel);
     }

     @Test

@@ -222,10 +269,22 @@ public class SparkDedupTest implements Serializable {
         long sw_deduprecord =
                 jsc.textFile(testOutputBasePath + "/" + testActionSetId + "/software_deduprecord")
                         .count();
+        long ds_deduprecord =
+                jsc.textFile(testOutputBasePath + "/" + testActionSetId + "/dataset_deduprecord")
+                        .count();
+        long orp_deduprecord =
+                jsc.textFile(testOutputBasePath + "/" + testActionSetId + "/otherresearchproduct_deduprecord")
+                        .count();

         assertEquals(82, orgs_deduprecord);
         assertEquals(66, pubs_deduprecord);
         assertEquals(51, sw_deduprecord);
+        assertEquals(96, ds_deduprecord);
+        assertEquals(89, orp_deduprecord);
     }

     @Test

@@ -251,6 +310,9 @@ public class SparkDedupTest implements Serializable {
         long projects = jsc.textFile(testDedupGraphBasePath + "/project").count();
         long datasource = jsc.textFile(testDedupGraphBasePath + "/datasource").count();
         long softwares = jsc.textFile(testDedupGraphBasePath + "/software").count();
+        long dataset = jsc.textFile(testDedupGraphBasePath + "/dataset").count();
+        long otherresearchproduct =
+                jsc.textFile(testDedupGraphBasePath + "/otherresearchproduct").count();

         long mergedOrgs =
                 spark.read()

@@ -282,11 +344,37 @@ public class SparkDedupTest implements Serializable {
                         .distinct()
                         .count();

+        long mergedDs =
+                spark.read()
+                        .load(testOutputBasePath + "/" + testActionSetId + "/dataset_mergerel")
+                        .as(Encoders.bean(Relation.class))
+                        .where("relClass=='merges'")
+                        .javaRDD()
+                        .map(Relation::getTarget)
+                        .distinct()
+                        .count();
+
+        long mergedOrp =
+                spark.read()
+                        .load(testOutputBasePath + "/" + testActionSetId + "/otherresearchproduct_mergerel")
+                        .as(Encoders.bean(Relation.class))
+                        .where("relClass=='merges'")
+                        .javaRDD()
+                        .map(Relation::getTarget)
+                        .distinct()
+                        .count();
+
         assertEquals(897, publications);
         assertEquals(835, organizations);
         assertEquals(100, projects);
         assertEquals(100, datasource);
         assertEquals(200, softwares);
+        assertEquals(388, dataset);
+        assertEquals(517, otherresearchproduct);

         long deletedOrgs =
                 jsc.textFile(testDedupGraphBasePath + "/organization")

@@ -303,9 +391,21 @@ public class SparkDedupTest implements Serializable {
                        .filter(this::isDeletedByInference)
                        .count();

+        long deletedDs =
+                jsc.textFile(testDedupGraphBasePath + "/dataset")
+                        .filter(this::isDeletedByInference)
+                        .count();
+
+        long deletedOrp =
+                jsc.textFile(testDedupGraphBasePath + "/otherresearchproduct")
+                        .filter(this::isDeletedByInference)
+                        .count();
+
         assertEquals(mergedOrgs, deletedOrgs);
         assertEquals(mergedPubs, deletedPubs);
         assertEquals(mergedSw, deletedSw);
+        assertEquals(mergedDs, deletedDs);
+        assertEquals(mergedOrp, deletedOrp);
     }

     @Test

Dedup configuration (JSON):
@@ -11,7 +11,9 @@
         "maxChildren" : "100",
         "slidingWindowSize" : "200",
         "rootBuilder" : ["result", "resultProject_outcome_isProducedBy", "resultResult_publicationDataset_isRelatedTo", "resultResult_similarity_isAmongTopNSimilarDocuments", "resultResult_similarity_hasAmongTopNSimilarDocuments", "resultOrganization_affiliation_hasAuthorInstitution", "resultResult_part_hasPart", "resultResult_part_isPartOf", "resultResult_supplement_isSupplementTo", "resultResult_supplement_isSupplementedBy", "resultResult_version_isVersionOf" ],
-        "includeChildren" : "true"
+        "includeChildren" : "true",
+        "idPath" : "$.id",
+        "maxIterations" : 20
     },
     "pace" : {
         "clustering" : [

@@ -70,7 +72,8 @@
                 "field": "title",
                 "comparator": "levensteinTitle",
                 "weight": 1.0,
-                "countIfUndefined": "true"
+                "countIfUndefined": "true",
+                "params": {}
             }
         ],
         "threshold": 0.99,

@@ -85,7 +88,7 @@
         {
             "name" : "doi",
             "type" : "String",
-            "path" : "$.pid[@.qualifier.classid = 'doi'].value"
+            "path" : "$.pid[?(@.qualifier.classid == 'doi')].value"
         },
         {
             "name" : "pid",

@@ -96,7 +99,7 @@
         {
            "name" : "title",
            "type" : "String",
-           "path" : "$.title[@.qualifier.classid = 'main title'].value",
+           "path" : "$.title[?(@.qualifier.classid == 'main title')].value",
            "length" : 250,
            "size" : 5
         },
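
The path changes above switch from a non-standard bracket predicate to the usual JsonPath filter form [?( ... )] with the == comparison operator. A quick way to check the corrected expression (a sketch using the Jayway JsonPath library; the sample record is made up):

    import com.jayway.jsonpath.JsonPath;
    import java.util.List;

    String json = "{\"pid\":[{\"qualifier\":{\"classid\":\"doi\"},\"value\":\"10.1000/xyz\"}]}";
    List<String> dois = JsonPath.read(json, "$.pid[?(@.qualifier.classid == 'doi')].value");
    // dois -> ["10.1000/xyz"]; the previous form "$.pid[@.qualifier.classid = 'doi'].value" is not accepted as a filter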

Second dedup configuration (JSON), same adjustments:
@@ -11,7 +11,9 @@
         "maxChildren" : "100",
         "slidingWindowSize" : "200",
         "rootBuilder" : [ "result", "resultProject_outcome_isProducedBy", "resultResult_publicationDataset_isRelatedTo", "resultResult_similarity_isAmongTopNSimilarDocuments", "resultResult_similarity_hasAmongTopNSimilarDocuments", "resultOrganization_affiliation_hasAuthorInstitution", "resultResult_part_hasPart", "resultResult_part_isPartOf", "resultResult_supplement_isSupplementTo", "resultResult_supplement_isSupplementedBy", "resultResult_version_isVersionOf" ],
-        "includeChildren" : "true"
+        "includeChildren" : "true",
+        "idPath" : "$.id",
+        "maxIterations" : 20
     },
     "pace" : {
         "clustering" : [

@@ -70,7 +72,8 @@
                 "field": "title",
                 "comparator": "levensteinTitle",
                 "weight": 1.0,
-                "countIfUndefined": "true"
+                "countIfUndefined": "true",
+                "params": {}
             }
         ],
         "threshold": 0.99,

@@ -85,7 +88,7 @@
         {
             "name" : "doi",
             "type" : "String",
-            "path" : "$.pid[@.qualifier.classid = 'doi'}].value"
+            "path" : "$.pid[?(@.qualifier.classid == 'doi')].value"
         },
         {
             "name" : "pid",

@@ -96,7 +99,7 @@
         {
            "name" : "title",
            "type" : "String",
-           "path" : "$.title[@.qualifier.classid = 'main title'].value",
+           "path" : "$.title[?(@.qualifier.classid == 'main title')].value",
            "length" : 250,
            "size" : 5
         },

Third dedup configuration (JSON), same adjustments:
@@ -11,7 +11,9 @@
         "maxChildren" : "100",
         "slidingWindowSize" : "200",
         "rootBuilder" : [ "result", "resultProject_outcome_isProducedBy", "resultResult_publicationDataset_isRelatedTo", "resultResult_similarity_isAmongTopNSimilarDocuments", "resultResult_similarity_hasAmongTopNSimilarDocuments", "resultOrganization_affiliation_hasAuthorInstitution", "resultResult_part_hasPart", "resultResult_part_isPartOf", "resultResult_supplement_isSupplementTo", "resultResult_supplement_isSupplementedBy", "resultResult_version_isVersionOf" ],
-        "includeChildren" : "true"
+        "includeChildren" : "true",
+        "idPath" : "$.id",
+        "maxIterations" : 20
     },
     "pace" : {
         "clustering" : [
File diff suppressed because one or more lines are too long

Dedup orchestration profile (XML):
@@ -15,6 +15,8 @@
                 <SCAN id="organization"/>
                 <SCAN id="publication"/>
                 <SCAN id="software"/>
+                <SCAN id="dataset"/>
+                <SCAN id="otherresearchproduct"/>
             </SCAN_SEQUENCE>
         </DEDUPLICATION>
 </CONFIGURATION>

ConnectedComponent.java:
@@ -25,7 +25,7 @@ public class ConnectedComponent implements Serializable {
         if (docIds.size() > 1) {
             final String s = getMin();
             String prefix = s.split("\\|")[0];
-            ccId = prefix + "|dedup_______::" + DedupUtility.md5(s);
+            ccId = prefix + "|dedup_wf_001::" + DedupUtility.md5(s);
             return ccId;
         } else {
             return docIds.iterator().next();

GraphHiveImporterJob.java:
@@ -1,4 +1,4 @@
-package eu.dnetlib.dhp.oa.graph;
+package eu.dnetlib.dhp.oa.graph.hive;

 import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession;

@@ -19,6 +19,8 @@ public class GraphHiveImporterJob {

     private static final Logger log = LoggerFactory.getLogger(GraphHiveImporterJob.class);

+    private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
+
     public static void main(String[] args) throws Exception {

         final ArgumentApplicationParser parser =

@@ -37,12 +39,12 @@ public class GraphHiveImporterJob {
         String inputPath = parser.get("inputPath");
         log.info("inputPath: {}", inputPath);

-        String hiveMetastoreUris = parser.get("hiveMetastoreUris");
-        log.info("hiveMetastoreUris: {}", hiveMetastoreUris);
-
         String hiveDbName = parser.get("hiveDbName");
         log.info("hiveDbName: {}", hiveDbName);

+        String hiveMetastoreUris = parser.get("hiveMetastoreUris");
+        log.info("hiveMetastoreUris: {}", hiveMetastoreUris);
+
         SparkConf conf = new SparkConf();
         conf.set("hive.metastore.uris", hiveMetastoreUris);

@@ -58,13 +60,13 @@ public class GraphHiveImporterJob {
         spark.sql(String.format("DROP DATABASE IF EXISTS %s CASCADE", hiveDbName));
         spark.sql(String.format("CREATE DATABASE IF NOT EXISTS %s", hiveDbName));

-        final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());
+        final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
         // Read the input file and convert it into RDD of serializable object
         ModelSupport.oafTypes.forEach(
                 (name, clazz) ->
                         spark.createDataset(
                                         sc.textFile(inputPath + "/" + name)
-                                                .map(s -> new ObjectMapper().readValue(s, clazz))
+                                                .map(s -> OBJECT_MAPPER.readValue(s, clazz))
                                                 .rdd(),
                                         Encoders.bean(clazz))
                                 .write()
@@ -10,7 +10,9 @@ import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.oaiIProvenance;
 import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.qualifier;
 import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.structuredProperty;
 
+import eu.dnetlib.dhp.oa.graph.raw.common.MigrationConstants;
 import eu.dnetlib.dhp.schema.oaf.Author;
+import eu.dnetlib.dhp.schema.oaf.Context;
 import eu.dnetlib.dhp.schema.oaf.DataInfo;
 import eu.dnetlib.dhp.schema.oaf.Dataset;
 import eu.dnetlib.dhp.schema.oaf.Field;
@@ -46,25 +48,6 @@ public abstract class AbstractMdRecordToOafMapper {
     protected static final Qualifier MAIN_TITLE_QUALIFIER =
             qualifier("main title", "main title", "dnet:dataCite_title", "dnet:dataCite_title");
 
-    protected static final Qualifier PUBLICATION_RESULTTYPE_QUALIFIER =
-            qualifier(
-                    "publication",
-                    "publication",
-                    "dnet:result_typologies",
-                    "dnet:result_typologies");
-    protected static final Qualifier DATASET_RESULTTYPE_QUALIFIER =
-            qualifier("dataset", "dataset", "dnet:result_typologies", "dnet:result_typologies");
-    protected static final Qualifier SOFTWARE_RESULTTYPE_QUALIFIER =
-            qualifier("software", "software", "dnet:result_typologies", "dnet:result_typologies");
-    protected static final Qualifier OTHER_RESULTTYPE_QUALIFIER =
-            qualifier("other", "other", "dnet:result_typologies", "dnet:result_typologies");
-    protected static final Qualifier REPOSITORY_QUALIFIER =
-            qualifier(
-                    "sysimport:crosswalk:repository",
-                    "sysimport:crosswalk:repository",
-                    "dnet:provenanceActions",
-                    "dnet:provenanceActions");
-
     protected AbstractMdRecordToOafMapper(final Map<String, String> code2name) {
         this.code2name = code2name;
     }
@@ -123,14 +106,14 @@ public abstract class AbstractMdRecordToOafMapper {
             case "publication":
                 final Publication p = new Publication();
                 populateResultFields(p, doc, collectedFrom, hostedBy, info, lastUpdateTimestamp);
-                p.setResulttype(PUBLICATION_RESULTTYPE_QUALIFIER);
+                p.setResulttype(MigrationConstants.PUBLICATION_RESULTTYPE_QUALIFIER);
                 p.setJournal(prepareJournal(doc, info));
                 oafs.add(p);
                 break;
             case "dataset":
                 final Dataset d = new Dataset();
                 populateResultFields(d, doc, collectedFrom, hostedBy, info, lastUpdateTimestamp);
-                d.setResulttype(DATASET_RESULTTYPE_QUALIFIER);
+                d.setResulttype(MigrationConstants.DATASET_RESULTTYPE_QUALIFIER);
                 d.setStoragedate(prepareDatasetStorageDate(doc, info));
                 d.setDevice(prepareDatasetDevice(doc, info));
                 d.setSize(prepareDatasetSize(doc, info));
@@ -143,7 +126,7 @@ public abstract class AbstractMdRecordToOafMapper {
             case "software":
                 final Software s = new Software();
                 populateResultFields(s, doc, collectedFrom, hostedBy, info, lastUpdateTimestamp);
-                s.setResulttype(SOFTWARE_RESULTTYPE_QUALIFIER);
+                s.setResulttype(MigrationConstants.SOFTWARE_RESULTTYPE_QUALIFIER);
                 s.setDocumentationUrl(prepareSoftwareDocumentationUrls(doc, info));
                 s.setLicense(prepareSoftwareLicenses(doc, info));
                 s.setCodeRepositoryUrl(prepareSoftwareCodeRepositoryUrl(doc, info));
@@ -154,7 +137,7 @@ public abstract class AbstractMdRecordToOafMapper {
             default:
                 final OtherResearchProduct o = new OtherResearchProduct();
                 populateResultFields(o, doc, collectedFrom, hostedBy, info, lastUpdateTimestamp);
-                o.setResulttype(OTHER_RESULTTYPE_QUALIFIER);
+                o.setResulttype(MigrationConstants.OTHER_RESULTTYPE_QUALIFIER);
                 o.setContactperson(prepareOtherResearchProductContactPersons(doc, info));
                 o.setContactgroup(prepareOtherResearchProductContactGroups(doc, info));
                 o.setTool(prepareOtherResearchProductTools(doc, info));
@@ -255,11 +238,25 @@ public abstract class AbstractMdRecordToOafMapper {
         r.setContributor(prepareContributors(doc, info));
         r.setResourcetype(prepareResourceType(doc, info));
         r.setCoverage(prepareCoverages(doc, info));
-        r.setContext(new ArrayList<>()); // NOT PRESENT IN MDSTORES
+        r.setContext(prepareContexts(doc, info));
         r.setExternalReference(new ArrayList<>()); // NOT PRESENT IN MDSTORES
         r.setInstance(prepareInstances(doc, info, collectedFrom, hostedBy));
     }
 
+    private List<Context> prepareContexts(final Document doc, final DataInfo info) {
+        final List<Context> list = new ArrayList<>();
+        for (final Object o : doc.selectNodes("//oaf:concept")) {
+            final String cid = ((Node) o).valueOf("@id");
+            if (StringUtils.isNotBlank(cid)) {
+                final Context c = new Context();
+                c.setId(cid);
+                c.setDataInfo(Arrays.asList(info));
+                list.add(c);
+            }
+        }
+        return list;
+    }
+
     protected abstract Qualifier prepareResourceType(Document doc, DataInfo info);
 
     protected abstract List<Instance> prepareInstances(
@@ -433,7 +430,13 @@ public abstract class AbstractMdRecordToOafMapper {
         final Node n = doc.selectSingleNode("//oaf:datainfo");
 
         if (n == null) {
-            return dataInfo(false, null, false, false, REPOSITORY_QUALIFIER, "0.9");
+            return dataInfo(
+                    false,
+                    null,
+                    false,
+                    false,
+                    MigrationConstants.REPOSITORY_PROVENANCE_ACTIONS,
+                    "0.9");
         }
 
         final String paClassId = n.valueOf("./oaf:provenanceaction/@classid");
@@ -2,16 +2,18 @@ package eu.dnetlib.dhp.oa.graph.raw;
 
 import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
 
-import com.fasterxml.jackson.databind.ObjectMapper;
 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import eu.dnetlib.dhp.common.HdfsSupport;
+import eu.dnetlib.dhp.schema.common.ModelSupport;
 import eu.dnetlib.dhp.schema.oaf.*;
 import java.util.Optional;
 import org.apache.commons.io.IOUtils;
 import org.apache.commons.lang3.StringUtils;
-import org.apache.hadoop.io.compress.GzipCodec;
 import org.apache.spark.SparkConf;
-import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.api.java.function.FilterFunction;
+import org.apache.spark.api.java.function.MapFunction;
+import org.apache.spark.sql.Encoders;
+import org.apache.spark.sql.SaveMode;
 import org.apache.spark.sql.SparkSession;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -20,8 +22,6 @@ public class DispatchEntitiesApplication {
 
     private static final Logger log = LoggerFactory.getLogger(DispatchEntitiesApplication.class);
 
-    private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
-
     public static void main(final String[] args) throws Exception {
         final ArgumentApplicationParser parser =
                 new ArgumentApplicationParser(
@@ -45,15 +45,9 @@ public class DispatchEntitiesApplication {
                 isSparkSessionManaged,
                 spark -> {
                     removeOutputDir(spark, targetPath);
-                    processEntity(spark, Publication.class, sourcePath, targetPath);
-                    processEntity(spark, Dataset.class, sourcePath, targetPath);
-                    processEntity(spark, Software.class, sourcePath, targetPath);
-                    processEntity(spark, OtherResearchProduct.class, sourcePath, targetPath);
-                    processEntity(spark, Datasource.class, sourcePath, targetPath);
-                    processEntity(spark, Organization.class, sourcePath, targetPath);
-                    processEntity(spark, Project.class, sourcePath, targetPath);
-                    processEntity(spark, Relation.class, sourcePath, targetPath);
+                    ModelSupport.oafTypes
+                            .values()
+                            .forEach(clazz -> processEntity(spark, clazz, sourcePath, targetPath));
                 });
     }
 
@@ -64,26 +58,18 @@ public class DispatchEntitiesApplication {
             final String targetPath) {
         final String type = clazz.getSimpleName().toLowerCase();
 
-        log.info(String.format("Processing entities (%s) in file: %s", type, sourcePath));
+        log.info("Processing entities ({}) in file: {}", type, sourcePath);
 
-        /*
         spark.read()
-                .textFile(sourcePath)
-                .filter((FilterFunction<String>) value -> isEntityType(value, type))
-                .map((MapFunction<String, String>) value -> StringUtils.substringAfter(value, "|"), Encoders.STRING())
-                .map((MapFunction<String, T>) value -> OBJECT_MAPPER.readValue(value, clazz), Encoders.bean(clazz))
-                .write()
-                .mode(SaveMode.Overwrite)
-                .parquet(targetPath + "/" + type);
-
-        */
-
-        JavaSparkContext.fromSparkContext(spark.sparkContext())
                 .textFile(sourcePath)
-                .filter(l -> isEntityType(l, type))
-                .map(l -> StringUtils.substringAfter(l, "|"))
-                .saveAsTextFile(
-                        targetPath + "/" + type, GzipCodec.class); // use repartition(XXX) ???
+                .filter((FilterFunction<String>) value -> isEntityType(value, type))
+                .map(
+                        (MapFunction<String, String>) l -> StringUtils.substringAfter(l, "|"),
+                        Encoders.STRING())
+                .write()
+                .option("compression", "gzip")
+                .mode(SaveMode.Overwrite)
+                .text(targetPath + "/" + type);
     }
 
     private static boolean isEntityType(final String line, final String type) {
@@ -6,6 +6,7 @@ import com.fasterxml.jackson.databind.ObjectMapper;
 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import eu.dnetlib.dhp.common.HdfsSupport;
 import eu.dnetlib.dhp.oa.graph.raw.common.DbClient;
+import eu.dnetlib.dhp.schema.common.ModelSupport;
 import eu.dnetlib.dhp.schema.oaf.*;
 import java.io.IOException;
 import java.sql.SQLException;
@@ -29,6 +30,8 @@ public class GenerateEntitiesApplication {
 
     private static final Logger log = LoggerFactory.getLogger(GenerateEntitiesApplication.class);
 
+    private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
+
     public static void main(final String[] args) throws Exception {
         final ArgumentApplicationParser parser =
                 new ArgumentApplicationParser(
@@ -78,7 +81,7 @@ public class GenerateEntitiesApplication {
         log.info("Generate entities from files:");
         existingSourcePaths.forEach(log::info);
 
-        JavaRDD<String> inputRdd = sc.emptyRDD();
+        JavaRDD<Oaf> inputRdd = sc.emptyRDD();
 
         for (final String sp : existingSourcePaths) {
             inputRdd =
@@ -86,15 +89,29 @@ public class GenerateEntitiesApplication {
                     sc.sequenceFile(sp, Text.class, Text.class)
                             .map(k -> new Tuple2<>(k._1().toString(), k._2().toString()))
                             .map(k -> convertToListOaf(k._1(), k._2(), code2name))
-                            .flatMap(list -> list.iterator())
-                            .map(
-                                    oaf ->
-                                            oaf.getClass().getSimpleName().toLowerCase()
-                                                    + "|"
-                                                    + convertToJson(oaf)));
+                            .flatMap(list -> list.iterator()));
         }
 
-        inputRdd.saveAsTextFile(targetPath, GzipCodec.class);
+        inputRdd.mapToPair(oaf -> new Tuple2<>(ModelSupport.idFn().apply(oaf), oaf))
+                .reduceByKey((o1, o2) -> merge(o1, o2))
+                .map(Tuple2::_2)
+                .map(
+                        oaf ->
+                                oaf.getClass().getSimpleName().toLowerCase()
+                                        + "|"
+                                        + OBJECT_MAPPER.writeValueAsString(oaf))
+                .saveAsTextFile(targetPath, GzipCodec.class);
+    }
+
+    private static Oaf merge(Oaf o1, Oaf o2) {
+        if (ModelSupport.isSubClass(o1, OafEntity.class)) {
+            ((OafEntity) o1).mergeFrom((OafEntity) o2);
+        } else if (ModelSupport.isSubClass(o1, Relation.class)) {
+            ((Relation) o1).mergeFrom((Relation) o2);
+        } else {
+            throw new RuntimeException("invalid Oaf type:" + o1.getClass().getCanonicalName());
+        }
+        return o1;
     }
 
     private static List<Oaf> convertToListOaf(
@@ -120,9 +137,10 @@ public class GenerateEntitiesApplication {
                 return Arrays.asList(convertFromJson(s, Dataset.class));
             case "software":
                 return Arrays.asList(convertFromJson(s, Software.class));
-            case "otherresearchproducts":
-            default:
+            case "otherresearchproduct":
                 return Arrays.asList(convertFromJson(s, OtherResearchProduct.class));
+            default:
+                throw new RuntimeException("type not managed: " + type.toLowerCase());
         }
     }
 
@@ -150,17 +168,9 @@ public class GenerateEntitiesApplication {
         return map;
     }
 
-    private static String convertToJson(final Oaf oaf) {
-        try {
-            return new ObjectMapper().writeValueAsString(oaf);
-        } catch (final Exception e) {
-            throw new RuntimeException(e);
-        }
-    }
-
     private static Oaf convertFromJson(final String s, final Class<? extends Oaf> clazz) {
         try {
-            return new ObjectMapper().readValue(s, clazz);
+            return OBJECT_MAPPER.readValue(s, clazz);
         } catch (final Exception e) {
             log.error("Error parsing object of class: " + clazz);
             log.error(s);
@@ -1,7 +1,6 @@
 package eu.dnetlib.dhp.oa.graph.raw;
 
 import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
-import static eu.dnetlib.dhp.schema.common.ModelSupport.isSubClass;
 
 import com.fasterxml.jackson.databind.ObjectMapper;
 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
@@ -10,7 +9,6 @@ import eu.dnetlib.dhp.schema.common.ModelSupport;
 import eu.dnetlib.dhp.schema.oaf.*;
 import java.util.Objects;
 import java.util.Optional;
-import java.util.function.Function;
 import org.apache.commons.io.IOUtils;
 import org.apache.spark.SparkConf;
 import org.apache.spark.api.java.JavaSparkContext;
@@ -83,7 +81,9 @@ public class MergeClaimsApplication {
                 readFromPath(spark, rawPath, clazz)
                         .map(
                                 (MapFunction<T, Tuple2<String, T>>)
-                                        value -> new Tuple2<>(idFn().apply(value), value),
+                                        value ->
+                                                new Tuple2<>(
+                                                        ModelSupport.idFn().apply(value), value),
                                 Encoders.tuple(Encoders.STRING(), Encoders.kryo(clazz)));
 
         final JavaSparkContext jsc = JavaSparkContext.fromSparkContext(spark.sparkContext());
@@ -92,14 +92,11 @@ public class MergeClaimsApplication {
                         .getValue()
                         .map(
                                 (MapFunction<T, Tuple2<String, T>>)
-                                        value -> new Tuple2<>(idFn().apply(value), value),
+                                        value ->
+                                                new Tuple2<>(
+                                                        ModelSupport.idFn().apply(value), value),
                                 Encoders.tuple(Encoders.STRING(), Encoders.kryo(clazz)));
 
-        /*
-        Dataset<Tuple2<String, T>> claim = readFromPath(spark, claimPath, clazz)
-            .map((MapFunction<T, Tuple2<String, T>>) value -> new Tuple2<>(idFn().apply(value), value), Encoders.tuple(Encoders.STRING(), Encoders.kryo(clazz)));
-        */
-
         raw.joinWith(claim, raw.col("_1").equalTo(claim.col("_1")), "full_outer")
                 .map(
                         (MapFunction<Tuple2<Tuple2<String, T>, Tuple2<String, T>>, T>)
@@ -131,78 +128,12 @@ public class MergeClaimsApplication {
                 .map(
                         (MapFunction<String, T>) value -> OBJECT_MAPPER.readValue(value, clazz),
                         Encoders.bean(clazz))
-                .filter((FilterFunction<T>) value -> Objects.nonNull(idFn().apply(value)));
-        /*
-        return spark.read()
-            .load(path)
-            .as(Encoders.bean(clazz))
-            .filter((FilterFunction<T>) value -> Objects.nonNull(idFn().apply(value)));
-        */
+                .filter(
+                        (FilterFunction<T>)
+                                value -> Objects.nonNull(ModelSupport.idFn().apply(value)));
     }
 
     private static void removeOutputDir(SparkSession spark, String path) {
         HdfsSupport.remove(path, spark.sparkContext().hadoopConfiguration());
     }
-
-    private static <T extends Oaf> Function<T, String> idFn() {
-        return x -> {
-            if (isSubClass(x, Relation.class)) {
-                return idFnForRelation(x);
-            }
-            return idFnForOafEntity(x);
-        };
-    }
-
-    private static <T extends Oaf> String idFnForRelation(T t) {
-        Relation r = (Relation) t;
-        return Optional.ofNullable(r.getSource())
-            .map(source ->
-                Optional.ofNullable(r.getTarget())
-                    .map(target ->
-                        Optional.ofNullable(r.getRelType())
-                            .map(relType ->
-                                Optional.ofNullable(r.getSubRelType())
-                                    .map(subRelType ->
-                                        Optional.ofNullable(r.getRelClass())
-                                            .map(relClass ->
-                                                String.join(source, target, relType, subRelType, relClass))
-                                            .orElse(String.join(source, target, relType, subRelType)))
-                                    .orElse(String.join(source, target, relType)))
-                            .orElse(String.join(source, target)))
-                    .orElse(source))
-            .orElse(null);
-    }
-
-    private static <T extends Oaf> String idFnForOafEntity(T t) {
-        return ((OafEntity) t).getId();
-    }
 }
@@ -13,6 +13,7 @@ import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.structuredProperty;
 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import eu.dnetlib.dhp.oa.graph.raw.common.AbstractMigrationApplication;
 import eu.dnetlib.dhp.oa.graph.raw.common.DbClient;
+import eu.dnetlib.dhp.oa.graph.raw.common.MigrationConstants;
 import eu.dnetlib.dhp.schema.oaf.Context;
 import eu.dnetlib.dhp.schema.oaf.DataInfo;
 import eu.dnetlib.dhp.schema.oaf.Dataset;
@@ -49,13 +50,6 @@ import org.apache.commons.logging.LogFactory;
 public class MigrateDbEntitiesApplication extends AbstractMigrationApplication
         implements Closeable {
 
-    private static final Qualifier ENTITYREGISTRY_PROVENANCE_ACTION =
-            qualifier(
-                    "sysimport:crosswalk:entityregistry",
-                    "sysimport:crosswalk:entityregistry",
-                    "dnet:provenance_actions",
-                    "dnet:provenance_actions");
-
     private static final Log log = LogFactory.getLog(MigrateDbEntitiesApplication.class);
 
     private final DbClient dbClient;
@@ -402,12 +396,16 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication
 
             if (rs.getString("target_type").equals("dataset")) {
                 r = new Dataset();
+                r.setResulttype(MigrationConstants.DATASET_RESULTTYPE_QUALIFIER);
             } else if (rs.getString("target_type").equals("software")) {
                 r = new Software();
+                r.setResulttype(MigrationConstants.SOFTWARE_RESULTTYPE_QUALIFIER);
             } else if (rs.getString("target_type").equals("other")) {
                 r = new OtherResearchProduct();
+                r.setResulttype(MigrationConstants.OTHER_RESULTTYPE_QUALIFIER);
             } else {
                 r = new Publication();
+                r.setResulttype(MigrationConstants.PUBLICATION_RESULTTYPE_QUALIFIER);
             }
             r.setId(createOpenaireId(50, rs.getString("target_id"), false));
             r.setLastupdatetimestamp(lastUpdateTimestamp);
@@ -484,7 +482,7 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication
                 inferenceprovenance,
                 inferred,
                 false,
-                ENTITYREGISTRY_PROVENANCE_ACTION,
+                MigrationConstants.ENTITYREGISTRY_PROVENANCE_ACTION,
                 trust);
     }
 
@@ -1,8 +1,19 @@
 package eu.dnetlib.dhp.oa.graph.raw;
 
-import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.*;
+import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.createOpenaireId;
+import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.field;
+import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.structuredProperty;
 
-import eu.dnetlib.dhp.schema.oaf.*;
+import eu.dnetlib.dhp.schema.oaf.Author;
+import eu.dnetlib.dhp.schema.oaf.DataInfo;
+import eu.dnetlib.dhp.schema.oaf.Field;
+import eu.dnetlib.dhp.schema.oaf.GeoLocation;
+import eu.dnetlib.dhp.schema.oaf.Instance;
+import eu.dnetlib.dhp.schema.oaf.KeyValue;
+import eu.dnetlib.dhp.schema.oaf.Oaf;
+import eu.dnetlib.dhp.schema.oaf.Qualifier;
+import eu.dnetlib.dhp.schema.oaf.Relation;
+import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
@@ -62,33 +73,44 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
             final DataInfo info,
             final KeyValue collectedfrom,
             final KeyValue hostedby) {
-        final List<Instance> res = new ArrayList<>();
+        final Instance instance = new Instance();
+        instance.setUrl(new ArrayList<>());
+        instance.setInstancetype(
+                prepareQualifier(
+                        doc,
+                        "//dr:CobjCategory",
+                        "dnet:publication_resource",
+                        "dnet:publication_resource"));
+        instance.setCollectedfrom(collectedfrom);
+        instance.setHostedby(hostedby);
+        instance.setDateofacceptance(field(doc.valueOf("//oaf:dateAccepted"), info));
+        instance.setDistributionlocation(doc.valueOf("//oaf:distributionlocation"));
+        instance.setAccessright(
+                prepareQualifier(
+                        doc, "//oaf:accessrights", "dnet:access_modes", "dnet:access_modes"));
+        instance.setLicense(field(doc.valueOf("//oaf:license"), info));
+        instance.setRefereed(field(doc.valueOf("//oaf:refereed"), info));
+        instance.setProcessingchargeamount(
+                field(doc.valueOf("//oaf:processingchargeamount"), info));
+        instance.setProcessingchargecurrency(
+                field(doc.valueOf("//oaf:processingchargeamount/@currency"), info));
+
         for (final Object o :
                 doc.selectNodes("//datacite:alternateIdentifier[@alternateIdentifierType='URL']")) {
-            final Instance instance = new Instance();
-            instance.setUrl(Arrays.asList(((Node) o).getText().trim()));
-            instance.setInstancetype(
-                    prepareQualifier(
-                            doc,
-                            "//dr:CobjCategory",
-                            "dnet:publication_resource",
-                            "dnet:publication_resource"));
-            instance.setCollectedfrom(collectedfrom);
-            instance.setHostedby(hostedby);
-            instance.setDateofacceptance(field(doc.valueOf("//oaf:dateAccepted"), info));
-            instance.setDistributionlocation(doc.valueOf("//oaf:distributionlocation"));
-            instance.setAccessright(
-                    prepareQualifier(
-                            doc, "//oaf:accessrights", "dnet:access_modes", "dnet:access_modes"));
-            instance.setLicense(field(doc.valueOf("//oaf:license"), info));
-            instance.setRefereed(field(doc.valueOf("//oaf:refereed"), info));
-            instance.setProcessingchargeamount(
-                    field(doc.valueOf("//oaf:processingchargeamount"), info));
-            instance.setProcessingchargecurrency(
-                    field(doc.valueOf("//oaf:processingchargeamount/@currency"), info));
-            res.add(instance);
+            instance.getUrl().add(((Node) o).getText().trim());
         }
-        return res;
+        for (final Object o : doc.selectNodes("//datacite:identifier[@identifierType='URL']")) {
+            instance.getUrl().add(((Node) o).getText().trim());
+        }
+        for (final Object o :
+                doc.selectNodes("//datacite:alternateIdentifier[@alternateIdentifierType='DOI']")) {
+            instance.getUrl().add("http://dx.doi.org/" + ((Node) o).getText().trim());
+        }
+        for (final Object o : doc.selectNodes("//datacite:identifier[@identifierType='DOI']")) {
+            instance.getUrl().add("http://dx.doi.org/" + ((Node) o).getText().trim());
+        }
+        return Arrays.asList(instance);
     }
 
     @Override
@@ -0,0 +1,33 @@
+package eu.dnetlib.dhp.oa.graph.raw.common;
+
+import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.qualifier;
+
+import eu.dnetlib.dhp.schema.oaf.Qualifier;
+
+public class MigrationConstants {
+
+    public static final Qualifier PUBLICATION_RESULTTYPE_QUALIFIER =
+            qualifier(
+                    "publication",
+                    "publication",
+                    "dnet:result_typologies",
+                    "dnet:result_typologies");
+    public static final Qualifier DATASET_RESULTTYPE_QUALIFIER =
+            qualifier("dataset", "dataset", "dnet:result_typologies", "dnet:result_typologies");
+    public static final Qualifier SOFTWARE_RESULTTYPE_QUALIFIER =
+            qualifier("software", "software", "dnet:result_typologies", "dnet:result_typologies");
+    public static final Qualifier OTHER_RESULTTYPE_QUALIFIER =
+            qualifier("other", "other", "dnet:result_typologies", "dnet:result_typologies");
+    public static final Qualifier REPOSITORY_PROVENANCE_ACTIONS =
+            qualifier(
+                    "sysimport:crosswalk:repository",
+                    "sysimport:crosswalk:repository",
+                    "dnet:provenanceActions",
+                    "dnet:provenanceActions");
+    public static final Qualifier ENTITYREGISTRY_PROVENANCE_ACTION =
+            qualifier(
+                    "sysimport:crosswalk:entityregistry",
+                    "sysimport:crosswalk:entityregistry",
+                    "dnet:provenanceActions",
+                    "dnet:provenanceActions");
+}
@@ -12,19 +12,15 @@
         <value>true</value>
     </property>
     <property>
-        <name>oozie.action.sharelib.for.spark</name>
-        <value>spark2</value>
-    </property>
-    <property>
-        <name>hive_metastore_uris</name>
+        <name>hiveMetastoreUris</name>
         <value>thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083</value>
     </property>
     <property>
-        <name>hive_jdbc_url</name>
+        <name>hiveJdbcUrl</name>
        <value>jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000</value>
     </property>
     <property>
-        <name>hive_db_name</name>
+        <name>hiveDbName</name>
         <value>openaire</value>
     </property>
 </configuration>
@@ -1,10 +1,10 @@
-DROP VIEW IF EXISTS ${hive_db_name}.result;
+DROP VIEW IF EXISTS ${hiveDbName}.result;
 
 CREATE VIEW IF NOT EXISTS result as
-select id, dateofcollection, title, publisher, bestaccessright, datainfo, collectedfrom, pid, author, resulttype, language, country, subject, description, dateofacceptance, embargoenddate, resourcetype, context, instance from ${hive_db_name}.publication p
+select id, dateofcollection, title, publisher, bestaccessright, datainfo, collectedfrom, pid, author, resulttype, language, country, subject, description, dateofacceptance, embargoenddate, resourcetype, context, instance from ${hiveDbName}.publication p
 union all
-select id, dateofcollection, title, publisher, bestaccessright, datainfo, collectedfrom, pid, author, resulttype, language, country, subject, description, dateofacceptance, embargoenddate, resourcetype, context, instance from ${hive_db_name}.dataset d
+select id, dateofcollection, title, publisher, bestaccessright, datainfo, collectedfrom, pid, author, resulttype, language, country, subject, description, dateofacceptance, embargoenddate, resourcetype, context, instance from ${hiveDbName}.dataset d
 union all
-select id, dateofcollection, title, publisher, bestaccessright, datainfo, collectedfrom, pid, author, resulttype, language, country, subject, description, dateofacceptance, embargoenddate, resourcetype, context, instance from ${hive_db_name}.software s
+select id, dateofcollection, title, publisher, bestaccessright, datainfo, collectedfrom, pid, author, resulttype, language, country, subject, description, dateofacceptance, embargoenddate, resourcetype, context, instance from ${hiveDbName}.software s
 union all
-select id, dateofcollection, title, publisher, bestaccessright, datainfo, collectedfrom, pid, author, resulttype, language, country, subject, description, dateofacceptance, embargoenddate, resourcetype, context, instance from ${hive_db_name}.otherresearchproduct o;
+select id, dateofcollection, title, publisher, bestaccessright, datainfo, collectedfrom, pid, author, resulttype, language, country, subject, description, dateofacceptance, embargoenddate, resourcetype, context, instance from ${hiveDbName}.otherresearchproduct o;
@@ -2,13 +2,21 @@
 
 <parameters>
     <property>
-        <name>sourcePath</name>
+        <name>inputPath</name>
         <description>the source path</description>
     </property>
     <property>
-        <name>hive_db_name</name>
+        <name>hiveDbName</name>
         <description>the target hive database name</description>
     </property>
+    <property>
+        <name>hiveJdbcUrl</name>
+        <description>hive server jdbc url</description>
+    </property>
+    <property>
+        <name>hiveMetastoreUris</name>
+        <description>hive server metastore URIs</description>
+    </property>
     <property>
         <name>sparkDriverMemory</name>
         <description>memory for driver process</description>
@@ -75,7 +83,7 @@
             <master>yarn</master>
             <mode>cluster</mode>
             <name>MapGraphAsHiveDB</name>
-            <class>eu.dnetlib.dhp.oa.graph.GraphHiveImporterJob</class>
+            <class>eu.dnetlib.dhp.oa.graph.hive.GraphHiveImporterJob</class>
             <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
             <spark-opts>
                 --executor-memory=${sparkExecutorMemory}
@@ -87,9 +95,9 @@
                 --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                 --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
             </spark-opts>
-            <arg>--sourcePath</arg><arg>${sourcePath}</arg>
-            <arg>--hive_db_name</arg><arg>${hive_db_name}</arg>
-            <arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
+            <arg>--inputPath</arg><arg>${inputPath}</arg>
+            <arg>--hiveDbName</arg><arg>${hiveDbName}</arg>
+            <arg>--hiveMetastoreUris</arg><arg>${hiveMetastoreUris}</arg>
         </spark>
         <ok to="PostProcessing"/>
         <error to="Kill"/>
@@ -102,12 +110,12 @@
             <configuration>
                 <property>
                     <name>hive.metastore.uris</name>
-                    <value>${hive_metastore_uris}</value>
+                    <value>${hiveMetastoreUris}</value>
                 </property>
             </configuration>
-            <jdbc-url>${hive_jdbc_url}/${hive_db_name}</jdbc-url>
+            <jdbc-url>${hiveJdbcUrl}/${hiveDbName}</jdbc-url>
             <script>lib/scripts/postprocessing.sql</script>
-            <param>hive_db_name=${hive_db_name}</param>
+            <param>hiveDbName=${hiveDbName}</param>
         </hive2>
         <ok to="End"/>
         <error to="Kill"/>
@@ -1,5 +1,6 @@
 package eu.dnetlib.dhp.oa.graph;
 
+import eu.dnetlib.dhp.oa.graph.hive.GraphHiveImporterJob;
 import eu.dnetlib.dhp.schema.common.ModelSupport;
 import java.io.IOException;
 import java.nio.file.Files;
@@ -54,6 +54,7 @@ public class MappersTest {
         assertTrue(p.getSubject().size() > 0);
         assertTrue(StringUtils.isNotBlank(p.getJournal().getIssnOnline()));
         assertTrue(StringUtils.isNotBlank(p.getJournal().getName()));
+        assertTrue(p.getInstance().size() > 0);
 
         assertValidId(r1.getSource());
         assertValidId(r1.getTarget());
@@ -96,6 +97,9 @@ public class MappersTest {
         assertTrue(StringUtils.isNotBlank(d.getTitle().get(0).getValue()));
         assertTrue(d.getAuthor().size() > 0);
         assertTrue(d.getSubject().size() > 0);
+        assertTrue(d.getInstance().size() > 0);
+        assertTrue(d.getContext().size() > 0);
+        assertTrue(d.getContext().get(0).getId().length() > 0);
 
         assertValidId(r1.getSource());
         assertValidId(r1.getTarget());
@@ -129,6 +133,7 @@ public class MappersTest {
         assertTrue(StringUtils.isNotBlank(s.getTitle().get(0).getValue()));
         assertTrue(s.getAuthor().size() > 0);
         assertTrue(s.getSubject().size() > 0);
+        assertTrue(s.getInstance().size() > 0);
     }
 
     private void assertValidId(final String id) {
@@ -216,6 +216,7 @@ public class CreateRelatedEntitiesJob_phase2 {
                         (MapFunction<String, E>)
                                 value -> OBJECT_MAPPER.readValue(value, entityClazz),
                         Encoders.bean(entityClazz))
+                .filter("dataInfo.invisible == false")
                 .map(
                         (MapFunction<E, TypedRow>)
                                 value ->
pom.xml
@@ -292,6 +292,12 @@
             <groupId>eu.dnetlib</groupId>
             <artifactId>dnet-actionmanager-common</artifactId>
             <version>6.0.5</version>
+            <exclusions>
+                <exclusion>
+                    <groupId>org.apache.hadoop</groupId>
+                    <artifactId>hadoop-common</artifactId>
+                </exclusion>
+            </exclusions>
         </dependency>
         <dependency>
             <groupId>eu.dnetlib</groupId>
@@ -307,7 +313,7 @@
         <dependency>
             <groupId>eu.dnetlib</groupId>
             <artifactId>dnet-pace-core</artifactId>
-            <version>4.0.0</version>
+            <version>4.0.1</version>
         </dependency>
         <dependency>
             <groupId>eu.dnetlib</groupId>