2020-04-30 11:05:17 +02:00
|
|
|
|
2020-04-10 17:53:07 +02:00
|
|
|
package eu.dnetlib.dhp.oa.graph.raw;
|
|
|
|
|
2021-04-07 14:27:43 +02:00
|
|
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
|
2021-04-27 15:44:01 +02:00
|
|
|
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.*;
|
2020-04-18 12:42:58 +02:00
|
|
|
|
2020-04-30 11:05:17 +02:00
|
|
|
import java.io.Closeable;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.sql.Array;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.List;
import java.util.Locale;
import java.util.function.Consumer;
import java.util.function.Function;
import java.util.function.Predicate;
import java.util.stream.Collectors;
|
2020-04-30 11:05:17 +02:00
|
|
|
|
|
|
|
import org.apache.commons.io.IOUtils;
|
|
|
|
import org.apache.commons.lang3.StringUtils;
|
2020-05-30 10:52:56 +02:00
|
|
|
import org.slf4j.Logger;
|
|
|
|
import org.slf4j.LoggerFactory;
|
2020-04-30 11:05:17 +02:00
|
|
|
|
2020-04-20 11:09:27 +02:00
|
|
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
2020-05-11 13:59:42 +02:00
|
|
|
import eu.dnetlib.dhp.common.DbClient;
|
2021-01-28 09:51:17 +01:00
|
|
|
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
|
2020-04-20 11:09:27 +02:00
|
|
|
import eu.dnetlib.dhp.oa.graph.raw.common.AbstractMigrationApplication;
|
2021-02-10 11:51:50 +01:00
|
|
|
import eu.dnetlib.dhp.oa.graph.raw.common.MigrateAction;
|
2020-07-30 16:13:38 +02:00
|
|
|
import eu.dnetlib.dhp.oa.graph.raw.common.VerifyNsPrefixPredicate;
|
2021-03-31 17:07:13 +02:00
|
|
|
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
2020-04-20 18:53:06 +02:00
|
|
|
import eu.dnetlib.dhp.schema.oaf.Context;
|
|
|
|
import eu.dnetlib.dhp.schema.oaf.DataInfo;
|
|
|
|
import eu.dnetlib.dhp.schema.oaf.Dataset;
|
|
|
|
import eu.dnetlib.dhp.schema.oaf.Datasource;
|
|
|
|
import eu.dnetlib.dhp.schema.oaf.Field;
|
|
|
|
import eu.dnetlib.dhp.schema.oaf.KeyValue;
|
|
|
|
import eu.dnetlib.dhp.schema.oaf.Oaf;
|
|
|
|
import eu.dnetlib.dhp.schema.oaf.Organization;
|
|
|
|
import eu.dnetlib.dhp.schema.oaf.OtherResearchProduct;
|
|
|
|
import eu.dnetlib.dhp.schema.oaf.Project;
|
|
|
|
import eu.dnetlib.dhp.schema.oaf.Publication;
|
|
|
|
import eu.dnetlib.dhp.schema.oaf.Qualifier;
|
|
|
|
import eu.dnetlib.dhp.schema.oaf.Relation;
|
|
|
|
import eu.dnetlib.dhp.schema.oaf.Result;
|
|
|
|
import eu.dnetlib.dhp.schema.oaf.Software;
|
|
|
|
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
2020-06-09 17:20:40 +02:00
|
|
|
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
|
2020-04-20 11:09:27 +02:00
|
|
|
|
2020-05-14 15:07:24 +02:00
|
|
|
public class MigrateDbEntitiesApplication extends AbstractMigrationApplication implements Closeable {
|
2020-04-29 12:56:58 +02:00
|
|
|
|
2020-05-29 18:17:30 +02:00
|
|
|
private static final Logger log = LoggerFactory.getLogger(MigrateDbEntitiesApplication.class);
|
2020-04-29 12:56:58 +02:00
|
|
|
|
2021-06-23 11:49:42 +02:00
|
|
|
private static final DataInfo DATA_INFO_CLAIM = dataInfo(
|
|
|
|
false, null, false, false,
|
|
|
|
qualifier(USER_CLAIM, USER_CLAIM, DNET_PROVENANCE_ACTIONS, DNET_PROVENANCE_ACTIONS), "0.9");
|
|
|
|
|
|
|
|
private static final List<KeyValue> COLLECTED_FROM_CLAIM = listKeyValues(
|
|
|
|
createOpenaireId(10, "infrastruct_::openaire", true), "OpenAIRE");
|
|
|
|
|
2020-05-06 13:20:02 +02:00
|
|
|
public static final String SOURCE_TYPE = "source_type";
|
|
|
|
public static final String TARGET_TYPE = "target_type";
|
|
|
|
|
2020-04-29 12:56:58 +02:00
|
|
|
private final DbClient dbClient;
|
|
|
|
|
|
|
|
private final long lastUpdateTimestamp;
|
|
|
|
|
2020-05-29 12:03:51 +02:00
|
|
|
private final VocabularyGroup vocs;
|
|
|
|
|
2020-04-29 12:56:58 +02:00
|
|
|
public static void main(final String[] args) throws Exception {
|
|
|
|
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
|
2020-04-30 12:45:28 +02:00
|
|
|
IOUtils
|
|
|
|
.toString(
|
|
|
|
MigrateDbEntitiesApplication.class
|
|
|
|
.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/migrate_db_entities_parameters.json")));
|
2020-04-29 12:56:58 +02:00
|
|
|
|
|
|
|
parser.parseArgument(args);
|
|
|
|
|
|
|
|
final String dbUrl = parser.get("postgresUrl");
|
2020-05-29 18:17:30 +02:00
|
|
|
log.info("postgresUrl: {}", dbUrl);
|
|
|
|
|
2020-04-29 12:56:58 +02:00
|
|
|
final String dbUser = parser.get("postgresUser");
|
2020-05-29 18:17:30 +02:00
|
|
|
log.info("postgresUser: {}", dbUser);
|
|
|
|
|
2020-04-29 12:56:58 +02:00
|
|
|
final String dbPassword = parser.get("postgresPassword");
|
2020-05-29 18:17:30 +02:00
|
|
|
log.info("postgresPassword: xxx");
|
|
|
|
|
2020-06-25 09:28:13 +02:00
|
|
|
final String dbSchema = parser.get("dbschema");
|
|
|
|
log.info("dbSchema {}: " + dbSchema);
|
|
|
|
|
2020-05-29 18:17:30 +02:00
|
|
|
final String isLookupUrl = parser.get("isLookupUrl");
|
|
|
|
log.info("isLookupUrl: {}", isLookupUrl);
|
2020-04-29 12:56:58 +02:00
|
|
|
|
|
|
|
final String hdfsPath = parser.get("hdfsPath");
|
2020-05-29 18:17:30 +02:00
|
|
|
log.info("hdfsPath: {}", hdfsPath);
|
2020-04-29 12:56:58 +02:00
|
|
|
|
2020-07-30 16:13:38 +02:00
|
|
|
final String nsPrefixBlacklist = parser.get("nsPrefixBlacklist");
|
|
|
|
log.info("nsPrefixBlacklist: {}", nsPrefixBlacklist);
|
|
|
|
|
|
|
|
final Predicate<Oaf> verifyNamespacePrefix = new VerifyNsPrefixPredicate(nsPrefixBlacklist);
|
|
|
|
|
2021-02-10 11:51:50 +01:00
|
|
|
final MigrateAction process = parser.get("action") != null ? MigrateAction.valueOf(parser.get("action"))
|
|
|
|
: MigrateAction.openaire;
|
|
|
|
log.info("migrateAction: {}", process);
|
2020-04-29 12:56:58 +02:00
|
|
|
|
|
|
|
try (final MigrateDbEntitiesApplication smdbe = new MigrateDbEntitiesApplication(hdfsPath, dbUrl, dbUser,
|
2020-05-29 12:03:51 +02:00
|
|
|
dbPassword, isLookupUrl)) {
|
2020-04-29 12:56:58 +02:00
|
|
|
|
2021-02-10 11:51:50 +01:00
|
|
|
switch (process) {
|
|
|
|
case claims:
|
|
|
|
log.info("Processing claims...");
|
|
|
|
smdbe.execute("queryClaims.sql", smdbe::processClaims);
|
|
|
|
break;
|
|
|
|
case openaire:
|
|
|
|
log.info("Processing datasources...");
|
|
|
|
smdbe.execute("queryDatasources.sql", smdbe::processDatasource, verifyNamespacePrefix);
|
|
|
|
|
|
|
|
log.info("Processing projects...");
|
|
|
|
if (dbSchema.equalsIgnoreCase("beta")) {
|
|
|
|
smdbe.execute("queryProjects.sql", smdbe::processProject, verifyNamespacePrefix);
|
|
|
|
} else {
|
|
|
|
smdbe.execute("queryProjects_production.sql", smdbe::processProject, verifyNamespacePrefix);
|
|
|
|
}
|
|
|
|
|
|
|
|
log.info("Processing Organizations...");
|
|
|
|
smdbe.execute("queryOrganizations.sql", smdbe::processOrganization, verifyNamespacePrefix);
|
|
|
|
|
|
|
|
log.info("Processing relationsNoRemoval ds <-> orgs ...");
|
|
|
|
smdbe
|
|
|
|
.execute(
|
|
|
|
"queryDatasourceOrganization.sql", smdbe::processDatasourceOrganization,
|
|
|
|
verifyNamespacePrefix);
|
|
|
|
|
|
|
|
log.info("Processing projects <-> orgs ...");
|
|
|
|
smdbe
|
|
|
|
.execute(
|
|
|
|
"queryProjectOrganization.sql", smdbe::processProjectOrganization, verifyNamespacePrefix);
|
|
|
|
break;
|
2021-04-07 14:27:43 +02:00
|
|
|
case openorgs_dedup: // generates organization entities and relations for openorgs dedup
|
2021-02-10 11:51:50 +01:00
|
|
|
log.info("Processing Openorgs...");
|
|
|
|
smdbe
|
|
|
|
.execute(
|
2021-03-29 10:07:12 +02:00
|
|
|
"queryOpenOrgsForOrgsDedup.sql", smdbe::processOrganization, verifyNamespacePrefix);
|
2021-02-10 11:51:50 +01:00
|
|
|
|
2021-04-07 14:27:43 +02:00
|
|
|
log.info("Processing Openorgs Sim Rels...");
|
2021-03-29 10:07:12 +02:00
|
|
|
smdbe.execute("queryOpenOrgsSimilarityForOrgsDedup.sql", smdbe::processOrgOrgSimRels);
|
|
|
|
break;
|
|
|
|
|
2021-04-07 14:27:43 +02:00
|
|
|
case openorgs: // generates organization entities and relations for provision
|
2021-03-29 10:07:12 +02:00
|
|
|
log.info("Processing Openorgs For Provision...");
|
|
|
|
smdbe
|
|
|
|
.execute(
|
|
|
|
"queryOpenOrgsForProvision.sql", smdbe::processOrganization, verifyNamespacePrefix);
|
|
|
|
|
|
|
|
log.info("Processing Openorgs Merge Rels...");
|
2021-04-07 14:27:43 +02:00
|
|
|
smdbe.execute("queryOpenOrgsSimilarityForProvision.sql", smdbe::processOrgOrgMergeRels);
|
2021-03-24 15:51:27 +01:00
|
|
|
break;
|
|
|
|
|
|
|
|
case openaire_organizations:
|
|
|
|
|
|
|
|
log.info("Processing Organizations...");
|
|
|
|
smdbe.execute("queryOrganizations.sql", smdbe::processOrganization, verifyNamespacePrefix);
|
2021-02-10 11:51:50 +01:00
|
|
|
break;
|
2020-04-29 12:56:58 +02:00
|
|
|
}
|
|
|
|
log.info("All done.");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-05-29 12:03:51 +02:00
|
|
|
protected MigrateDbEntitiesApplication(final VocabularyGroup vocs) { // ONLY FOR UNIT TEST
|
2020-04-29 12:56:58 +02:00
|
|
|
super();
|
|
|
|
this.dbClient = null;
|
|
|
|
this.lastUpdateTimestamp = new Date().getTime();
|
2020-05-29 12:03:51 +02:00
|
|
|
this.vocs = vocs;
|
2020-04-29 12:56:58 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
public MigrateDbEntitiesApplication(
|
2020-05-29 12:03:51 +02:00
|
|
|
final String hdfsPath, final String dbUrl, final String dbUser, final String dbPassword,
|
|
|
|
final String isLookupUrl)
|
2020-04-30 12:45:28 +02:00
|
|
|
throws Exception {
|
2020-04-29 12:56:58 +02:00
|
|
|
super(hdfsPath);
|
|
|
|
this.dbClient = new DbClient(dbUrl, dbUser, dbPassword);
|
|
|
|
this.lastUpdateTimestamp = new Date().getTime();
|
2020-06-09 17:20:40 +02:00
|
|
|
this.vocs = VocabularyGroup.loadVocsFromIS(ISLookupClientFactory.getLookUpService(isLookupUrl));
|
2020-04-29 12:56:58 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
public void execute(final String sqlFile, final Function<ResultSet, List<Oaf>> producer)
|
2020-07-30 16:13:38 +02:00
|
|
|
throws Exception {
|
|
|
|
execute(sqlFile, producer, oaf -> true);
|
|
|
|
}
|
|
|
|
|
2020-11-09 12:58:36 +01:00
|
|
|
public void execute(final String sqlFile,
|
|
|
|
final Function<ResultSet, List<Oaf>> producer,
|
2020-07-30 16:13:38 +02:00
|
|
|
final Predicate<Oaf> predicate)
|
2020-04-30 12:45:28 +02:00
|
|
|
throws Exception {
|
2020-04-29 12:56:58 +02:00
|
|
|
final String sql = IOUtils.toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/oa/graph/sql/" + sqlFile));
|
|
|
|
|
2020-07-30 16:13:38 +02:00
|
|
|
final Consumer<ResultSet> consumer = rs -> producer.apply(rs).forEach(oaf -> {
|
|
|
|
if (predicate.test(oaf)) {
|
|
|
|
emitOaf(oaf);
|
|
|
|
}
|
|
|
|
});
|
2020-04-29 12:56:58 +02:00
|
|
|
|
|
|
|
dbClient.processResults(sql, consumer);
|
|
|
|
}
|
|
|
|
|
|
|
|
public List<Oaf> processDatasource(final ResultSet rs) {
|
|
|
|
try {
|
|
|
|
final DataInfo info = prepareDataInfo(rs);
|
|
|
|
|
|
|
|
final Datasource ds = new Datasource();
|
|
|
|
|
|
|
|
ds.setId(createOpenaireId(10, rs.getString("datasourceid"), true));
|
2020-10-02 09:43:24 +02:00
|
|
|
ds
|
|
|
|
.setOriginalId(
|
|
|
|
Arrays
|
2020-11-09 12:58:36 +01:00
|
|
|
.asList((String[]) rs.getArray("identities").getArray())
|
2020-10-02 09:43:24 +02:00
|
|
|
.stream()
|
|
|
|
.filter(StringUtils::isNotBlank)
|
|
|
|
.collect(Collectors.toList()));
|
2020-04-29 12:56:58 +02:00
|
|
|
ds
|
2020-04-30 12:45:28 +02:00
|
|
|
.setCollectedfrom(
|
|
|
|
listKeyValues(
|
|
|
|
createOpenaireId(10, rs.getString("collectedfromid"), true),
|
|
|
|
rs.getString("collectedfromname")));
|
2020-04-29 12:56:58 +02:00
|
|
|
ds.setPid(new ArrayList<>());
|
|
|
|
ds.setDateofcollection(asString(rs.getDate("dateofcollection")));
|
|
|
|
ds.setDateoftransformation(null); // Value not returned by the SQL query
|
|
|
|
ds.setExtraInfo(new ArrayList<>()); // Values not present in the DB
|
|
|
|
ds.setOaiprovenance(null); // Values not present in the DB
|
|
|
|
ds.setDatasourcetype(prepareQualifierSplitting(rs.getString("datasourcetype")));
|
|
|
|
ds.setOpenairecompatibility(prepareQualifierSplitting(rs.getString("openairecompatibility")));
|
|
|
|
ds.setOfficialname(field(rs.getString("officialname"), info));
|
|
|
|
ds.setEnglishname(field(rs.getString("englishname"), info));
|
|
|
|
ds.setWebsiteurl(field(rs.getString("websiteurl"), info));
|
|
|
|
ds.setLogourl(field(rs.getString("logourl"), info));
|
|
|
|
ds.setContactemail(field(rs.getString("contactemail"), info));
|
|
|
|
ds.setNamespaceprefix(field(rs.getString("namespaceprefix"), info));
|
|
|
|
ds.setLatitude(field(Double.toString(rs.getDouble("latitude")), info));
|
|
|
|
ds.setLongitude(field(Double.toString(rs.getDouble("longitude")), info));
|
|
|
|
ds.setDateofvalidation(field(asString(rs.getDate("dateofvalidation")), info));
|
|
|
|
ds.setDescription(field(rs.getString("description"), info));
|
|
|
|
ds.setSubjects(prepareListOfStructProps(rs.getArray("subjects"), info));
|
|
|
|
ds.setOdnumberofitems(field(Double.toString(rs.getInt("odnumberofitems")), info));
|
|
|
|
ds.setOdnumberofitemsdate(field(asString(rs.getDate("odnumberofitemsdate")), info));
|
|
|
|
ds.setOdpolicies(field(rs.getString("odpolicies"), info));
|
|
|
|
ds.setOdlanguages(prepareListFields(rs.getArray("odlanguages"), info));
|
|
|
|
ds.setOdcontenttypes(prepareListFields(rs.getArray("odcontenttypes"), info));
|
|
|
|
ds.setAccessinfopackage(prepareListFields(rs.getArray("accessinfopackage"), info));
|
|
|
|
ds.setReleasestartdate(field(asString(rs.getDate("releasestartdate")), info));
|
|
|
|
ds.setReleaseenddate(field(asString(rs.getDate("releaseenddate")), info));
|
|
|
|
ds.setMissionstatementurl(field(rs.getString("missionstatementurl"), info));
|
|
|
|
ds.setDataprovider(field(rs.getBoolean("dataprovider"), info));
|
|
|
|
ds.setServiceprovider(field(rs.getBoolean("serviceprovider"), info));
|
|
|
|
ds.setDatabaseaccesstype(field(rs.getString("databaseaccesstype"), info));
|
|
|
|
ds.setDatauploadtype(field(rs.getString("datauploadtype"), info));
|
|
|
|
ds.setDatabaseaccessrestriction(field(rs.getString("databaseaccessrestriction"), info));
|
|
|
|
ds.setDatauploadrestriction(field(rs.getString("datauploadrestriction"), info));
|
|
|
|
ds.setVersioning(field(rs.getBoolean("versioning"), info));
|
|
|
|
ds.setCitationguidelineurl(field(rs.getString("citationguidelineurl"), info));
|
|
|
|
ds.setQualitymanagementkind(field(rs.getString("qualitymanagementkind"), info));
|
|
|
|
ds.setPidsystems(field(rs.getString("pidsystems"), info));
|
|
|
|
ds.setCertificates(field(rs.getString("certificates"), info));
|
|
|
|
ds.setPolicies(new ArrayList<>()); // The sql query returns an empty array
|
2020-10-02 09:43:24 +02:00
|
|
|
ds
|
|
|
|
.setJournal(
|
2020-10-02 09:37:08 +02:00
|
|
|
journal(
|
2020-11-09 12:58:36 +01:00
|
|
|
rs.getString("officialname"), rs.getString("issnPrinted"), rs.getString("issnOnline"),
|
|
|
|
rs.getString("issnLinking"), info)); // Journal
|
2020-04-29 12:56:58 +02:00
|
|
|
ds.setDataInfo(info);
|
|
|
|
ds.setLastupdatetimestamp(lastUpdateTimestamp);
|
|
|
|
|
|
|
|
return Arrays.asList(ds);
|
|
|
|
} catch (final Exception e) {
|
|
|
|
throw new RuntimeException(e);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
public List<Oaf> processProject(final ResultSet rs) {
|
|
|
|
try {
|
|
|
|
final DataInfo info = prepareDataInfo(rs);
|
|
|
|
|
|
|
|
final Project p = new Project();
|
|
|
|
|
|
|
|
p.setId(createOpenaireId(40, rs.getString("projectid"), true));
|
|
|
|
p.setOriginalId(Arrays.asList(rs.getString("projectid")));
|
|
|
|
p
|
2020-04-30 12:45:28 +02:00
|
|
|
.setCollectedfrom(
|
|
|
|
listKeyValues(
|
|
|
|
createOpenaireId(10, rs.getString("collectedfromid"), true),
|
|
|
|
rs.getString("collectedfromname")));
|
2020-04-29 12:56:58 +02:00
|
|
|
p.setPid(new ArrayList<>());
|
|
|
|
p.setDateofcollection(asString(rs.getDate("dateofcollection")));
|
|
|
|
p.setDateoftransformation(asString(rs.getDate("dateoftransformation")));
|
|
|
|
p.setExtraInfo(new ArrayList<>()); // Values not present in the DB
|
|
|
|
p.setOaiprovenance(null); // Values not present in the DB
|
|
|
|
p.setWebsiteurl(field(rs.getString("websiteurl"), info));
|
|
|
|
p.setCode(field(rs.getString("code"), info));
|
|
|
|
p.setAcronym(field(rs.getString("acronym"), info));
|
|
|
|
p.setTitle(field(rs.getString("title"), info));
|
|
|
|
p.setStartdate(field(asString(rs.getDate("startdate")), info));
|
|
|
|
p.setEnddate(field(asString(rs.getDate("enddate")), info));
|
|
|
|
p.setCallidentifier(field(rs.getString("callidentifier"), info));
|
|
|
|
p.setKeywords(field(rs.getString("keywords"), info));
|
|
|
|
p.setDuration(field(Integer.toString(rs.getInt("duration")), info));
|
|
|
|
p.setEcsc39(field(Boolean.toString(rs.getBoolean("ecsc39")), info));
|
|
|
|
p
|
2020-04-30 12:45:28 +02:00
|
|
|
.setOamandatepublications(field(Boolean.toString(rs.getBoolean("oamandatepublications")), info));
|
2020-04-29 12:56:58 +02:00
|
|
|
p.setEcarticle29_3(field(Boolean.toString(rs.getBoolean("ecarticle29_3")), info));
|
|
|
|
p.setSubjects(prepareListOfStructProps(rs.getArray("subjects"), info));
|
|
|
|
p.setFundingtree(prepareListFields(rs.getArray("fundingtree"), info));
|
|
|
|
p.setContracttype(prepareQualifierSplitting(rs.getString("contracttype")));
|
|
|
|
p.setOptional1(field(rs.getString("optional1"), info));
|
|
|
|
p.setOptional2(field(rs.getString("optional2"), info));
|
|
|
|
p.setJsonextrainfo(field(rs.getString("jsonextrainfo"), info));
|
|
|
|
p.setContactfullname(field(rs.getString("contactfullname"), info));
|
|
|
|
p.setContactfax(field(rs.getString("contactfax"), info));
|
|
|
|
p.setContactphone(field(rs.getString("contactphone"), info));
|
|
|
|
p.setContactemail(field(rs.getString("contactemail"), info));
|
|
|
|
p.setSummary(field(rs.getString("summary"), info));
|
|
|
|
p.setCurrency(field(rs.getString("currency"), info));
|
|
|
|
p.setTotalcost(new Float(rs.getDouble("totalcost")));
|
|
|
|
p.setFundedamount(new Float(rs.getDouble("fundedamount")));
|
|
|
|
p.setDataInfo(info);
|
|
|
|
p.setLastupdatetimestamp(lastUpdateTimestamp);
|
|
|
|
|
|
|
|
return Arrays.asList(p);
|
|
|
|
|
|
|
|
} catch (final Exception e) {
|
|
|
|
throw new RuntimeException(e);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
public List<Oaf> processOrganization(final ResultSet rs) {
|
|
|
|
try {
|
|
|
|
final DataInfo info = prepareDataInfo(rs);
|
|
|
|
|
|
|
|
final Organization o = new Organization();
|
|
|
|
|
|
|
|
o.setId(createOpenaireId(20, rs.getString("organizationid"), true));
|
|
|
|
o.setOriginalId(Arrays.asList(rs.getString("organizationid")));
|
|
|
|
o
|
2020-04-30 12:45:28 +02:00
|
|
|
.setCollectedfrom(
|
|
|
|
listKeyValues(
|
|
|
|
createOpenaireId(10, rs.getString("collectedfromid"), true),
|
|
|
|
rs.getString("collectedfromname")));
|
2020-11-09 12:58:36 +01:00
|
|
|
o.setPid(prepareListOfStructProps(rs.getArray("pid"), info));
|
2020-04-29 12:56:58 +02:00
|
|
|
o.setDateofcollection(asString(rs.getDate("dateofcollection")));
|
|
|
|
o.setDateoftransformation(asString(rs.getDate("dateoftransformation")));
|
|
|
|
o.setExtraInfo(new ArrayList<>()); // Values not present in the DB
|
|
|
|
o.setOaiprovenance(null); // Values not present in the DB
|
|
|
|
o.setLegalshortname(field(rs.getString("legalshortname"), info));
|
|
|
|
o.setLegalname(field(rs.getString("legalname"), info));
|
2021-04-20 12:45:21 +02:00
|
|
|
o.setAlternativeNames(prepareListFields(rs.getArray("alternativenames"), info));
|
2020-04-29 12:56:58 +02:00
|
|
|
o.setWebsiteurl(field(rs.getString("websiteurl"), info));
|
|
|
|
o.setLogourl(field(rs.getString("logourl"), info));
|
|
|
|
o.setEclegalbody(field(Boolean.toString(rs.getBoolean("eclegalbody")), info));
|
|
|
|
o.setEclegalperson(field(Boolean.toString(rs.getBoolean("eclegalperson")), info));
|
|
|
|
o.setEcnonprofit(field(Boolean.toString(rs.getBoolean("ecnonprofit")), info));
|
|
|
|
o
|
2020-04-30 12:45:28 +02:00
|
|
|
.setEcresearchorganization(field(Boolean.toString(rs.getBoolean("ecresearchorganization")), info));
|
2020-04-29 12:56:58 +02:00
|
|
|
o.setEchighereducation(field(Boolean.toString(rs.getBoolean("echighereducation")), info));
|
|
|
|
o
|
2020-04-30 12:45:28 +02:00
|
|
|
.setEcinternationalorganizationeurinterests(
|
|
|
|
field(Boolean.toString(rs.getBoolean("ecinternationalorganizationeurinterests")), info));
|
2020-04-29 12:56:58 +02:00
|
|
|
o
|
2020-04-30 12:45:28 +02:00
|
|
|
.setEcinternationalorganization(
|
|
|
|
field(Boolean.toString(rs.getBoolean("ecinternationalorganization")), info));
|
2020-04-29 12:56:58 +02:00
|
|
|
o.setEcenterprise(field(Boolean.toString(rs.getBoolean("ecenterprise")), info));
|
|
|
|
o.setEcsmevalidated(field(Boolean.toString(rs.getBoolean("ecsmevalidated")), info));
|
|
|
|
o.setEcnutscode(field(Boolean.toString(rs.getBoolean("ecnutscode")), info));
|
|
|
|
o.setCountry(prepareQualifierSplitting(rs.getString("country")));
|
|
|
|
o.setDataInfo(info);
|
|
|
|
o.setLastupdatetimestamp(lastUpdateTimestamp);
|
|
|
|
|
|
|
|
return Arrays.asList(o);
|
|
|
|
} catch (final Exception e) {
|
|
|
|
throw new RuntimeException(e);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
public List<Oaf> processDatasourceOrganization(final ResultSet rs) {
|
|
|
|
try {
|
|
|
|
final DataInfo info = prepareDataInfo(rs);
|
|
|
|
final String orgId = createOpenaireId(20, rs.getString("organization"), true);
|
|
|
|
final String dsId = createOpenaireId(10, rs.getString("datasource"), true);
|
2020-04-30 12:45:28 +02:00
|
|
|
final List<KeyValue> collectedFrom = listKeyValues(
|
|
|
|
createOpenaireId(10, rs.getString("collectedfromid"), true), rs.getString("collectedfromname"));
|
2020-04-29 12:56:58 +02:00
|
|
|
|
|
|
|
final Relation r1 = new Relation();
|
2020-05-06 13:20:02 +02:00
|
|
|
r1.setRelType(DATASOURCE_ORGANIZATION);
|
|
|
|
r1.setSubRelType(PROVISION);
|
|
|
|
r1.setRelClass(IS_PROVIDED_BY);
|
2020-04-29 12:56:58 +02:00
|
|
|
r1.setSource(dsId);
|
|
|
|
r1.setTarget(orgId);
|
|
|
|
r1.setCollectedfrom(collectedFrom);
|
|
|
|
r1.setDataInfo(info);
|
|
|
|
r1.setLastupdatetimestamp(lastUpdateTimestamp);
|
|
|
|
|
|
|
|
final Relation r2 = new Relation();
|
2020-05-06 13:20:02 +02:00
|
|
|
r2.setRelType(DATASOURCE_ORGANIZATION);
|
|
|
|
r2.setSubRelType(PROVISION);
|
|
|
|
r2.setRelClass(PROVIDES);
|
2020-04-29 12:56:58 +02:00
|
|
|
r2.setSource(orgId);
|
|
|
|
r2.setTarget(dsId);
|
|
|
|
r2.setCollectedfrom(collectedFrom);
|
|
|
|
r2.setDataInfo(info);
|
|
|
|
r2.setLastupdatetimestamp(lastUpdateTimestamp);
|
|
|
|
|
|
|
|
return Arrays.asList(r1, r2);
|
|
|
|
} catch (final Exception e) {
|
|
|
|
throw new RuntimeException(e);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
public List<Oaf> processProjectOrganization(final ResultSet rs) {
|
|
|
|
try {
|
|
|
|
final DataInfo info = prepareDataInfo(rs);
|
|
|
|
final String orgId = createOpenaireId(20, rs.getString("resporganization"), true);
|
|
|
|
final String projectId = createOpenaireId(40, rs.getString("project"), true);
|
2020-04-30 12:45:28 +02:00
|
|
|
final List<KeyValue> collectedFrom = listKeyValues(
|
|
|
|
createOpenaireId(10, rs.getString("collectedfromid"), true), rs.getString("collectedfromname"));
|
2020-04-29 12:56:58 +02:00
|
|
|
|
|
|
|
final Relation r1 = new Relation();
|
2020-05-06 13:20:02 +02:00
|
|
|
r1.setRelType(PROJECT_ORGANIZATION);
|
|
|
|
r1.setSubRelType(PARTICIPATION);
|
|
|
|
r1.setRelClass(HAS_PARTICIPANT);
|
2020-04-29 12:56:58 +02:00
|
|
|
r1.setSource(projectId);
|
|
|
|
r1.setTarget(orgId);
|
|
|
|
r1.setCollectedfrom(collectedFrom);
|
|
|
|
r1.setDataInfo(info);
|
|
|
|
r1.setLastupdatetimestamp(lastUpdateTimestamp);
|
|
|
|
|
|
|
|
final Relation r2 = new Relation();
|
2020-05-06 13:20:02 +02:00
|
|
|
r2.setRelType(PROJECT_ORGANIZATION);
|
|
|
|
r2.setSubRelType(PARTICIPATION);
|
|
|
|
r2.setRelClass(IS_PARTICIPANT);
|
2020-04-29 12:56:58 +02:00
|
|
|
r2.setSource(orgId);
|
|
|
|
r2.setTarget(projectId);
|
|
|
|
r2.setCollectedfrom(collectedFrom);
|
|
|
|
r2.setDataInfo(info);
|
|
|
|
r2.setLastupdatetimestamp(lastUpdateTimestamp);
|
|
|
|
|
|
|
|
return Arrays.asList(r1, r2);
|
|
|
|
} catch (final Exception e) {
|
|
|
|
throw new RuntimeException(e);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
public List<Oaf> processClaims(final ResultSet rs) {
|
|
|
|
try {
|
2021-06-23 11:49:42 +02:00
|
|
|
final String sourceType = rs.getString(SOURCE_TYPE);
|
|
|
|
final String targetType = rs.getString(TARGET_TYPE);
|
|
|
|
if (sourceType.equals("context")) {
|
2020-04-29 12:56:58 +02:00
|
|
|
final Result r;
|
|
|
|
|
2021-06-23 11:49:42 +02:00
|
|
|
if (targetType.equals("dataset")) {
|
2020-04-29 12:56:58 +02:00
|
|
|
r = new Dataset();
|
2020-05-06 13:20:02 +02:00
|
|
|
r.setResulttype(DATASET_DEFAULT_RESULTTYPE);
|
2021-06-23 11:49:42 +02:00
|
|
|
} else if (targetType.equals("software")) {
|
2020-04-29 12:56:58 +02:00
|
|
|
r = new Software();
|
2020-05-06 13:20:02 +02:00
|
|
|
r.setResulttype(SOFTWARE_DEFAULT_RESULTTYPE);
|
2021-06-23 11:49:42 +02:00
|
|
|
} else if (targetType.equals("other")) {
|
2020-04-29 12:56:58 +02:00
|
|
|
r = new OtherResearchProduct();
|
2020-05-06 13:20:02 +02:00
|
|
|
r.setResulttype(ORP_DEFAULT_RESULTTYPE);
|
2020-04-29 12:56:58 +02:00
|
|
|
} else {
|
|
|
|
r = new Publication();
|
2020-05-06 13:20:02 +02:00
|
|
|
r.setResulttype(PUBLICATION_DEFAULT_RESULTTYPE);
|
2020-04-29 12:56:58 +02:00
|
|
|
}
|
|
|
|
r.setId(createOpenaireId(50, rs.getString("target_id"), false));
|
|
|
|
r.setLastupdatetimestamp(lastUpdateTimestamp);
|
2021-06-23 11:49:42 +02:00
|
|
|
r.setContext(prepareContext(rs.getString("source_id"), DATA_INFO_CLAIM));
|
|
|
|
r.setDataInfo(DATA_INFO_CLAIM);
|
|
|
|
r.setCollectedfrom(COLLECTED_FROM_CLAIM);
|
2020-04-29 12:56:58 +02:00
|
|
|
|
|
|
|
return Arrays.asList(r);
|
|
|
|
} else {
|
2020-12-14 15:41:25 +01:00
|
|
|
final String validationDate = rs.getString("curation_date");
|
|
|
|
|
2021-06-23 11:49:42 +02:00
|
|
|
final String sourceId = createOpenaireId(sourceType, rs.getString("source_id"), false);
|
|
|
|
final String targetId = createOpenaireId(targetType, rs.getString("target_id"), false);
|
2020-04-29 12:56:58 +02:00
|
|
|
|
2021-07-07 11:08:27 +02:00
|
|
|
Relation r1 = prepareRelation(sourceId, targetId, validationDate);
|
|
|
|
Relation r2 = prepareRelation(targetId, sourceId, validationDate);
|
2020-12-14 15:41:25 +01:00
|
|
|
|
2021-06-23 11:49:42 +02:00
|
|
|
final String semantics = rs.getString("semantics");
|
|
|
|
|
|
|
|
switch (semantics) {
|
|
|
|
case "resultResult_relationship_isRelatedTo":
|
2021-07-07 11:08:27 +02:00
|
|
|
r1 = setRelationSemantic(r1, RESULT_RESULT, RELATIONSHIP, IS_RELATED_TO);
|
|
|
|
r2 = setRelationSemantic(r2, RESULT_RESULT, RELATIONSHIP, IS_RELATED_TO);
|
2021-06-23 11:49:42 +02:00
|
|
|
break;
|
|
|
|
case "resultProject_outcome_produces":
|
|
|
|
if (!"project".equals(sourceType)) {
|
|
|
|
throw new IllegalStateException(
|
|
|
|
String
|
|
|
|
.format(
|
|
|
|
"invalid claim, sourceId: %s, targetId: %s, semantics: %s",
|
|
|
|
sourceId, targetId, semantics));
|
|
|
|
}
|
2021-07-07 11:08:27 +02:00
|
|
|
r1 = setRelationSemantic(r1, RESULT_PROJECT, OUTCOME, PRODUCES);
|
|
|
|
r2 = setRelationSemantic(r2, RESULT_PROJECT, OUTCOME, IS_PRODUCED_BY);
|
2021-06-23 11:49:42 +02:00
|
|
|
break;
|
2021-07-06 21:11:11 +02:00
|
|
|
case "resultResult_publicationDataset_isRelatedTo":
|
2021-07-07 11:08:27 +02:00
|
|
|
r1 = setRelationSemantic(r1, RESULT_PROJECT, PUBLICATION_DATASET, IS_RELATED_TO);
|
|
|
|
r2 = setRelationSemantic(r2, RESULT_PROJECT, PUBLICATION_DATASET, IS_RELATED_TO);
|
2021-07-06 21:11:11 +02:00
|
|
|
break;
|
2021-06-23 11:49:42 +02:00
|
|
|
default:
|
|
|
|
throw new IllegalArgumentException("claim semantics not managed: " + semantics);
|
2020-04-29 12:56:58 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
return Arrays.asList(r1, r2);
|
|
|
|
}
|
|
|
|
} catch (final Exception e) {
|
|
|
|
throw new RuntimeException(e);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-07-07 11:08:27 +02:00
|
|
|
private Relation prepareRelation(String sourceId, String targetId, String validationDate) {
|
|
|
|
Relation r = new Relation();
|
|
|
|
if (StringUtils.isNotBlank(validationDate)) {
|
|
|
|
r.setValidated(true);
|
|
|
|
r.setValidationDate(validationDate);
|
|
|
|
}
|
|
|
|
r.setCollectedfrom(COLLECTED_FROM_CLAIM);
|
|
|
|
r.setSource(sourceId);
|
|
|
|
r.setTarget(targetId);
|
|
|
|
r.setDataInfo(DATA_INFO_CLAIM);
|
|
|
|
r.setLastupdatetimestamp(lastUpdateTimestamp);
|
|
|
|
return r;
|
|
|
|
}
|
|
|
|
|
|
|
|
private Relation setRelationSemantic(Relation r, String relType, String subRelType, String relClass) {
|
|
|
|
r.setRelType(relType);
|
|
|
|
r.setSubRelType(subRelType);
|
|
|
|
r.setRelClass(relClass);
|
|
|
|
return r;
|
|
|
|
}
|
|
|
|
|
2020-04-29 12:56:58 +02:00
|
|
|
private List<Context> prepareContext(final String id, final DataInfo dataInfo) {
|
|
|
|
final Context context = new Context();
|
|
|
|
context.setId(id);
|
|
|
|
context.setDataInfo(Arrays.asList(dataInfo));
|
|
|
|
return Arrays.asList(context);
|
|
|
|
}
|
|
|
|
|
|
|
|
private DataInfo prepareDataInfo(final ResultSet rs) throws SQLException {
|
|
|
|
final Boolean deletedbyinference = rs.getBoolean("deletedbyinference");
|
|
|
|
final String inferenceprovenance = rs.getString("inferenceprovenance");
|
|
|
|
final Boolean inferred = rs.getBoolean("inferred");
|
2020-07-17 11:58:11 +02:00
|
|
|
|
|
|
|
final double trust = rs.getDouble("trust");
|
|
|
|
|
2020-04-30 12:45:28 +02:00
|
|
|
return dataInfo(
|
2020-07-17 11:58:11 +02:00
|
|
|
deletedbyinference, inferenceprovenance, inferred, false, ENTITYREGISTRY_PROVENANCE_ACTION,
|
|
|
|
String.format("%.3f", trust));
|
2020-04-29 12:56:58 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
private Qualifier prepareQualifierSplitting(final String s) {
|
2020-04-30 12:45:28 +02:00
|
|
|
if (StringUtils.isBlank(s)) {
|
|
|
|
return null;
|
|
|
|
}
|
2020-04-29 12:56:58 +02:00
|
|
|
final String[] arr = s.split("@@@");
|
2020-05-29 12:03:51 +02:00
|
|
|
return arr.length == 2 ? vocs.getTermAsQualifier(arr[1], arr[0]) : null;
|
2020-04-29 12:56:58 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
private List<Field<String>> prepareListFields(final Array array, final DataInfo info) {
|
|
|
|
try {
|
|
|
|
return array != null ? listFields(info, (String[]) array.getArray()) : new ArrayList<>();
|
|
|
|
} catch (final SQLException e) {
|
|
|
|
throw new RuntimeException("Invalid SQL array", e);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
private StructuredProperty prepareStructProp(final String s, final DataInfo dataInfo) {
|
2020-04-30 12:45:28 +02:00
|
|
|
if (StringUtils.isBlank(s)) {
|
|
|
|
return null;
|
|
|
|
}
|
2020-04-29 12:56:58 +02:00
|
|
|
final String[] parts = s.split("###");
|
|
|
|
if (parts.length == 2) {
|
|
|
|
final String value = parts[0];
|
|
|
|
final String[] arr = parts[1].split("@@@");
|
2020-05-29 12:03:51 +02:00
|
|
|
if (arr.length == 2) {
|
|
|
|
return structuredProperty(value, vocs.getTermAsQualifier(arr[1], arr[0]), dataInfo);
|
2020-04-30 12:45:28 +02:00
|
|
|
}
|
2020-04-29 12:56:58 +02:00
|
|
|
}
|
|
|
|
return null;
|
|
|
|
}
|
|
|
|
|
|
|
|
private List<StructuredProperty> prepareListOfStructProps(
|
2020-04-30 12:45:28 +02:00
|
|
|
final Array array,
|
|
|
|
final DataInfo dataInfo) throws SQLException {
|
2020-04-29 12:56:58 +02:00
|
|
|
final List<StructuredProperty> res = new ArrayList<>();
|
|
|
|
if (array != null) {
|
|
|
|
for (final String s : (String[]) array.getArray()) {
|
|
|
|
final StructuredProperty sp = prepareStructProp(s, dataInfo);
|
|
|
|
if (sp != null) {
|
|
|
|
res.add(sp);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return res;
|
|
|
|
}
|
|
|
|
|
2021-04-07 14:27:43 +02:00
|
|
|
public List<Oaf> processOrgOrgMergeRels(final ResultSet rs) {
|
|
|
|
try {
|
|
|
|
final DataInfo info = prepareDataInfo(rs); // TODO
|
|
|
|
|
|
|
|
final String orgId1 = createOpenaireId(20, rs.getString("id1"), true);
|
|
|
|
final String orgId2 = createOpenaireId(20, rs.getString("id2"), true);
|
|
|
|
|
|
|
|
final List<KeyValue> collectedFrom = listKeyValues(
|
|
|
|
createOpenaireId(10, rs.getString("collectedfromid"), true), rs.getString("collectedfromname"));
|
|
|
|
|
|
|
|
final Relation r1 = new Relation();
|
|
|
|
r1.setRelType(ORG_ORG_RELTYPE);
|
|
|
|
r1.setSubRelType(ModelConstants.DEDUP);
|
|
|
|
r1.setRelClass(MERGES);
|
|
|
|
r1.setSource(orgId1);
|
|
|
|
r1.setTarget(orgId2);
|
|
|
|
r1.setCollectedfrom(collectedFrom);
|
|
|
|
r1.setDataInfo(info);
|
|
|
|
r1.setLastupdatetimestamp(lastUpdateTimestamp);
|
|
|
|
|
|
|
|
final Relation r2 = new Relation();
|
|
|
|
r2.setRelType(ORG_ORG_RELTYPE);
|
|
|
|
r2.setSubRelType(ModelConstants.DEDUP);
|
|
|
|
r2.setRelClass(IS_MERGED_IN);
|
|
|
|
r2.setSource(orgId2);
|
|
|
|
r2.setTarget(orgId1);
|
|
|
|
r2.setCollectedfrom(collectedFrom);
|
|
|
|
r2.setDataInfo(info);
|
|
|
|
r2.setLastupdatetimestamp(lastUpdateTimestamp);
|
|
|
|
return Arrays.asList(r1, r2);
|
|
|
|
} catch (final Exception e) {
|
|
|
|
throw new RuntimeException(e);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-02-10 11:51:50 +01:00
|
|
|
public List<Oaf> processOrgOrgSimRels(final ResultSet rs) {
|
|
|
|
try {
|
|
|
|
final DataInfo info = prepareDataInfo(rs); // TODO
|
|
|
|
|
|
|
|
final String orgId1 = createOpenaireId(20, rs.getString("id1"), true);
|
2021-02-26 10:19:28 +01:00
|
|
|
final String orgId2 = createOpenaireId(20, rs.getString("id2"), true);
|
2021-02-10 11:51:50 +01:00
|
|
|
final String relClass = rs.getString("relclass");
|
|
|
|
|
|
|
|
final List<KeyValue> collectedFrom = listKeyValues(
|
|
|
|
createOpenaireId(10, rs.getString("collectedfromid"), true), rs.getString("collectedfromname"));
|
|
|
|
|
|
|
|
final Relation r1 = new Relation();
|
2021-04-07 14:27:43 +02:00
|
|
|
r1.setRelType(ORG_ORG_RELTYPE);
|
2021-03-31 17:07:13 +02:00
|
|
|
r1.setSubRelType(ModelConstants.DEDUP);
|
2021-02-10 11:51:50 +01:00
|
|
|
r1.setRelClass(relClass);
|
|
|
|
r1.setSource(orgId1);
|
|
|
|
r1.setTarget(orgId2);
|
|
|
|
r1.setCollectedfrom(collectedFrom);
|
|
|
|
r1.setDataInfo(info);
|
|
|
|
r1.setLastupdatetimestamp(lastUpdateTimestamp);
|
|
|
|
|
2021-03-29 10:07:12 +02:00
|
|
|
// removed because there's no difference between two sides //TODO
|
|
|
|
// final Relation r2 = new Relation();
|
|
|
|
// r2.setRelType(ORG_ORG_RELTYPE);
|
|
|
|
// r2.setSubRelType(ORG_ORG_SUBRELTYPE);
|
|
|
|
// r2.setRelClass(relClass);
|
|
|
|
// r2.setSource(orgId2);
|
|
|
|
// r2.setTarget(orgId1);
|
|
|
|
// r2.setCollectedfrom(collectedFrom);
|
|
|
|
// r2.setDataInfo(info);
|
|
|
|
// r2.setLastupdatetimestamp(lastUpdateTimestamp);
|
|
|
|
// return Arrays.asList(r1, r2);
|
|
|
|
|
|
|
|
return Arrays.asList(r1);
|
2021-02-10 11:51:50 +01:00
|
|
|
} catch (final Exception e) {
|
|
|
|
throw new RuntimeException(e);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-04-29 12:56:58 +02:00
|
|
|
@Override
|
|
|
|
public void close() throws IOException {
|
|
|
|
super.close();
|
|
|
|
dbClient.close();
|
|
|
|
}
|
2020-05-14 15:07:24 +02:00
|
|
|
|
2020-01-17 15:26:21 +01:00
|
|
|
}
|