formatting

This commit is contained in:
Michele Artini 2020-04-29 12:56:58 +02:00
parent a5d7007005
commit c43b4c8962
2 changed files with 473 additions and 508 deletions

View File

@ -1,3 +1,4 @@
package eu.dnetlib.dhp.oa.graph.raw; package eu.dnetlib.dhp.oa.graph.raw;
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.asString; import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.asString;
@ -10,6 +11,23 @@ import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.listKeyValues;
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.qualifier; import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.qualifier;
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.structuredProperty; import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.structuredProperty;
import java.io.Closeable;
import java.io.IOException;
import java.sql.Array;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.List;
import java.util.function.Consumer;
import java.util.function.Function;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.oa.graph.raw.common.AbstractMigrationApplication; import eu.dnetlib.dhp.oa.graph.raw.common.AbstractMigrationApplication;
import eu.dnetlib.dhp.oa.graph.raw.common.DbClient; import eu.dnetlib.dhp.oa.graph.raw.common.DbClient;
@ -31,512 +49,459 @@ import eu.dnetlib.dhp.schema.oaf.Relation;
import eu.dnetlib.dhp.schema.oaf.Result; import eu.dnetlib.dhp.schema.oaf.Result;
import eu.dnetlib.dhp.schema.oaf.Software; import eu.dnetlib.dhp.schema.oaf.Software;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty; import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
import java.io.Closeable;
import java.io.IOException;
import java.sql.Array;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.List;
import java.util.function.Consumer;
import java.util.function.Function;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
public class MigrateDbEntitiesApplication extends AbstractMigrationApplication public class MigrateDbEntitiesApplication extends AbstractMigrationApplication
implements Closeable { implements Closeable {
// Class-wide logger.
private static final Log log = LogFactory.getLog(MigrateDbEntitiesApplication.class); private static final Log log = LogFactory.getLog(MigrateDbEntitiesApplication.class);
// Client used to run the SQL queries; null when the no-arg (unit test) constructor is used.
private final DbClient dbClient; private final DbClient dbClient;
// Epoch milliseconds captured at construction time and stamped on every produced object.
private final long lastUpdateTimestamp; private final long lastUpdateTimestamp;
public static void main(final String[] args) throws Exception { public static void main(final String[] args) throws Exception {
final ArgumentApplicationParser parser = final ArgumentApplicationParser parser = new ArgumentApplicationParser(
new ArgumentApplicationParser( IOUtils
IOUtils.toString( .toString(MigrateDbEntitiesApplication.class
MigrateDbEntitiesApplication.class.getResourceAsStream( .getResourceAsStream("/eu/dnetlib/dhp/oa/graph/migrate_db_entities_parameters.json")));
"/eu/dnetlib/dhp/oa/graph/migrate_db_entities_parameters.json")));
parser.parseArgument(args);
parser.parseArgument(args);
final String dbUrl = parser.get("postgresUrl");
final String dbUrl = parser.get("postgresUrl"); final String dbUser = parser.get("postgresUser");
final String dbUser = parser.get("postgresUser"); final String dbPassword = parser.get("postgresPassword");
final String dbPassword = parser.get("postgresPassword");
final String hdfsPath = parser.get("hdfsPath");
final String hdfsPath = parser.get("hdfsPath");
final boolean processClaims = parser.get("action") != null && parser.get("action").equalsIgnoreCase("claims");
final boolean processClaims =
parser.get("action") != null && parser.get("action").equalsIgnoreCase("claims"); try (final MigrateDbEntitiesApplication smdbe = new MigrateDbEntitiesApplication(hdfsPath, dbUrl, dbUser,
dbPassword)) {
try (final MigrateDbEntitiesApplication smdbe = if (processClaims) {
new MigrateDbEntitiesApplication(hdfsPath, dbUrl, dbUser, dbPassword)) { log.info("Processing claims...");
if (processClaims) { smdbe.execute("queryClaims.sql", smdbe::processClaims);
log.info("Processing claims..."); } else {
smdbe.execute("queryClaims.sql", smdbe::processClaims); log.info("Processing datasources...");
} else { smdbe.execute("queryDatasources.sql", smdbe::processDatasource);
log.info("Processing datasources...");
smdbe.execute("queryDatasources.sql", smdbe::processDatasource); log.info("Processing projects...");
smdbe.execute("queryProjects.sql", smdbe::processProject);
log.info("Processing projects...");
smdbe.execute("queryProjects.sql", smdbe::processProject); log.info("Processing orgs...");
smdbe.execute("queryOrganizations.sql", smdbe::processOrganization);
log.info("Processing orgs...");
smdbe.execute("queryOrganizations.sql", smdbe::processOrganization); log.info("Processing relations ds <-> orgs ...");
smdbe.execute("queryDatasourceOrganization.sql", smdbe::processDatasourceOrganization);
log.info("Processing relations ds <-> orgs ...");
smdbe.execute("queryDatasourceOrganization.sql", smdbe::processDatasourceOrganization); log.info("Processing projects <-> orgs ...");
smdbe.execute("queryProjectOrganization.sql", smdbe::processProjectOrganization);
log.info("Processing projects <-> orgs ..."); }
smdbe.execute("queryProjectOrganization.sql", smdbe::processProjectOrganization); log.info("All done.");
} }
log.info("All done."); }
}
/**
 * No-argument constructor, only meant for unit tests: no database client is created, so
 * {@code dbClient} is left {@code null}.
 */
protected MigrateDbEntitiesApplication() { // ONLY FOR UNIT TEST
    super();
    this.lastUpdateTimestamp = System.currentTimeMillis();
    this.dbClient = null;
}
} public MigrateDbEntitiesApplication(
final String hdfsPath, final String dbUrl, final String dbUser, final String dbPassword)
public MigrateDbEntitiesApplication( throws Exception {
final String hdfsPath, final String dbUrl, final String dbUser, final String dbPassword) super(hdfsPath);
throws Exception { this.dbClient = new DbClient(dbUrl, dbUser, dbPassword);
super(hdfsPath); this.lastUpdateTimestamp = new Date().getTime();
this.dbClient = new DbClient(dbUrl, dbUser, dbPassword); }
this.lastUpdateTimestamp = new Date().getTime();
} public void execute(final String sqlFile, final Function<ResultSet, List<Oaf>> producer)
throws Exception {
public void execute(final String sqlFile, final Function<ResultSet, List<Oaf>> producer) final String sql = IOUtils.toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/oa/graph/sql/" + sqlFile));
throws Exception {
final String sql = final Consumer<ResultSet> consumer = rs -> producer.apply(rs).forEach(oaf -> emitOaf(oaf));
IOUtils.toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/oa/graph/sql/" + sqlFile));
dbClient.processResults(sql, consumer);
final Consumer<ResultSet> consumer = rs -> producer.apply(rs).forEach(oaf -> emitOaf(oaf)); }
dbClient.processResults(sql, consumer); public List<Oaf> processDatasource(final ResultSet rs) {
}
try {
public List<Oaf> processDatasource(final ResultSet rs) {
final DataInfo info = prepareDataInfo(rs);
try {
final Datasource ds = new Datasource();
final DataInfo info = prepareDataInfo(rs);
ds.setId(createOpenaireId(10, rs.getString("datasourceid"), true));
final Datasource ds = new Datasource(); ds.setOriginalId(Arrays.asList(rs.getString("datasourceid")));
ds
ds.setId(createOpenaireId(10, rs.getString("datasourceid"), true)); .setCollectedfrom(listKeyValues(createOpenaireId(10, rs.getString("collectedfromid"), true), rs.getString("collectedfromname")));
ds.setOriginalId(Arrays.asList(rs.getString("datasourceid"))); ds.setPid(new ArrayList<>());
ds.setCollectedfrom( ds.setDateofcollection(asString(rs.getDate("dateofcollection")));
listKeyValues( ds.setDateoftransformation(null); // Value not returned by the SQL query
createOpenaireId(10, rs.getString("collectedfromid"), true), ds.setExtraInfo(new ArrayList<>()); // Values not present in the DB
rs.getString("collectedfromname"))); ds.setOaiprovenance(null); // Values not present in the DB
ds.setPid(new ArrayList<>()); ds.setDatasourcetype(prepareQualifierSplitting(rs.getString("datasourcetype")));
ds.setDateofcollection(asString(rs.getDate("dateofcollection"))); ds.setOpenairecompatibility(prepareQualifierSplitting(rs.getString("openairecompatibility")));
ds.setDateoftransformation(null); // Value not returned by the SQL query ds.setOfficialname(field(rs.getString("officialname"), info));
ds.setExtraInfo(new ArrayList<>()); // Values not present in the DB ds.setEnglishname(field(rs.getString("englishname"), info));
ds.setOaiprovenance(null); // Values not present in the DB ds.setWebsiteurl(field(rs.getString("websiteurl"), info));
ds.setDatasourcetype(prepareQualifierSplitting(rs.getString("datasourcetype"))); ds.setLogourl(field(rs.getString("logourl"), info));
ds.setOpenairecompatibility(prepareQualifierSplitting(rs.getString("openairecompatibility"))); ds.setContactemail(field(rs.getString("contactemail"), info));
ds.setOfficialname(field(rs.getString("officialname"), info)); ds.setNamespaceprefix(field(rs.getString("namespaceprefix"), info));
ds.setEnglishname(field(rs.getString("englishname"), info)); ds.setLatitude(field(Double.toString(rs.getDouble("latitude")), info));
ds.setWebsiteurl(field(rs.getString("websiteurl"), info)); ds.setLongitude(field(Double.toString(rs.getDouble("longitude")), info));
ds.setLogourl(field(rs.getString("logourl"), info)); ds.setDateofvalidation(field(asString(rs.getDate("dateofvalidation")), info));
ds.setContactemail(field(rs.getString("contactemail"), info)); ds.setDescription(field(rs.getString("description"), info));
ds.setNamespaceprefix(field(rs.getString("namespaceprefix"), info)); ds.setSubjects(prepareListOfStructProps(rs.getArray("subjects"), info));
ds.setLatitude(field(Double.toString(rs.getDouble("latitude")), info)); ds.setOdnumberofitems(field(Double.toString(rs.getInt("odnumberofitems")), info));
ds.setLongitude(field(Double.toString(rs.getDouble("longitude")), info)); ds.setOdnumberofitemsdate(field(asString(rs.getDate("odnumberofitemsdate")), info));
ds.setDateofvalidation(field(asString(rs.getDate("dateofvalidation")), info)); ds.setOdpolicies(field(rs.getString("odpolicies"), info));
ds.setDescription(field(rs.getString("description"), info)); ds.setOdlanguages(prepareListFields(rs.getArray("odlanguages"), info));
ds.setSubjects(prepareListOfStructProps(rs.getArray("subjects"), info)); ds.setOdcontenttypes(prepareListFields(rs.getArray("odcontenttypes"), info));
ds.setOdnumberofitems(field(Double.toString(rs.getInt("odnumberofitems")), info)); ds.setAccessinfopackage(prepareListFields(rs.getArray("accessinfopackage"), info));
ds.setOdnumberofitemsdate(field(asString(rs.getDate("odnumberofitemsdate")), info)); ds.setReleasestartdate(field(asString(rs.getDate("releasestartdate")), info));
ds.setOdpolicies(field(rs.getString("odpolicies"), info)); ds.setReleaseenddate(field(asString(rs.getDate("releaseenddate")), info));
ds.setOdlanguages(prepareListFields(rs.getArray("odlanguages"), info)); ds.setMissionstatementurl(field(rs.getString("missionstatementurl"), info));
ds.setOdcontenttypes(prepareListFields(rs.getArray("odcontenttypes"), info)); ds.setDataprovider(field(rs.getBoolean("dataprovider"), info));
ds.setAccessinfopackage(prepareListFields(rs.getArray("accessinfopackage"), info)); ds.setServiceprovider(field(rs.getBoolean("serviceprovider"), info));
ds.setReleasestartdate(field(asString(rs.getDate("releasestartdate")), info)); ds.setDatabaseaccesstype(field(rs.getString("databaseaccesstype"), info));
ds.setReleaseenddate(field(asString(rs.getDate("releaseenddate")), info)); ds.setDatauploadtype(field(rs.getString("datauploadtype"), info));
ds.setMissionstatementurl(field(rs.getString("missionstatementurl"), info)); ds.setDatabaseaccessrestriction(field(rs.getString("databaseaccessrestriction"), info));
ds.setDataprovider(field(rs.getBoolean("dataprovider"), info)); ds.setDatauploadrestriction(field(rs.getString("datauploadrestriction"), info));
ds.setServiceprovider(field(rs.getBoolean("serviceprovider"), info)); ds.setVersioning(field(rs.getBoolean("versioning"), info));
ds.setDatabaseaccesstype(field(rs.getString("databaseaccesstype"), info)); ds.setCitationguidelineurl(field(rs.getString("citationguidelineurl"), info));
ds.setDatauploadtype(field(rs.getString("datauploadtype"), info)); ds.setQualitymanagementkind(field(rs.getString("qualitymanagementkind"), info));
ds.setDatabaseaccessrestriction(field(rs.getString("databaseaccessrestriction"), info)); ds.setPidsystems(field(rs.getString("pidsystems"), info));
ds.setDatauploadrestriction(field(rs.getString("datauploadrestriction"), info)); ds.setCertificates(field(rs.getString("certificates"), info));
ds.setVersioning(field(rs.getBoolean("versioning"), info)); ds.setPolicies(new ArrayList<>()); // The sql query returns an empty array
ds.setCitationguidelineurl(field(rs.getString("citationguidelineurl"), info)); ds
ds.setQualitymanagementkind(field(rs.getString("qualitymanagementkind"), info)); .setJournal(prepareJournal(rs.getString("officialname"), rs.getString("journal"), info)); // Journal
ds.setPidsystems(field(rs.getString("pidsystems"), info)); ds.setDataInfo(info);
ds.setCertificates(field(rs.getString("certificates"), info)); ds.setLastupdatetimestamp(lastUpdateTimestamp);
ds.setPolicies(new ArrayList<>()); // The sql query returns an empty array
ds.setJournal( return Arrays.asList(ds);
prepareJournal(rs.getString("officialname"), rs.getString("journal"), info)); // Journal } catch (final Exception e) {
ds.setDataInfo(info); throw new RuntimeException(e);
ds.setLastupdatetimestamp(lastUpdateTimestamp); }
}
return Arrays.asList(ds);
} catch (final Exception e) { public List<Oaf> processProject(final ResultSet rs) {
throw new RuntimeException(e); try {
}
} final DataInfo info = prepareDataInfo(rs);
public List<Oaf> processProject(final ResultSet rs) { final Project p = new Project();
try {
p.setId(createOpenaireId(40, rs.getString("projectid"), true));
final DataInfo info = prepareDataInfo(rs); p.setOriginalId(Arrays.asList(rs.getString("projectid")));
p
final Project p = new Project(); .setCollectedfrom(listKeyValues(createOpenaireId(10, rs.getString("collectedfromid"), true), rs.getString("collectedfromname")));
p.setPid(new ArrayList<>());
p.setId(createOpenaireId(40, rs.getString("projectid"), true)); p.setDateofcollection(asString(rs.getDate("dateofcollection")));
p.setOriginalId(Arrays.asList(rs.getString("projectid"))); p.setDateoftransformation(asString(rs.getDate("dateoftransformation")));
p.setCollectedfrom( p.setExtraInfo(new ArrayList<>()); // Values not present in the DB
listKeyValues( p.setOaiprovenance(null); // Values not present in the DB
createOpenaireId(10, rs.getString("collectedfromid"), true), p.setWebsiteurl(field(rs.getString("websiteurl"), info));
rs.getString("collectedfromname"))); p.setCode(field(rs.getString("code"), info));
p.setPid(new ArrayList<>()); p.setAcronym(field(rs.getString("acronym"), info));
p.setDateofcollection(asString(rs.getDate("dateofcollection"))); p.setTitle(field(rs.getString("title"), info));
p.setDateoftransformation(asString(rs.getDate("dateoftransformation"))); p.setStartdate(field(asString(rs.getDate("startdate")), info));
p.setExtraInfo(new ArrayList<>()); // Values not present in the DB p.setEnddate(field(asString(rs.getDate("enddate")), info));
p.setOaiprovenance(null); // Values not present in the DB p.setCallidentifier(field(rs.getString("callidentifier"), info));
p.setWebsiteurl(field(rs.getString("websiteurl"), info)); p.setKeywords(field(rs.getString("keywords"), info));
p.setCode(field(rs.getString("code"), info)); p.setDuration(field(Integer.toString(rs.getInt("duration")), info));
p.setAcronym(field(rs.getString("acronym"), info)); p.setEcsc39(field(Boolean.toString(rs.getBoolean("ecsc39")), info));
p.setTitle(field(rs.getString("title"), info)); p
p.setStartdate(field(asString(rs.getDate("startdate")), info)); .setOamandatepublications(field(Boolean.toString(rs.getBoolean("oamandatepublications")), info));
p.setEnddate(field(asString(rs.getDate("enddate")), info)); p.setEcarticle29_3(field(Boolean.toString(rs.getBoolean("ecarticle29_3")), info));
p.setCallidentifier(field(rs.getString("callidentifier"), info)); p.setSubjects(prepareListOfStructProps(rs.getArray("subjects"), info));
p.setKeywords(field(rs.getString("keywords"), info)); p.setFundingtree(prepareListFields(rs.getArray("fundingtree"), info));
p.setDuration(field(Integer.toString(rs.getInt("duration")), info)); p.setContracttype(prepareQualifierSplitting(rs.getString("contracttype")));
p.setEcsc39(field(Boolean.toString(rs.getBoolean("ecsc39")), info)); p.setOptional1(field(rs.getString("optional1"), info));
p.setOamandatepublications( p.setOptional2(field(rs.getString("optional2"), info));
field(Boolean.toString(rs.getBoolean("oamandatepublications")), info)); p.setJsonextrainfo(field(rs.getString("jsonextrainfo"), info));
p.setEcarticle29_3(field(Boolean.toString(rs.getBoolean("ecarticle29_3")), info)); p.setContactfullname(field(rs.getString("contactfullname"), info));
p.setSubjects(prepareListOfStructProps(rs.getArray("subjects"), info)); p.setContactfax(field(rs.getString("contactfax"), info));
p.setFundingtree(prepareListFields(rs.getArray("fundingtree"), info)); p.setContactphone(field(rs.getString("contactphone"), info));
p.setContracttype(prepareQualifierSplitting(rs.getString("contracttype"))); p.setContactemail(field(rs.getString("contactemail"), info));
p.setOptional1(field(rs.getString("optional1"), info)); p.setSummary(field(rs.getString("summary"), info));
p.setOptional2(field(rs.getString("optional2"), info)); p.setCurrency(field(rs.getString("currency"), info));
p.setJsonextrainfo(field(rs.getString("jsonextrainfo"), info)); p.setTotalcost(new Float(rs.getDouble("totalcost")));
p.setContactfullname(field(rs.getString("contactfullname"), info)); p.setFundedamount(new Float(rs.getDouble("fundedamount")));
p.setContactfax(field(rs.getString("contactfax"), info)); p.setDataInfo(info);
p.setContactphone(field(rs.getString("contactphone"), info)); p.setLastupdatetimestamp(lastUpdateTimestamp);
p.setContactemail(field(rs.getString("contactemail"), info));
p.setSummary(field(rs.getString("summary"), info)); return Arrays.asList(p);
p.setCurrency(field(rs.getString("currency"), info));
p.setTotalcost(new Float(rs.getDouble("totalcost"))); } catch (final Exception e) {
p.setFundedamount(new Float(rs.getDouble("fundedamount"))); throw new RuntimeException(e);
p.setDataInfo(info); }
p.setLastupdatetimestamp(lastUpdateTimestamp); }
return Arrays.asList(p); public List<Oaf> processOrganization(final ResultSet rs) {
} catch (final Exception e) { try {
throw new RuntimeException(e);
} final DataInfo info = prepareDataInfo(rs);
}
final Organization o = new Organization();
public List<Oaf> processOrganization(final ResultSet rs) {
o.setId(createOpenaireId(20, rs.getString("organizationid"), true));
try { o.setOriginalId(Arrays.asList(rs.getString("organizationid")));
o
final DataInfo info = prepareDataInfo(rs); .setCollectedfrom(listKeyValues(createOpenaireId(10, rs.getString("collectedfromid"), true), rs.getString("collectedfromname")));
o.setPid(new ArrayList<>());
final Organization o = new Organization(); o.setDateofcollection(asString(rs.getDate("dateofcollection")));
o.setDateoftransformation(asString(rs.getDate("dateoftransformation")));
o.setId(createOpenaireId(20, rs.getString("organizationid"), true)); o.setExtraInfo(new ArrayList<>()); // Values not present in the DB
o.setOriginalId(Arrays.asList(rs.getString("organizationid"))); o.setOaiprovenance(null); // Values not present in the DB
o.setCollectedfrom( o.setLegalshortname(field(rs.getString("legalshortname"), info));
listKeyValues( o.setLegalname(field(rs.getString("legalname"), info));
createOpenaireId(10, rs.getString("collectedfromid"), true), o.setAlternativeNames(new ArrayList<>()); // Values not returned by the SQL query
rs.getString("collectedfromname"))); o.setWebsiteurl(field(rs.getString("websiteurl"), info));
o.setPid(new ArrayList<>()); o.setLogourl(field(rs.getString("logourl"), info));
o.setDateofcollection(asString(rs.getDate("dateofcollection"))); o.setEclegalbody(field(Boolean.toString(rs.getBoolean("eclegalbody")), info));
o.setDateoftransformation(asString(rs.getDate("dateoftransformation"))); o.setEclegalperson(field(Boolean.toString(rs.getBoolean("eclegalperson")), info));
o.setExtraInfo(new ArrayList<>()); // Values not present in the DB o.setEcnonprofit(field(Boolean.toString(rs.getBoolean("ecnonprofit")), info));
o.setOaiprovenance(null); // Values not present in the DB o
o.setLegalshortname(field(rs.getString("legalshortname"), info)); .setEcresearchorganization(field(Boolean.toString(rs.getBoolean("ecresearchorganization")), info));
o.setLegalname(field(rs.getString("legalname"), info)); o.setEchighereducation(field(Boolean.toString(rs.getBoolean("echighereducation")), info));
o.setAlternativeNames(new ArrayList<>()); // Values not returned by the SQL query o
o.setWebsiteurl(field(rs.getString("websiteurl"), info)); .setEcinternationalorganizationeurinterests(field(Boolean.toString(rs.getBoolean("ecinternationalorganizationeurinterests")), info));
o.setLogourl(field(rs.getString("logourl"), info)); o
o.setEclegalbody(field(Boolean.toString(rs.getBoolean("eclegalbody")), info)); .setEcinternationalorganization(field(Boolean.toString(rs.getBoolean("ecinternationalorganization")), info));
o.setEclegalperson(field(Boolean.toString(rs.getBoolean("eclegalperson")), info)); o.setEcenterprise(field(Boolean.toString(rs.getBoolean("ecenterprise")), info));
o.setEcnonprofit(field(Boolean.toString(rs.getBoolean("ecnonprofit")), info)); o.setEcsmevalidated(field(Boolean.toString(rs.getBoolean("ecsmevalidated")), info));
o.setEcresearchorganization( o.setEcnutscode(field(Boolean.toString(rs.getBoolean("ecnutscode")), info));
field(Boolean.toString(rs.getBoolean("ecresearchorganization")), info)); o.setCountry(prepareQualifierSplitting(rs.getString("country")));
o.setEchighereducation(field(Boolean.toString(rs.getBoolean("echighereducation")), info)); o.setDataInfo(info);
o.setEcinternationalorganizationeurinterests( o.setLastupdatetimestamp(lastUpdateTimestamp);
field(Boolean.toString(rs.getBoolean("ecinternationalorganizationeurinterests")), info));
o.setEcinternationalorganization( return Arrays.asList(o);
field(Boolean.toString(rs.getBoolean("ecinternationalorganization")), info)); } catch (final Exception e) {
o.setEcenterprise(field(Boolean.toString(rs.getBoolean("ecenterprise")), info)); throw new RuntimeException(e);
o.setEcsmevalidated(field(Boolean.toString(rs.getBoolean("ecsmevalidated")), info)); }
o.setEcnutscode(field(Boolean.toString(rs.getBoolean("ecnutscode")), info)); }
o.setCountry(prepareQualifierSplitting(rs.getString("country")));
o.setDataInfo(info); public List<Oaf> processDatasourceOrganization(final ResultSet rs) {
o.setLastupdatetimestamp(lastUpdateTimestamp); try {
final DataInfo info = prepareDataInfo(rs);
return Arrays.asList(o); final String orgId = createOpenaireId(20, rs.getString("organization"), true);
} catch (final Exception e) { final String dsId = createOpenaireId(10, rs.getString("datasource"), true);
throw new RuntimeException(e); final List<KeyValue> collectedFrom = listKeyValues(createOpenaireId(10, rs.getString("collectedfromid"), true), rs.getString("collectedfromname"));
}
} final Relation r1 = new Relation();
r1.setRelType("datasourceOrganization");
public List<Oaf> processDatasourceOrganization(final ResultSet rs) { r1.setSubRelType("provision");
try { r1.setRelClass("isProvidedBy");
final DataInfo info = prepareDataInfo(rs); r1.setSource(dsId);
final String orgId = createOpenaireId(20, rs.getString("organization"), true); r1.setTarget(orgId);
final String dsId = createOpenaireId(10, rs.getString("datasource"), true); r1.setCollectedfrom(collectedFrom);
final List<KeyValue> collectedFrom = r1.setDataInfo(info);
listKeyValues( r1.setLastupdatetimestamp(lastUpdateTimestamp);
createOpenaireId(10, rs.getString("collectedfromid"), true),
rs.getString("collectedfromname")); final Relation r2 = new Relation();
r2.setRelType("datasourceOrganization");
final Relation r1 = new Relation(); r2.setSubRelType("provision");
r1.setRelType("datasourceOrganization"); r2.setRelClass("provides");
r1.setSubRelType("provision"); r2.setSource(orgId);
r1.setRelClass("isProvidedBy"); r2.setTarget(dsId);
r1.setSource(dsId); r2.setCollectedfrom(collectedFrom);
r1.setTarget(orgId); r2.setDataInfo(info);
r1.setCollectedfrom(collectedFrom); r2.setLastupdatetimestamp(lastUpdateTimestamp);
r1.setDataInfo(info);
r1.setLastupdatetimestamp(lastUpdateTimestamp); return Arrays.asList(r1, r2);
} catch (final Exception e) {
final Relation r2 = new Relation(); throw new RuntimeException(e);
r2.setRelType("datasourceOrganization"); }
r2.setSubRelType("provision"); }
r2.setRelClass("provides");
r2.setSource(orgId); public List<Oaf> processProjectOrganization(final ResultSet rs) {
r2.setTarget(dsId); try {
r2.setCollectedfrom(collectedFrom); final DataInfo info = prepareDataInfo(rs);
r2.setDataInfo(info); final String orgId = createOpenaireId(20, rs.getString("resporganization"), true);
r2.setLastupdatetimestamp(lastUpdateTimestamp); final String projectId = createOpenaireId(40, rs.getString("project"), true);
final List<KeyValue> collectedFrom = listKeyValues(createOpenaireId(10, rs.getString("collectedfromid"), true), rs.getString("collectedfromname"));
return Arrays.asList(r1, r2);
} catch (final Exception e) { final Relation r1 = new Relation();
throw new RuntimeException(e); r1.setRelType("projectOrganization");
} r1.setSubRelType("participation");
} r1.setRelClass("hasParticipant");
r1.setSource(projectId);
public List<Oaf> processProjectOrganization(final ResultSet rs) { r1.setTarget(orgId);
try { r1.setCollectedfrom(collectedFrom);
final DataInfo info = prepareDataInfo(rs); r1.setDataInfo(info);
final String orgId = createOpenaireId(20, rs.getString("resporganization"), true); r1.setLastupdatetimestamp(lastUpdateTimestamp);
final String projectId = createOpenaireId(40, rs.getString("project"), true);
final List<KeyValue> collectedFrom = final Relation r2 = new Relation();
listKeyValues( r2.setRelType("projectOrganization");
createOpenaireId(10, rs.getString("collectedfromid"), true), r2.setSubRelType("participation");
rs.getString("collectedfromname")); r2.setRelClass("isParticipant");
r2.setSource(orgId);
final Relation r1 = new Relation(); r2.setTarget(projectId);
r1.setRelType("projectOrganization"); r2.setCollectedfrom(collectedFrom);
r1.setSubRelType("participation"); r2.setDataInfo(info);
r1.setRelClass("hasParticipant"); r2.setLastupdatetimestamp(lastUpdateTimestamp);
r1.setSource(projectId);
r1.setTarget(orgId); return Arrays.asList(r1, r2);
r1.setCollectedfrom(collectedFrom); } catch (final Exception e) {
r1.setDataInfo(info); throw new RuntimeException(e);
r1.setLastupdatetimestamp(lastUpdateTimestamp); }
}
final Relation r2 = new Relation();
r2.setRelType("projectOrganization"); public List<Oaf> processClaims(final ResultSet rs) {
r2.setSubRelType("participation");
r2.setRelClass("isParticipant"); final DataInfo info =
r2.setSource(orgId); dataInfo(false, null, false, false, qualifier("user:claim", "user:claim", "dnet:provenanceActions", "dnet:provenanceActions"), "0.9");
r2.setTarget(projectId);
r2.setCollectedfrom(collectedFrom); final List<KeyValue> collectedFrom = listKeyValues(createOpenaireId(10, "infrastruct_::openaire", true), "OpenAIRE");
r2.setDataInfo(info);
r2.setLastupdatetimestamp(lastUpdateTimestamp); try {
return Arrays.asList(r1, r2); if (rs.getString("source_type").equals("context")) {
} catch (final Exception e) { final Result r;
throw new RuntimeException(e);
} if (rs.getString("target_type").equals("dataset")) {
} r = new Dataset();
r.setResulttype(MigrationConstants.DATASET_RESULTTYPE_QUALIFIER);
public List<Oaf> processClaims(final ResultSet rs) { } else if (rs.getString("target_type").equals("software")) {
r = new Software();
final DataInfo info = r.setResulttype(MigrationConstants.SOFTWARE_RESULTTYPE_QUALIFIER);
dataInfo( } else if (rs.getString("target_type").equals("other")) {
false, r = new OtherResearchProduct();
null, r.setResulttype(MigrationConstants.OTHER_RESULTTYPE_QUALIFIER);
false, } else {
false, r = new Publication();
qualifier( r.setResulttype(MigrationConstants.PUBLICATION_RESULTTYPE_QUALIFIER);
"user:claim", "user:claim", "dnet:provenanceActions", "dnet:provenanceActions"), }
"0.9"); r.setId(createOpenaireId(50, rs.getString("target_id"), false));
r.setLastupdatetimestamp(lastUpdateTimestamp);
final List<KeyValue> collectedFrom = r.setContext(prepareContext(rs.getString("source_id"), info));
listKeyValues(createOpenaireId(10, "infrastruct_::openaire", true), "OpenAIRE"); r.setDataInfo(info);
r.setCollectedfrom(collectedFrom);
try {
return Arrays.asList(r);
if (rs.getString("source_type").equals("context")) { } else {
final Result r; final String sourceId = createOpenaireId(rs.getString("source_type"), rs.getString("source_id"), false);
final String targetId = createOpenaireId(rs.getString("target_type"), rs.getString("target_id"), false);
if (rs.getString("target_type").equals("dataset")) {
r = new Dataset(); final Relation r1 = new Relation();
r.setResulttype(MigrationConstants.DATASET_RESULTTYPE_QUALIFIER); final Relation r2 = new Relation();
} else if (rs.getString("target_type").equals("software")) {
r = new Software(); if (rs.getString("source_type").equals("project")) {
r.setResulttype(MigrationConstants.SOFTWARE_RESULTTYPE_QUALIFIER); r1.setCollectedfrom(collectedFrom);
} else if (rs.getString("target_type").equals("other")) { r1.setRelType("resultProject");
r = new OtherResearchProduct(); r1.setSubRelType("outcome");
r.setResulttype(MigrationConstants.OTHER_RESULTTYPE_QUALIFIER); r1.setRelClass("produces");
} else {
r = new Publication(); r2.setCollectedfrom(collectedFrom);
r.setResulttype(MigrationConstants.PUBLICATION_RESULTTYPE_QUALIFIER); r2.setRelType("resultProject");
} r2.setSubRelType("outcome");
r.setId(createOpenaireId(50, rs.getString("target_id"), false)); r2.setRelClass("isProducedBy");
r.setLastupdatetimestamp(lastUpdateTimestamp); } else {
r.setContext(prepareContext(rs.getString("source_id"), info)); r1.setCollectedfrom(collectedFrom);
r.setDataInfo(info); r1.setRelType("resultResult");
r.setCollectedfrom(collectedFrom); r1.setSubRelType("relationship");
r1.setRelClass("isRelatedTo");
return Arrays.asList(r);
} else { r2.setCollectedfrom(collectedFrom);
final String sourceId = r2.setRelType("resultResult");
createOpenaireId(rs.getString("source_type"), rs.getString("source_id"), false); r2.setSubRelType("relationship");
final String targetId = r2.setRelClass("isRelatedTo");
createOpenaireId(rs.getString("target_type"), rs.getString("target_id"), false); }
final Relation r1 = new Relation(); r1.setSource(sourceId);
final Relation r2 = new Relation(); r1.setTarget(targetId);
r1.setDataInfo(info);
if (rs.getString("source_type").equals("project")) { r1.setLastupdatetimestamp(lastUpdateTimestamp);
r1.setCollectedfrom(collectedFrom);
r1.setRelType("resultProject"); r2.setSource(targetId);
r1.setSubRelType("outcome"); r2.setTarget(sourceId);
r1.setRelClass("produces"); r2.setDataInfo(info);
r2.setLastupdatetimestamp(lastUpdateTimestamp);
r2.setCollectedfrom(collectedFrom);
r2.setRelType("resultProject"); return Arrays.asList(r1, r2);
r2.setSubRelType("outcome"); }
r2.setRelClass("isProducedBy");
} else { } catch (final Exception e) {
r1.setCollectedfrom(collectedFrom); throw new RuntimeException(e);
r1.setRelType("resultResult"); }
r1.setSubRelType("relationship"); }
r1.setRelClass("isRelatedTo");
private List<Context> prepareContext(final String id, final DataInfo dataInfo) {
r2.setCollectedfrom(collectedFrom); final Context context = new Context();
r2.setRelType("resultResult"); context.setId(id);
r2.setSubRelType("relationship"); context.setDataInfo(Arrays.asList(dataInfo));
r2.setRelClass("isRelatedTo"); return Arrays.asList(context);
} }
r1.setSource(sourceId); private DataInfo prepareDataInfo(final ResultSet rs) throws SQLException {
r1.setTarget(targetId); final Boolean deletedbyinference = rs.getBoolean("deletedbyinference");
r1.setDataInfo(info); final String inferenceprovenance = rs.getString("inferenceprovenance");
r1.setLastupdatetimestamp(lastUpdateTimestamp); final Boolean inferred = rs.getBoolean("inferred");
final String trust = rs.getString("trust");
r2.setSource(targetId); return dataInfo(deletedbyinference, inferenceprovenance, inferred, false, MigrationConstants.ENTITYREGISTRY_PROVENANCE_ACTION, trust);
r2.setTarget(sourceId); }
r2.setDataInfo(info);
r2.setLastupdatetimestamp(lastUpdateTimestamp); private Qualifier prepareQualifierSplitting(final String s) {
if (StringUtils.isBlank(s)) { return null; }
return Arrays.asList(r1, r2); final String[] arr = s.split("@@@");
} return arr.length == 4 ? qualifier(arr[0], arr[1], arr[2], arr[3]) : null;
}
} catch (final Exception e) {
throw new RuntimeException(e); private List<Field<String>> prepareListFields(final Array array, final DataInfo info) {
} try {
} return array != null ? listFields(info, (String[]) array.getArray()) : new ArrayList<>();
} catch (final SQLException e) {
private List<Context> prepareContext(final String id, final DataInfo dataInfo) { throw new RuntimeException("Invalid SQL array", e);
final Context context = new Context(); }
context.setId(id); }
context.setDataInfo(Arrays.asList(dataInfo));
return Arrays.asList(context); private StructuredProperty prepareStructProp(final String s, final DataInfo dataInfo) {
} if (StringUtils.isBlank(s)) { return null; }
final String[] parts = s.split("###");
private DataInfo prepareDataInfo(final ResultSet rs) throws SQLException { if (parts.length == 2) {
final Boolean deletedbyinference = rs.getBoolean("deletedbyinference"); final String value = parts[0];
final String inferenceprovenance = rs.getString("inferenceprovenance"); final String[] arr = parts[1].split("@@@");
final Boolean inferred = rs.getBoolean("inferred"); if (arr.length == 4) { return structuredProperty(value, arr[0], arr[1], arr[2], arr[3], dataInfo); }
final String trust = rs.getString("trust"); }
return dataInfo( return null;
deletedbyinference, }
inferenceprovenance,
inferred, private List<StructuredProperty> prepareListOfStructProps(
false, final Array array,
MigrationConstants.ENTITYREGISTRY_PROVENANCE_ACTION, final DataInfo dataInfo) throws SQLException {
trust); final List<StructuredProperty> res = new ArrayList<>();
} if (array != null) {
for (final String s : (String[]) array.getArray()) {
private Qualifier prepareQualifierSplitting(final String s) { final StructuredProperty sp = prepareStructProp(s, dataInfo);
if (StringUtils.isBlank(s)) { if (sp != null) {
return null; res.add(sp);
} }
final String[] arr = s.split("@@@"); }
return arr.length == 4 ? qualifier(arr[0], arr[1], arr[2], arr[3]) : null; }
}
return res;
private List<Field<String>> prepareListFields(final Array array, final DataInfo info) { }
try {
return array != null ? listFields(info, (String[]) array.getArray()) : new ArrayList<>(); private Journal prepareJournal(final String name, final String sj, final DataInfo info) {
} catch (final SQLException e) { if (StringUtils.isNotBlank(sj)) {
throw new RuntimeException("Invalid SQL array", e); final String[] arr = sj.split("@@@");
} if (arr.length == 3) {
} final String issn = StringUtils.isNotBlank(arr[0]) ? arr[0] : null;
final String eissn = StringUtils.isNotBlank(arr[1]) ? arr[1] : null;;
private StructuredProperty prepareStructProp(final String s, final DataInfo dataInfo) { final String lissn = StringUtils.isNotBlank(arr[2]) ? arr[2] : null;;
if (StringUtils.isBlank(s)) { if (issn != null || eissn != null || lissn != null) {
return null; return journal(name, issn, eissn, eissn, null, null, null, null, null, null, null, info);
} }
final String[] parts = s.split("###"); }
if (parts.length == 2) { }
final String value = parts[0]; return null;
final String[] arr = parts[1].split("@@@"); }
if (arr.length == 4) {
return structuredProperty(value, arr[0], arr[1], arr[2], arr[3], dataInfo); @Override
} public void close() throws IOException {
} super.close();
return null; dbClient.close();
} }
/**
 * Converts an SQL array of packed strings into structured properties.
 *
 * <p>Every element is parsed with {@code prepareStructProp}; elements for which it returns
 * {@code null} are left out of the result.
 *
 * @param array the SQL array, whose content is cast to {@code String[]}; may be {@code null}
 * @param dataInfo the DataInfo forwarded to every {@code prepareStructProp} call
 * @return a new, possibly empty, list; never {@code null}
 * @throws SQLException if the content of the array cannot be retrieved
 */
private List<StructuredProperty> prepareListOfStructProps(
    final Array array, final DataInfo dataInfo) throws SQLException {
  final List<StructuredProperty> props = new ArrayList<>();
  if (array == null) {
    return props;
  }
  final String[] values = (String[]) array.getArray();
  for (int i = 0; i < values.length; i++) {
    final StructuredProperty parsed = prepareStructProp(values[i], dataInfo);
    if (parsed == null) {
      continue;
    }
    props.add(parsed);
  }
  return props;
}
/**
 * Builds a {@link Journal} from a name and a packed string of ISSN values.
 *
 * <p>{@code sj} must split on {@code "@@@"} into exactly three parts, read here as
 * {@code issn}, {@code eissn} and {@code lissn}. Blank parts are treated as missing, and a
 * journal is created only when at least one of the three is present.
 *
 * @param name the journal name, forwarded unchanged
 * @param sj the packed ISSN values; may be blank
 * @param info the DataInfo forwarded to {@code journal(...)}
 * @return the journal, or {@code null} when {@code sj} is blank, does not split into exactly
 *     three parts, or carries no non-blank value
 */
private Journal prepareJournal(final String name, final String sj, final DataInfo info) {
  if (StringUtils.isBlank(sj)) {
    return null;
  }
  // NOTE(review): String.split drops trailing empty tokens, so a value that ends with the
  // separator yields fewer than three parts and is rejected below.
  final String[] arr = sj.split("@@@");
  if (arr.length != 3) {
    return null;
  }
  final String issn = StringUtils.isNotBlank(arr[0]) ? arr[0] : null;
  final String eissn = StringUtils.isNotBlank(arr[1]) ? arr[1] : null;
  final String lissn = StringUtils.isNotBlank(arr[2]) ? arr[2] : null;
  if (issn == null && eissn == null && lissn == null) {
    return null;
  }
  // Forward lissn as well: eissn used to be passed twice, so the third parsed value was lost.
  // NOTE(review): assumes the fourth parameter of journal(...) is the linking ISSN - confirm.
  return journal(name, issn, eissn, lissn, null, null, null, null, null, null, null, info);
}
/**
 * Closes the parent application first and then the database client.
 *
 * <p>The client is closed in a {@code finally} block so that it is still released when
 * {@code super.close()} throws.
 *
 * @throws IOException propagated from the underlying close calls
 */
@Override
public void close() throws IOException {
  try {
    super.close();
  } finally {
    dbClient.close();
  }
}
} }

View File

@ -3,10 +3,10 @@
<parent> <parent>
<artifactId>dhp-workflows</artifactId> <artifactId>dhp-workflows</artifactId>
<groupId>eu.dnetlib.dhp</groupId> <groupId>eu.dnetlib.dhp</groupId>
<version>1.1.6-SNAPSHOT</version> <version>1.1.7-SNAPSHOT</version>
</parent> </parent>
<modelVersion>4.0.0</modelVersion> <modelVersion>4.0.0</modelVersion>
<artifactId>dhp-statistics-workflow</artifactId> <artifactId>dhp-stats-update</artifactId>
<dependencies> <dependencies>
<dependency> <dependency>
<groupId>org.apache.spark</groupId> <groupId>org.apache.spark</groupId>