diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractDbApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractDbApplication.java index edd36f25c..8e2940b6a 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractDbApplication.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractDbApplication.java @@ -85,10 +85,17 @@ public abstract class AbstractDbApplication extends AbstractMigrationApplication final String hdfsPath, final String dbUrl, final String dbUser, final String dbPassword, final String isLookupUrl) throws Exception { - + super(hdfsPath); this.dbClient = new DbClient(dbUrl, dbUser, dbPassword); - this.lastUpdateTimestamp = new Date().getTime(); this.vocs = VocabularyGroup.loadVocsFromIS(ISLookupClientFactory.getLookUpService(isLookupUrl)); + this.lastUpdateTimestamp = new Date().getTime(); + } + + protected AbstractDbApplication(final DbClient dbClient, final VocabularyGroup vocs) { // ONLY FOR TESTS + super(); + this.dbClient = dbClient; + this.vocs = vocs; + this.lastUpdateTimestamp = new Date().getTime(); } public void execute(final String sqlFile, final Function> producer) @@ -164,6 +171,7 @@ public abstract class AbstractDbApplication extends AbstractMigrationApplication } public List processProject(final ResultSet rs) { + try { final DataInfo info = prepareDataInfo(rs); @@ -219,6 +227,7 @@ public abstract class AbstractDbApplication extends AbstractMigrationApplication } public List processOrganization(final ResultSet rs) { + try { final DataInfo info = prepareDataInfo(rs); @@ -238,7 +247,7 @@ public abstract class AbstractDbApplication extends AbstractMigrationApplication o.setOaiprovenance(null); // Values not present in the DB o.setLegalshortname(field(rs.getString("legalshortname"), info)); o.setLegalname(field(rs.getString("legalname"), info)); - o.setAlternativeNames(new 
ArrayList<>()); // Values not returned by the SQL query + o.setAlternativeNames(prepareListFields(rs.getArray("alternativenames"), info)); o.setWebsiteurl(field(rs.getString("websiteurl"), info)); o.setLogourl(field(rs.getString("logourl"), info)); o.setEclegalbody(field(Boolean.toString(rs.getBoolean("eclegalbody")), info)); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java index 54a44df36..1c1baf1af 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java @@ -6,15 +6,11 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.common.DbClient; +import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup; public class MigrateDbEntitiesApplication extends AbstractDbApplication { - public MigrateDbEntitiesApplication(final String hdfsPath, final String dbUrl, final String dbUser, - final String dbPassword, final String isLookupUrl) - throws Exception { - super(hdfsPath, dbUrl, dbUser, dbPassword, isLookupUrl); - } - private static final Logger log = LoggerFactory.getLogger(MigrateDbEntitiesApplication.class); public static final String SOURCE_TYPE = "source_type"; @@ -80,4 +76,14 @@ public class MigrateDbEntitiesApplication extends AbstractDbApplication { } } + public MigrateDbEntitiesApplication(final String hdfsPath, final String dbUrl, final String dbUser, + final String dbPassword, final String isLookupUrl) + throws Exception { + super(hdfsPath, dbUrl, dbUser, dbPassword, isLookupUrl); + } + + protected MigrateDbEntitiesApplication(final DbClient dbClient, final VocabularyGroup vocs) { // ONLY FOR TESTS + super(dbClient, vocs); + } 
+ } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateOpenOrgsApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateOpenOrgsApplication.java index ae9aa5489..a199da2d5 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateOpenOrgsApplication.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateOpenOrgsApplication.java @@ -9,12 +9,6 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser; public class MigrateOpenOrgsApplication extends AbstractDbApplication { - public MigrateOpenOrgsApplication(final String hdfsPath, final String dbUrl, final String dbUser, - final String dbPassword, final String isLookupUrl) - throws Exception { - super(hdfsPath, dbUrl, dbUser, dbPassword, isLookupUrl); - } - private static final Logger log = LoggerFactory.getLogger(MigrateOpenOrgsApplication.class); public static final String SOURCE_TYPE = "source_type"; @@ -47,7 +41,7 @@ public class MigrateOpenOrgsApplication extends AbstractDbApplication { try (final MigrateOpenOrgsApplication mapper = new MigrateOpenOrgsApplication(hdfsPath, dbUrl, dbUser, dbPassword, isLookupUrl)) { - log.info("Processing orgs..."); + log.info("Processing open orgs..."); mapper.execute("queryOrganizationsFromOpenOrgsDB.sql", mapper::processOrganization); log.info("Processing simrels..."); @@ -55,6 +49,13 @@ public class MigrateOpenOrgsApplication extends AbstractDbApplication { log.info("All done."); } + + } + + public MigrateOpenOrgsApplication(final String hdfsPath, final String dbUrl, final String dbUser, + final String dbPassword, final String isLookupUrl) + throws Exception { + super(hdfsPath, dbUrl, dbUser, dbPassword, isLookupUrl); } } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_all/oozie_app/workflow.xml 
b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_all/oozie_app/workflow.xml index d8b61b5ea..860df1856 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_all/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_all/oozie_app/workflow.xml @@ -25,6 +25,18 @@ postgresPassword the password postgres + + + postgresOpenOrgsURL + the postgres URL to access to the OpenOrgs database + + + postgresOpenOrgsUser + the user of OpenOrgs database + + + postgresOpenOrgsPassword + the password of OpenOrgs database dbSchema @@ -116,8 +128,25 @@ + + + + + + + eu.dnetlib.dhp.oa.graph.raw.MigrateOpenOrgsApplication + --hdfsPath${contentPath}/db_openorgs + --postgresUrl${postgresOpenOrgsURL} + --postgresUser${postgresOpenOrgsUser} + --postgresPassword${postgresOpenOrgsPassword} + --isLookupUrl${isLookupUrl} + + + + + @@ -308,7 +337,7 @@ --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --sourcePaths${contentPath}/db_records,${contentPath}/oaf_records,${contentPath}/odf_records + --sourcePaths${contentPath}/db_openorgs,${contentPath}/db_records,${contentPath}/oaf_records,${contentPath}/odf_records --targetPath${workingDir}/entities --isLookupUrl${isLookupUrl} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_step1/oozie_app/config-default.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_openorgs/oozie_app/config-default.xml similarity index 100% rename from dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_step1/oozie_app/config-default.xml rename to dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_openorgs/oozie_app/config-default.xml diff --git 
a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_openorgs/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_openorgs/oozie_app/workflow.xml new file mode 100644 index 000000000..9be62373e --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_openorgs/oozie_app/workflow.xml @@ -0,0 +1,102 @@ + + + + contentPath + path location to store (or reuse) content from the aggregator + + + postgresOpenOrgsURL + the postgres URL to access to the OpenOrgs database + + + postgresOpenOrgsUser + the user of OpenOrgs database + + + postgresOpenOrgsPassword + the password of OpenOrgs database + + + isLookupUrl + the address of the lookUp service + + + + sparkDriverMemory + memory for driver process + + + sparkExecutorMemory + memory for individual executor + + + sparkExecutorCores + number of cores used by single executor + + + oozieActionShareLibForSpark2 + oozie action sharelib for spark 2.* + + + spark2ExtraListeners + com.cloudera.spark.lineage.NavigatorAppListener + spark 2.* extra listeners classname + + + spark2SqlQueryExecutionListeners + com.cloudera.spark.lineage.NavigatorQueryListener + spark 2.* sql query execution listeners classname + + + spark2YarnHistoryServerAddress + spark 2.* yarn history server address + + + spark2EventLogDir + spark 2.* event log dir location + + + + + ${jobTracker} + ${nameNode} + + + mapreduce.job.queuename + ${queueName} + + + oozie.launcher.mapred.job.queue.name + ${oozieLauncherQueueName} + + + oozie.action.sharelib.for.spark + ${oozieActionShareLibForSpark2} + + + + + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + + + + eu.dnetlib.dhp.oa.graph.raw.MigrateOpenOrgsApplication + --hdfsPath${contentPath}/db_openorgs + --postgresUrl${postgresOpenOrgsURL} + --postgresUser${postgresOpenOrgsUser} + --postgresPassword${postgresOpenOrgsPassword} + --isLookupUrl${isLookupUrl} + + + + + + 
+ diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_step1/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_step1/oozie_app/workflow.xml deleted file mode 100644 index 868418152..000000000 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_step1/oozie_app/workflow.xml +++ /dev/null @@ -1,108 +0,0 @@ - - - - migrationPathStep1 - the base path to store hdfs file - - - postgresURL - the postgres URL to access to the database - - - postgresUser - the user postgres - - - postgresPassword - the password postgres - - - mongoURL - mongoDB url, example: mongodb://[username:password@]host[:port] - - - mongoDb - mongo database - - - isLookupUrl - the address of the lookUp service - - - sparkDriverMemory - memory for driver process - - - sparkExecutorMemory - memory for individual executor - - - sparkExecutorCores - number of cores used by single executor - - - - - - - Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - - - - - - - - - - - - - ${jobTracker} - ${nameNode} - eu.dnetlib.dhp.migration.step1.MigrateDbEntitiesApplication - -p${migrationPathStep1}/db_records - -pgurl${postgresURL} - -pguser${postgresUser} - -pgpasswd${postgresPassword} - -islookup${isLookupUrl} - - - - - - - - ${jobTracker} - ${nameNode} - eu.dnetlib.dhp.migration.step1.MigrateMongoMdstoresApplication - -p${migrationPathStep1}/odf_records - -mongourl${mongoURL} - -mongodb${mongoDb} - -fODF - -lstore - -icleaned - - - - - - - - ${jobTracker} - ${nameNode} - eu.dnetlib.dhp.migration.step1.MigrateMongoMdstoresApplication - -p${migrationPathStep1}/oaf_records - -mongourl${mongoURL} - -mongodb${mongoDb} - -fOAF - -lstore - -icleaned - - - - - - - \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_step2/oozie_app/config-default.xml 
b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_step2/oozie_app/config-default.xml deleted file mode 100644 index 2e0ed9aee..000000000 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_step2/oozie_app/config-default.xml +++ /dev/null @@ -1,18 +0,0 @@ - - - jobTracker - yarnRM - - - nameNode - hdfs://nameservice1 - - - oozie.use.system.libpath - true - - - oozie.action.sharelib.for.spark - spark2 - - \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_step2/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_step2/oozie_app/workflow.xml deleted file mode 100644 index f6485ea9c..000000000 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_step2/oozie_app/workflow.xml +++ /dev/null @@ -1,65 +0,0 @@ - - - - migrationPathStep1 - the base path to store hdfs file - - - migrationPathStep2 - the temporary path to store entities before dispatching - - - isLookupUrl - the address of the lookUp service - - - - sparkDriverMemory - memory for driver process - - - sparkExecutorMemory - memory for individual executor - - - sparkExecutorCores - number of cores used by single executor - - - - - - - Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - - - - - - - - - - - - - ${jobTracker} - ${nameNode} - yarn-cluster - cluster - GenerateEntities - eu.dnetlib.dhp.migration.step2.GenerateEntitiesApplication - dhp-aggregation-${projectVersion}.jar - --executor-memory ${sparkExecutorMemory} --executor-cores ${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners="com.cloudera.spark.lineage.NavigatorAppListener" --conf spark.sql.queryExecutionListeners="com.cloudera.spark.lineage.NavigatorQueryListener" --conf spark.sql.warehouse.dir="/user/hive/warehouse" - -mt yarn-cluster - 
-s${migrationPathStep1}/db_records,${migrationPathStep1}/oaf_records,${migrationPathStep1}/odf_records - -t${migrationPathStep2}/all_entities - --islookup${isLookupUrl} - - - - - - - \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_step3/oozie_app/config-default.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_step3/oozie_app/config-default.xml deleted file mode 100644 index 2e0ed9aee..000000000 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_step3/oozie_app/config-default.xml +++ /dev/null @@ -1,18 +0,0 @@ - - - jobTracker - yarnRM - - - nameNode - hdfs://nameservice1 - - - oozie.use.system.libpath - true - - - oozie.action.sharelib.for.spark - spark2 - - \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_step3/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_step3/oozie_app/workflow.xml deleted file mode 100644 index 8688f09d1..000000000 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_step3/oozie_app/workflow.xml +++ /dev/null @@ -1,60 +0,0 @@ - - - - - migrationPathStep2 - the temporary path to store entities before dispatching - - - migrationPathStep3 - the graph Raw base path - - - sparkDriverMemory - memory for driver process - - - sparkExecutorMemory - memory for individual executor - - - sparkExecutorCores - number of cores used by single executor - - - - - - - Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - - - - - - - - - - - - - ${jobTracker} - ${nameNode} - yarn-cluster - cluster - GenerateGraph - eu.dnetlib.dhp.migration.step3.DispatchEntitiesApplication - dhp-aggregation-${projectVersion}.jar - --executor-memory ${sparkExecutorMemory} --executor-cores ${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf 
spark.extraListeners="com.cloudera.spark.lineage.NavigatorAppListener" --conf spark.sql.queryExecutionListeners="com.cloudera.spark.lineage.NavigatorQueryListener" --conf spark.sql.warehouse.dir="/user/hive/warehouse" - -mt yarn-cluster - -s${migrationPathStep2}/all_entities - -g${migrationPathStep3} - - - - - - - \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryOrganizations.sql b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryOrganizations.sql index 3e5de8071..eab2ba9d1 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryOrganizations.sql +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryOrganizations.sql @@ -1,30 +1,31 @@ SELECT - o.id AS organizationid, - o.legalshortname AS legalshortname, - o.legalname AS legalname, - o.websiteurl AS websiteurl, - o.logourl AS logourl, - o.ec_legalbody AS eclegalbody, - o.ec_legalperson AS eclegalperson, - o.ec_nonprofit AS ecnonprofit, - o.ec_researchorganization AS ecresearchorganization, - o.ec_highereducation AS echighereducation, - o.ec_internationalorganizationeurinterests AS ecinternationalorganizationeurinterests, - o.ec_internationalorganization AS ecinternationalorganization, - o.ec_enterprise AS ecenterprise, - o.ec_smevalidated AS ecsmevalidated, - o.ec_nutscode AS ecnutscode, - o.dateofcollection AS dateofcollection, - o.lastupdate AS dateoftransformation, - false AS inferred, - false AS deletedbyinference, - o.trust AS trust, - '' AS inferenceprovenance, - d.id AS collectedfromid, - d.officialname AS collectedfromname, - o.country || '@@@dnet:countries' AS country, + o.id AS organizationid, + o.legalshortname AS legalshortname, + o.legalname AS legalname, + ARRAY[]::text[] AS alternativenames, + o.websiteurl AS websiteurl, + o.logourl AS logourl, + o.ec_legalbody AS eclegalbody, + o.ec_legalperson AS eclegalperson, + 
o.ec_nonprofit AS ecnonprofit, + o.ec_researchorganization AS ecresearchorganization, + o.ec_highereducation AS echighereducation, + o.ec_internationalorganizationeurinterests AS ecinternationalorganizationeurinterests, + o.ec_internationalorganization AS ecinternationalorganization, + o.ec_enterprise AS ecenterprise, + o.ec_smevalidated AS ecsmevalidated, + o.ec_nutscode AS ecnutscode, + o.dateofcollection AS dateofcollection, + o.lastupdate AS dateoftransformation, + false AS inferred, + false AS deletedbyinference, + o.trust AS trust, + '' AS inferenceprovenance, + d.id AS collectedfromid, + d.officialname AS collectedfromname, + o.country || '@@@dnet:countries' AS country, 'sysimport:crosswalk:entityregistry@@@dnet:provenance_actions' AS provenanceaction, - ARRAY[]::text[] AS pid + ARRAY[]::text[] AS pid FROM dsm_organizations o LEFT OUTER JOIN dsm_datasources d ON (d.id = o.collectedfrom) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryOrganizationsFromOpenOrgsDB.sql b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryOrganizationsFromOpenOrgsDB.sql index 3396f365c..93eb4c128 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryOrganizationsFromOpenOrgsDB.sql +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryOrganizationsFromOpenOrgsDB.sql @@ -1,19 +1,31 @@ SELECT - o.id AS organizationid, - coalesce((array_agg(a.acronym))[1], o.name) AS legalshortname, - o.name AS legalname, - array_agg(DISTINCT n.name) AS "alternativeNames", - (array_agg(u.url))[1] AS websiteurl, - o.modification_date AS dateoftransformation, - false AS inferred, - false AS deletedbyinference, - 0.95 AS trust, - '' AS inferenceprovenance, - 'openaire____::openorgs' AS collectedfromid, - 'OpenOrgs Database' AS collectedfromname, - o.country || '@@@dnet:countries' AS country, - 
'sysimport:crosswalk:entityregistry@@@dnet:provenance_actions' AS provenanceaction, - array_agg(DISTINCT i.otherid || '###' || i.type || '@@@dnet:pid_types') AS pid + o.id AS organizationid, + coalesce((array_agg(a.acronym))[1], o.name) AS legalshortname, + o.name AS legalname, + array_agg(DISTINCT n.name) AS alternativenames, + (array_agg(u.url))[1] AS websiteurl, + '' AS logourl, + DATE(o.creation_date) AS dateofcollection, + DATE(o.modification_date) AS dateoftransformation, + false AS ecenterprise, + false AS echighereducation, + false AS ecinternationalorganization, + false AS ecinternationalorganizationeurinterests, + false AS eclegalbody, + false AS eclegalperson, + false AS ecnonprofit, + false AS ecnutscode, + false AS ecresearchorganization, + false AS ecsmevalidated, + false AS inferred, + false AS deletedbyinference, + 0.99 AS trust, + '' AS inferenceprovenance, + 'openaire____::openorgs' AS collectedfromid, + 'OpenOrgs Database' AS collectedfromname, + o.country || '@@@dnet:countries' AS country, + 'sysimport:crosswalk:entityregistry@@@dnet:provenance_actions' AS provenanceaction, + array_agg(DISTINCT i.otherid || '###' || i.type || '@@@dnet:pid_types') AS pid FROM organizations o LEFT OUTER JOIN acronyms a ON (a.id = o.id) LEFT OUTER JOIN urls u ON (u.id = o.id) @@ -28,21 +40,33 @@ GROUP BY UNION ALL SELECT - 'openorgsmesh'||substring(o.id, 13)||'-'||md5(n.name) AS organizationid, - n.name AS legalshortname, - n.name AS legalname, - ARRAY[]::text[] AS "alternativeNames", - (array_agg(u.url))[1] AS websiteurl, - o.modification_date AS dateoftransformation, - false AS inferred, - false AS deletedbyinference, - 0.88 AS trust, - '' AS inferenceprovenance, - 'openaire____::openorgs' AS collectedfromid, - 'OpenOrgs Database' AS collectedfromname, - o.country || '@@@dnet:countries' AS country, - 'sysimport:crosswalk:entityregistry@@@dnet:provenance_actions' AS provenanceaction, - array_agg(DISTINCT i.otherid || '###' || i.type || '@@@dnet:pid_types') AS pid 
+ 'openorgsmesh'||substring(o.id, 13)||'-'||md5(n.name) AS organizationid, + n.name AS legalshortname, + n.name AS legalname, + ARRAY[]::text[] AS alternativenames, + (array_agg(u.url))[1] AS websiteurl, + '' AS logourl, + DATE(o.creation_date) AS dateofcollection, + DATE(o.modification_date) AS dateoftransformation, + false AS ecenterprise, + false AS echighereducation, + false AS ecinternationalorganization, + false AS ecinternationalorganizationeurinterests, + false AS eclegalbody, + false AS eclegalperson, + false AS ecnonprofit, + false AS ecnutscode, + false AS ecresearchorganization, + false AS ecsmevalidated, + false AS inferred, + false AS deletedbyinference, + 0.88 AS trust, + '' AS inferenceprovenance, + 'openaire____::openorgs' AS collectedfromid, + 'OpenOrgs Database' AS collectedfromname, + o.country || '@@@dnet:countries' AS country, + 'sysimport:crosswalk:entityregistry@@@dnet:provenance_actions' AS provenanceaction, + array_agg(DISTINCT i.otherid || '###' || i.type || '@@@dnet:pid_types') AS pid FROM other_names n LEFT OUTER JOIN organizations o ON (n.id = o.id) LEFT OUTER JOIN urls u ON (u.id = o.id) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplicationTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplicationTest.java index 22fcb36c9..f4f5e2c75 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplicationTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplicationTest.java @@ -27,6 +27,7 @@ import org.mockito.junit.jupiter.MockitoExtension; import com.fasterxml.jackson.core.type.TypeReference; import com.fasterxml.jackson.databind.ObjectMapper; +import eu.dnetlib.dhp.common.DbClient; import eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils; import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup; import 
eu.dnetlib.dhp.schema.oaf.Datasource; @@ -47,6 +48,9 @@ public class MigrateDbEntitiesApplicationTest { @Mock private VocabularyGroup vocs; + @Mock + private DbClient dbClient; + @BeforeEach public void setUp() { lenient() @@ -59,7 +63,7 @@ public class MigrateDbEntitiesApplicationTest { lenient().when(vocs.termExists(anyString(), anyString())).thenReturn(true); - this.app = new MigrateDbEntitiesApplication(vocs); + this.app = new MigrateDbEntitiesApplication(dbClient, vocs); } @Test