partial implementation of applications to migrate entities

Michele Artini 2020-01-17 15:26:21 +01:00
parent f7b9a7a9af
commit 81f82b5d34
15 changed files with 1410 additions and 0 deletions

View File

@@ -0,0 +1,61 @@
package eu.dnetlib.dhp.migration;
import java.io.Closeable;
import java.io.IOException;
import java.net.URI;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.codehaus.jackson.map.ObjectMapper;
import eu.dnetlib.dhp.schema.oaf.Oaf;
public class AbstractMigrateApplication implements Closeable {
private final AtomicInteger counter = new AtomicInteger(0);
private final IntWritable key = new IntWritable(counter.get());
private final Text value = new Text();
private final ObjectMapper objectMapper = new ObjectMapper();
private final SequenceFile.Writer writer;
public AbstractMigrateApplication(final String hdfsPath, final String hdfsNameNode, final String hdfsUser) throws Exception {
this.writer = SequenceFile.createWriter(getConf(hdfsNameNode, hdfsUser), SequenceFile.Writer.file(new Path(hdfsPath)), SequenceFile.Writer
.keyClass(IntWritable.class), SequenceFile.Writer.valueClass(Text.class));
}
private Configuration getConf(final String hdfsNameNode, final String hdfsUser) throws IOException {
final Configuration conf = new Configuration();
conf.set("fs.defaultFS", hdfsNameNode);
conf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName());
conf.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName());
System.setProperty("HADOOP_USER_NAME", hdfsUser);
System.setProperty("hadoop.home.dir", "/");
FileSystem.get(URI.create(hdfsNameNode), conf);
return conf;
}
protected void emitOaf(final Oaf oaf) {
try {
key.set(counter.getAndIncrement());
value.set(objectMapper.writeValueAsString(oaf));
writer.append(key, value);
} catch (final Exception e) {
e.printStackTrace();
}
}
@Override
public void close() throws IOException {
writer.close();
}
}
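AbstractMigrateApplication serializes each Oaf object to JSON and appends it to a Hadoop SequenceFile as an <IntWritable, Text> pair. The following standalone sketch (not part of this commit; class name, namenode URI and file path are placeholders) shows how such a file could be read back for inspection:

package eu.dnetlib.dhp.migration;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;

public class DumpSequenceFile {

	public static void main(final String[] args) throws Exception {
		final Configuration conf = new Configuration();
		conf.set("fs.defaultFS", "hdfs://namenode:8020"); // placeholder namenode URI
		final IntWritable key = new IntWritable();
		final Text value = new Text();
		// Reads the <IntWritable, Text> pairs written by AbstractMigrateApplication;
		// each value is the JSON serialization of an Oaf object.
		try (final SequenceFile.Reader reader =
				new SequenceFile.Reader(conf, SequenceFile.Reader.file(new Path("/tmp/migration/entities.seq")))) {
			while (reader.next(key, value)) {
				System.out.println(key.get() + "\t" + value);
			}
		}
	}
}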

View File

@@ -0,0 +1,58 @@
package eu.dnetlib.dhp.migration;
import java.io.Closeable;
import java.io.IOException;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.function.Consumer;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
public class DbClient implements Closeable {
private static final Log log = LogFactory.getLog(DbClient.class);
private Connection connection;
public DbClient(final String address, final String login, final String password) {
try {
Class.forName("org.postgresql.Driver");
this.connection = DriverManager.getConnection(address, login, password);
this.connection.setAutoCommit(false);
} catch (final Exception e) {
log.error(e.getClass().getName() + ": " + e.getMessage());
throw new RuntimeException(e);
}
log.info("Opened database successfully");
}
public void processResults(final String sql, final Consumer<ResultSet> consumer) {
try (final Statement stmt = connection.createStatement()) {
try (final ResultSet rs = stmt.executeQuery(sql)) {
while (rs.next()) {
consumer.accept(rs);
}
} catch (final SQLException e) {
throw new RuntimeException(e);
}
} catch (final SQLException e1) {
throw new RuntimeException(e1);
}
}
@Override
public void close() throws IOException {
try {
connection.close();
} catch (final SQLException e) {
throw new RuntimeException(e);
}
}
}
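For illustration only (not part of this commit), DbClient could be used as follows; the connection settings are placeholders, and the query touches only columns that also appear in queryDatasources.sql further down:

package eu.dnetlib.dhp.migration;

import java.sql.SQLException;

public class DbClientExample {

	public static void main(final String[] args) throws Exception {
		// Connection settings are placeholders.
		try (final DbClient dbClient = new DbClient("jdbc:postgresql://localhost:5432/dnet", "dnet", "***")) {
			dbClient.processResults("SELECT id, officialname FROM dsm_datasources", rs -> {
				try {
					System.out.println(rs.getString("id") + " - " + rs.getString("officialname"));
				} catch (final SQLException e) {
					throw new RuntimeException(e);
				}
			});
		}
	}
}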

View File

@@ -0,0 +1,87 @@
package eu.dnetlib.dhp.migration;
import java.io.Closeable;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.stream.StreamSupport;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.bson.Document;
import com.google.common.collect.Iterables;
import com.mongodb.MongoClient;
import com.mongodb.MongoClientURI;
import com.mongodb.client.MongoCollection;
import com.mongodb.client.MongoDatabase;
public class MdstoreClient implements Closeable {
private final MongoClient client;
private final MongoDatabase db;
private static final String COLL_METADATA = "metadata";
private static final String COLL_METADATA_MANAGER = "metadataManager";
private static final Log log = LogFactory.getLog(MdstoreClient.class);
public MdstoreClient(final String baseUrl, final String dbName) {
this.client = new MongoClient(new MongoClientURI(baseUrl));
this.db = getDb(client, dbName);
}
public Map<String, String> validCollections(final String mdFormat, final String mdLayout, final String mdInterpretation) {
final Map<String, String> transactions = new HashMap<>();
for (final Document entry : getColl(db, COLL_METADATA_MANAGER).find()) {
final String mdId = entry.getString("mdId");
final String currentId = entry.getString("currentId");
if (StringUtils.isNoneBlank(mdId, currentId)) {
transactions.put(mdId, currentId);
}
}
final Map<String, String> res = new HashMap<>();
for (final Document entry : getColl(db, COLL_METADATA).find()) {
if (entry.getString("format").equals(mdFormat) && entry.getString("layout").equals(mdLayout)
&& entry.getString("interpretation").equals(mdInterpretation) && transactions.containsKey(entry.getString("mdId"))) {
res.put(entry.getString("mdId"), transactions.get(entry.getString("mdId")));
}
}
return res;
}
private MongoDatabase getDb(final MongoClient client, final String dbName) {
if (!Iterables.contains(client.listDatabaseNames(), dbName)) {
final String err = String.format("Database '%s' not found in %s", dbName, client.getAddress());
log.warn(err);
throw new RuntimeException(err);
}
return client.getDatabase(dbName);
}
private MongoCollection<Document> getColl(final MongoDatabase db, final String collName) {
if (!Iterables.contains(db.listCollectionNames(), collName)) {
final String err = String.format("Missing collection '%s' in database '%s'", collName, db.getName());
log.warn(err);
throw new RuntimeException(err);
}
return db.getCollection(collName);
}
public Iterable<String> listRecords(final String coll) {
return () -> StreamSupport.stream(getColl(db, coll).find().spliterator(), false)
.filter(e -> e.containsKey("body"))
.map(e -> e.getString("body"))
.iterator();
}
@Override
public void close() throws IOException {
client.close();
}
}

View File

@@ -0,0 +1,390 @@
package eu.dnetlib.dhp.migration;
import java.io.Closeable;
import java.io.IOException;
import java.sql.ResultSet;
import java.util.Arrays;
import java.util.function.Consumer;
import org.apache.commons.io.IOUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.schema.oaf.DataInfo;
import eu.dnetlib.dhp.schema.oaf.Datasource;
import eu.dnetlib.dhp.schema.oaf.Organization;
import eu.dnetlib.dhp.schema.oaf.Project;
import eu.dnetlib.dhp.schema.oaf.Relation;
public class MigrateDbEntitiesApplication extends AbstractMigrateApplication implements Closeable {
private static final Log log = LogFactory.getLog(MigrateDbEntitiesApplication.class);
private final DbClient dbClient;
public static void main(final String[] args) throws Exception {
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
IOUtils.toString(MigrateDbEntitiesApplication.class.getResourceAsStream("/eu/dnetlib/dhp/migration/migrate_db_entities_parameters.json")));
parser.parseArgument(args);
final String dbUrl = parser.get("postgresUrl");
final String dbUser = parser.get("postgresUser");
final String dbPassword = parser.get("postgresPassword");
final String hdfsPath = parser.get("hdfsPath");
final String hdfsNameNode = parser.get("namenode");
final String hdfsUser = parser.get("hdfsUser");
try (final MigrateDbEntitiesApplication smdbe = new MigrateDbEntitiesApplication(hdfsPath, hdfsNameNode, hdfsUser, dbUrl, dbUser, dbPassword)) {
smdbe.execute("queryDatasources.sql", smdbe::processDatasource);
smdbe.execute("queryProjects.sql", smdbe::processProject);
smdbe.execute("queryOrganizations.sql", smdbe::processOrganization);
smdbe.execute("queryDatasourceOrganization.sql", smdbe::processDatasourceOrganization);
smdbe.execute("queryProjectOrganization.sql", smdbe::processProjectOrganization);
}
}
public MigrateDbEntitiesApplication(final String hdfsPath, final String hdfsNameNode, final String hdfsUser, final String dbUrl, final String dbUser,
final String dbPassword) throws Exception {
super(hdfsPath, hdfsNameNode, hdfsUser);
this.dbClient = new DbClient(dbUrl, dbUser, dbPassword);
}
public void execute(final String sqlFile, final Consumer<ResultSet> consumer) throws Exception {
final String sql = IOUtils.toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/migration/sql/" + sqlFile));
dbClient.processResults(sql, consumer);
}
public void processDatasource(final ResultSet rs) {
try {
final DataInfo info = MigrationUtils.dataInfo(null, null, null, null, null, null); // TODO
final Datasource ds = new Datasource();
ds.setId(MigrationUtils.createOpenaireId("10", rs.getString("datasourceid")));
ds.setOriginalId(Arrays.asList(rs.getString("datasourceid")));
ds.setCollectedfrom(MigrationUtils.listKeyValues(rs.getString("collectedfromid"), rs.getString("collectedfromname")));
ds.setPid(null); // List<StructuredProperty> // TODO
ds.setDateofcollection(rs.getDate("dateofcollection").toString());
ds.setDateoftransformation(null); // TODO
ds.setExtraInfo(null); // TODO
ds.setOaiprovenance(null); // TODO
ds.setDatasourcetype(null); // Qualifier // TODO
ds.setOpenairecompatibility(null); // Qualifier // TODO
ds.setOfficialname(MigrationUtils.field(rs.getString("officialname"), info));
ds.setEnglishname(MigrationUtils.field(rs.getString("englishname"), info));
ds.setWebsiteurl(MigrationUtils.field(rs.getString("websiteurl"), info));
ds.setLogourl(MigrationUtils.field(rs.getString("logourl"), info));
ds.setContactemail(MigrationUtils.field(rs.getString("contactemail"), info));
ds.setNamespaceprefix(MigrationUtils.field(rs.getString("namespaceprefix"), info));
ds.setLatitude(MigrationUtils.field(Double.toString(rs.getDouble("latitude")), info));
ds.setLongitude(MigrationUtils.field(Double.toString(rs.getDouble("longitude")), info));
ds.setDateofvalidation(MigrationUtils.field(rs.getDate("dateofvalidation").toString(), info));
ds.setDescription(MigrationUtils.field(rs.getString("description"), info));
ds.setSubjects(null); // List<StructuredProperty> // TODO
ds.setOdnumberofitems(MigrationUtils.field(Double.toString(rs.getInt("odnumberofitems")), info));
ds.setOdnumberofitemsdate(MigrationUtils.field(rs.getDate("odnumberofitemsdate").toString(), info));
ds.setOdpolicies(MigrationUtils.field(rs.getString("odpolicies"), info));
ds.setOdlanguages(MigrationUtils.listFields(info, rs.getArray("odlanguages")));
ds.setOdcontenttypes(MigrationUtils.listFields(info, rs.getArray("odcontenttypes")));
ds.setAccessinfopackage(MigrationUtils.listFields(info, rs.getArray("accessinfopackage")));
ds.setReleasestartdate(MigrationUtils.field(rs.getDate("releasestartdate").toString(), info));
ds.setReleaseenddate(MigrationUtils.field(rs.getDate("releaseenddate").toString(), info));
ds.setMissionstatementurl(MigrationUtils.field(rs.getString("missionstatementurl"), info));
ds.setDataprovider(MigrationUtils.field(rs.getBoolean("dataprovider"), info));
ds.setServiceprovider(MigrationUtils.field(rs.getBoolean("serviceprovider"), info));
ds.setDatabaseaccesstype(MigrationUtils.field(rs.getString("databaseaccesstype"), info));
ds.setDatauploadtype(MigrationUtils.field(rs.getString("datauploadtype"), info));
ds.setDatabaseaccessrestriction(MigrationUtils.field(rs.getString("databaseaccessrestriction"), info));
ds.setDatauploadrestriction(MigrationUtils.field(rs.getString("datauploadrestriction"), info));
ds.setVersioning(MigrationUtils.field(rs.getBoolean("versioning"), info));
ds.setCitationguidelineurl(MigrationUtils.field(rs.getString("citationguidelineurl"), info));
ds.setQualitymanagementkind(MigrationUtils.field(rs.getString("qualitymanagementkind"), info));
ds.setPidsystems(MigrationUtils.field(rs.getString("pidsystems"), info));
ds.setCertificates(MigrationUtils.field(rs.getString("certificates"), info));
ds.setPolicies(null); // List<KeyValue> // TODO
ds.setJournal(null); // Journal // TODO
// rs.getString("datasourceid");
rs.getArray("identities");
// rs.getString("officialname");
// rs.getString("englishname");
// rs.getString("contactemail");
rs.getString("openairecompatibility"); // COMPLEX ...@@@...
// rs.getString("websiteurl");
// rs.getString("logourl");
// rs.getArray("accessinfopackage");
// rs.getDouble("latitude");
// rs.getDouble("longitude");
// rs.getString("namespaceprefix");
// rs.getInt("odnumberofitems"); // NULL
// rs.getDate("odnumberofitemsdate"); // NULL
rs.getArray("subjects");
// rs.getString("description");
// rs.getString("odpolicies"); // NULL
// rs.getArray("odlanguages");
// rs.getArray("odcontenttypes");
rs.getBoolean("inferred"); // false
rs.getBoolean("deletedbyinference");// false
rs.getDouble("trust"); // 0.9
rs.getString("inferenceprovenance"); // NULL
// rs.getDate("dateofcollection");
// rs.getDate("dateofvalidation");
// rs.getDate("releasestartdate");
// rs.getDate("releaseenddate");
// rs.getString("missionstatementurl");
// rs.getBoolean("dataprovider");
// rs.getBoolean("serviceprovider");
// rs.getString("databaseaccesstype");
// rs.getString("datauploadtype");
// rs.getString("databaseaccessrestriction");
// rs.getString("datauploadrestriction");
// rs.getBoolean("versioning");
// rs.getString("citationguidelineurl");
// rs.getString("qualitymanagementkind");
// rs.getString("pidsystems");
// rs.getString("certificates");
rs.getArray("policies");
// rs.getString("collectedfromid");
// rs.getString("collectedfromname");
rs.getString("datasourcetype"); // COMPLEX XXX@@@@....
rs.getString("provenanceaction"); // 'sysimport:crosswalk:entityregistry@@@sysimport:crosswalk:entityregistry@@@dnet:provenance_actions@@@dnet:provenance_actions'
// AS provenanceaction,
rs.getString("journal"); // CONCAT(d.issn, '@@@', d.eissn, '@@@', d.lissn) AS journal
emitOaf(ds);
} catch (final Exception e) {
// TODO: handle exception
}
}
public void processProject(final ResultSet rs) {
try {
final DataInfo info = MigrationUtils.dataInfo(null, null, null, null, null, null); // TODO
final Project p = new Project();
p.setId(MigrationUtils.createOpenaireId("40", rs.getString("projectid")));
p.setOriginalId(Arrays.asList(rs.getString("projectid")));
p.setCollectedfrom(MigrationUtils.listKeyValues(rs.getString("collectedfromid"), rs.getString("collectedfromname")));
p.setPid(null); // List<StructuredProperty> // TODO
p.setDateofcollection(rs.getDate("dateofcollection").toString());
p.setDateoftransformation(rs.getDate("dateoftransformation").toString());
p.setExtraInfo(null); // List<ExtraInfo> //TODO
p.setOaiprovenance(null); // OAIProvenance // TODO
p.setWebsiteurl(MigrationUtils.field(rs.getString("websiteurl"), info));
p.setCode(MigrationUtils.field(rs.getString("code"), info));
p.setAcronym(MigrationUtils.field(rs.getString("acronym"), info));
p.setTitle(MigrationUtils.field(rs.getString("title"), info));
p.setStartdate(MigrationUtils.field(rs.getDate("startdate").toString(), info));
p.setEnddate(MigrationUtils.field(rs.getDate("enddate").toString(), info));
p.setCallidentifier(MigrationUtils.field(rs.getString("callidentifier"), info));
p.setKeywords(MigrationUtils.field(rs.getString("keywords"), info));
p.setDuration(MigrationUtils.field(Integer.toString(rs.getInt("duration")), info));
p.setEcsc39(MigrationUtils.field(Boolean.toString(rs.getBoolean("ecsc39")), info));
p.setOamandatepublications(MigrationUtils.field(Boolean.toString(rs.getBoolean("oamandatepublications")), info));
p.setEcarticle29_3(MigrationUtils.field(Boolean.toString(rs.getBoolean("ecarticle29_3")), info));
p.setSubjects(null); // List<StructuredProperty> //TODO
p.setFundingtree(null); // List<Field<String>> //TODO
p.setContracttype(null); // Qualifier //TODO
p.setOptional1(MigrationUtils.field(rs.getString("optional1"), info));
p.setOptional2(MigrationUtils.field(rs.getString("optional2"), info));
p.setJsonextrainfo(MigrationUtils.field(rs.getString("jsonextrainfo"), info));
p.setContactfullname(MigrationUtils.field(rs.getString("contactfullname"), info));
p.setContactfax(MigrationUtils.field(rs.getString("contactfax"), info));
p.setContactphone(MigrationUtils.field(rs.getString("contactphone"), info));
p.setContactemail(MigrationUtils.field(rs.getString("contactemail"), info));
p.setSummary(MigrationUtils.field(rs.getString("summary"), info));
p.setCurrency(MigrationUtils.field(rs.getString("currency"), info));
p.setTotalcost(new Float(rs.getDouble("totalcost")));
p.setFundedamount(new Float(rs.getDouble("fundedamount")));
// rs.getString("projectid");
// rs.getString("code");
// rs.getString("websiteurl");
// rs.getString("acronym");
// rs.getString("title");
// rs.getDate("startdate");
// rs.getDate("enddate");
// rs.getString("callidentifier");
// rs.getString("keywords");
// rs.getInt("duration");
// rs.getBoolean("ecsc39");
// rs.getBoolean("oamandatepublications");
// rs.getBoolean("ecarticle29_3");
// rs.getDate("dateofcollection");
// rs.getDate("dateoftransformation");
rs.getBoolean("inferred");
rs.getBoolean("deletedbyinference");
rs.getDouble("trust");
rs.getString("inferenceprovenance");
// rs.getString("optional1");
// rs.getString("optional2");
rs.getString("jsonextrainfo");
// rs.getString("contactfullname");
// rs.getString("contactfax");
// rs.getString("contactphone");
// rs.getString("contactemail");
// rs.getString("summary");
// rs.getString("currency");
// rs.getDouble("totalcost");
// rs.getDouble("fundedamount");
// rs.getString("collectedfromid");
// rs.getString("collectedfromname");
rs.getString("contracttype"); // COMPLEX
rs.getString("provenanceaction"); // COMPLEX
rs.getArray("pid");
rs.getArray("subjects");
rs.getArray("fundingtree");
emitOaf(p);
} catch (final Exception e) {
// TODO: handle exception
}
}
public void processOrganization(final ResultSet rs) {
try {
final DataInfo info = MigrationUtils.dataInfo(null, null, null, null, null, null); // TODO
final Organization o = new Organization();
o.setId(MigrationUtils.createOpenaireId("20", rs.getString("organizationid")));
o.setOriginalId(Arrays.asList(rs.getString("organizationid")));
o.setCollectedfrom(MigrationUtils.listKeyValues(rs.getString("collectedfromid"), rs.getString("collectedfromname")));
o.setPid(null); // List<StructuredProperty> // TODO
o.setDateofcollection(rs.getDate("dateofcollection").toString());
o.setDateoftransformation(rs.getDate("dateoftransformation").toString());
o.setExtraInfo(null); // List<ExtraInfo> // TODO
o.setOaiprovenance(null); // OAIProvenance // TODO
o.setLegalshortname(MigrationUtils.field("legalshortname", info));
o.setLegalname(MigrationUtils.field("legalname", info));
o.setAlternativeNames(null); // List<Field<String>> //TODO
o.setWebsiteurl(MigrationUtils.field("websiteurl", info));
o.setLogourl(MigrationUtils.field("logourl", info));
o.setEclegalbody(MigrationUtils.field(Boolean.toString(rs.getBoolean("eclegalbody")), info));
o.setEclegalperson(MigrationUtils.field(Boolean.toString(rs.getBoolean("eclegalperson")), info));
o.setEcnonprofit(MigrationUtils.field(Boolean.toString(rs.getBoolean("ecnonprofit")), info));
o.setEcresearchorganization(MigrationUtils.field(Boolean.toString(rs.getBoolean("ecresearchorganization")), info));
o.setEchighereducation(MigrationUtils.field(Boolean.toString(rs.getBoolean("echighereducation")), info));
o.setEcinternationalorganizationeurinterests(MigrationUtils
.field(Boolean.toString(rs.getBoolean("ecinternationalorganizationeurinterests")), info));
o.setEcinternationalorganization(MigrationUtils.field(Boolean.toString(rs.getBoolean("ecinternationalorganization")), info));
o.setEcenterprise(MigrationUtils.field(Boolean.toString(rs.getBoolean("ecenterprise")), info));
o.setEcsmevalidated(MigrationUtils.field(Boolean.toString(rs.getBoolean("ecsmevalidated")), info));
o.setEcnutscode(MigrationUtils.field(Boolean.toString(rs.getBoolean("ecnutscode")), info));
o.setCountry(null); // Qualifier // TODO
// rs.getString("organizationid");
// rs.getString("legalshortname");
// rs.getString("legalname");
// rs.getString("websiteurl");
// rs.getString("logourl");
// rs.getBoolean("eclegalbody");
// rs.getBoolean("eclegalperson");
// rs.getBoolean("ecnonprofit");
// rs.getBoolean("ecresearchorganization");
// rs.getBoolean("echighereducation");
// rs.getBoolean("ecinternationalorganizationeurinterests");
// rs.getBoolean("ecinternationalorganization");
// rs.getBoolean("ecenterprise");
// rs.getBoolean("ecsmevalidated");
// rs.getBoolean("ecnutscode");
rs.getDate("dateofcollection");
rs.getDate("dateoftransformation");
rs.getBoolean("inferred");
rs.getBoolean("deletedbyinference");
rs.getDouble("trust");
rs.getString("inferenceprovenance");
// rs.getString("collectedfromid");
// rs.getString("collectedfromname");
rs.getString("country");
rs.getString("provenanceaction");
rs.getArray("pid");
emitOaf(o);
} catch (final Exception e) {
// TODO: handle exception
}
}
public void processDatasourceOrganization(final ResultSet rs) {
try {
final Relation r = new Relation();
r.setRelType(null); // TODO
r.setSubRelType(null); // TODO
r.setRelClass(null); // TODO
r.setSource(null); // TODO
r.setTarget(null); // TODO
r.setCollectedFrom(MigrationUtils.listKeyValues("", ""));
rs.getString("datasource");
rs.getString("organization");
rs.getDate("startdate"); // NULL
rs.getDate("enddate"); // NULL
rs.getBoolean("inferred"); // false
rs.getBoolean("deletedbyinference"); // false
rs.getDouble("trust"); // 0.9
rs.getString("inferenceprovenance"); // NULL
rs.getString("semantics"); // 'providedBy@@@provided
// by@@@dnet:datasources_organizations_typologies@@@dnet:datasources_organizations_typologies' AS
// semantics,
rs.getString("provenanceaction"); // d.provenanceaction || '@@@' || d.provenanceaction ||
// '@@@dnet:provenanceActions@@@dnet:provenanceActions' AS provenanceaction
emitOaf(r);
} catch (final Exception e) {
// TODO: handle exception
}
}
public void processProjectOrganization(final ResultSet rs) {
try {
final Relation r = new Relation();
r.setRelType(null); // TODO
r.setSubRelType(null); // TODO
r.setRelClass(null); // TODO
r.setSource(null); // TODO
r.setTarget(null); // TODO
r.setCollectedFrom(null);
rs.getString("project");
rs.getString("resporganization");
rs.getInt("participantnumber");
rs.getDouble("contribution");
rs.getDate("startdate");// null
rs.getDate("enddate");// null
rs.getBoolean("inferred");// false
rs.getBoolean("deletedbyinference"); // false
rs.getDouble("trust");
rs.getString("inferenceprovenance"); // NULL
rs.getString("semantics"); // po.semanticclass || '@@@' || po.semanticclass ||
// '@@@dnet:project_organization_relations@@@dnet:project_organization_relations' AS semantics,
rs.getString("provenanceaction"); // 'sysimport:crosswalk:entityregistry@@@sysimport:crosswalk:entityregistry@@@dnet:provenance_actions@@@dnet:provenance_actions'
// AS provenanceaction
emitOaf(r);
} catch (final Exception e) {
// TODO: handle exception
}
}
@Override
public void close() throws IOException {
super.close();
dbClient.close();
}
}

View File

@@ -0,0 +1,190 @@
package eu.dnetlib.dhp.migration;
import java.io.Closeable;
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;
import java.util.Map.Entry;
import org.apache.commons.io.IOUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.Node;
import org.dom4j.io.SAXReader;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.schema.oaf.Dataset;
import eu.dnetlib.dhp.schema.oaf.Oaf;
import eu.dnetlib.dhp.schema.oaf.OtherResearchProduct;
import eu.dnetlib.dhp.schema.oaf.Publication;
import eu.dnetlib.dhp.schema.oaf.Relation;
import eu.dnetlib.dhp.schema.oaf.Result;
import eu.dnetlib.dhp.schema.oaf.Software;
public class MigrateMongoMdstoresApplication extends AbstractMigrateApplication implements Closeable {
private static final Log log = LogFactory.getLog(MigrateMongoMdstoresApplication.class);
private final MdstoreClient mdstoreClient;
public static void main(final String[] args) throws Exception {
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
IOUtils.toString(MigrateMongoMdstoresApplication.class.getResourceAsStream("/eu/dnetlib/dhp/migration/migrate_mongo_mstores_parameters.json")));
parser.parseArgument(args);
final String mongoBaseUrl = parser.get("mongoBaseUrl");
final String mongoDb = parser.get("mongoDb");
final String mdFormat = parser.get("mdFormat");
final String mdLayout = parser.get("mdLayout");
final String mdInterpretation = parser.get("mdInterpretation");
final String hdfsPath = parser.get("hdfsPath");
final String hdfsNameNode = parser.get("namenode");
final String hdfsUser = parser.get("hdfsUser");
try (final MigrateMongoMdstoresApplication mig = new MigrateMongoMdstoresApplication(hdfsPath, hdfsNameNode, hdfsUser, mongoBaseUrl, mongoDb)) {
mig.processMdRecords(mdFormat, mdLayout, mdInterpretation);
}
}
public MigrateMongoMdstoresApplication(final String hdfsPath, final String hdfsNameNode, final String hdfsUser, final String mongoBaseUrl,
final String mongoDb) throws Exception {
super(hdfsPath, hdfsNameNode, hdfsUser);
this.mdstoreClient = new MdstoreClient(mongoBaseUrl, mongoDb);
}
public void processMdRecords(final String mdFormat, final String mdLayout, final String mdInterpretation) throws DocumentException {
for (final Entry<String, String> entry : mdstoreClient.validCollections(mdFormat, mdLayout, mdInterpretation).entrySet()) {
// final String mdId = entry.getKey();
final String currentColl = entry.getValue();
for (final String xml : mdstoreClient.listRecords(currentColl)) {
for (final Oaf oaf : createOafs(xml)) {
emitOaf(oaf);
}
}
}
}
private List<Oaf> createOafs(final String xml) throws DocumentException {
final SAXReader reader = new SAXReader();
final Document doc = reader.read(new StringReader(xml));
final String type = doc.valueOf(""); // TODO
final List<Oaf> oafs = new ArrayList<>();
switch (type.toLowerCase()) {
case "publication":
final Publication p = new Publication();
populateResultFields(p, doc);
p.setJournal(null); // TODO
oafs.add(p);
break;
case "dataset":
final Dataset d = new Dataset();
populateResultFields(d, doc);
d.setStoragedate(null); // TODO
d.setDevice(null); // TODO
d.setSize(null); // TODO
d.setVersion(null); // TODO
d.setLastmetadataupdate(null); // TODO
d.setMetadataversionnumber(null); // TODO
d.setGeolocation(null); // TODO
oafs.add(d);
break;
case "otherresearchproducts":
final OtherResearchProduct o = new OtherResearchProduct();
populateResultFields(o, doc);
o.setContactperson(null); // TODO
o.setContactgroup(null); // TODO
o.setTool(null); // TODO
oafs.add(o);
break;
case "software":
final Software s = new Software();
populateResultFields(s, doc);
s.setDocumentationUrl(null); // TODO
s.setLicense(null); // TODO
s.setCodeRepositoryUrl(null); // TODO
s.setProgrammingLanguage(null); // TODO
oafs.add(s);
break;
default:
log.error("Inavlid type: " + type);
break;
}
if (!oafs.isEmpty()) {
addRelations(oafs, doc, "//*", "TYPE");
addRelations(oafs, doc, "//*", "TYPE");
addRelations(oafs, doc, "//*", "TYPE");
}
return oafs;
}
private void addRelations(final List<Oaf> oafs, final Document doc, final String xpath, final String type) {
for (final Object o : doc.selectNodes(xpath)) {
final Node n = (Node) o;
final Relation r = new Relation();
r.setRelType(null); // TODO
r.setSubRelType(null); // TODO
r.setRelClass(null); // TODO
r.setSource(null); // TODO
r.setTarget(null); // TODO
r.setCollectedFrom(null); // TODO
oafs.add(r);
}
}
private void populateResultFields(final Result r, final Document doc) {
r.setDataInfo(null); // TODO
r.setLastupdatetimestamp(null); // TODO
r.setId(null); // TODO
r.setOriginalId(null); // TODO
r.setCollectedfrom(null); // TODO
r.setPid(null); // TODO
r.setDateofcollection(null); // TODO
r.setDateoftransformation(null); // TODO
r.setExtraInfo(null); // TODO
r.setOaiprovenance(null); // TODO
r.setAuthor(null); // TODO
r.setResulttype(null); // TODO
r.setLanguage(null); // TODO
r.setCountry(null); // TODO
r.setSubject(null); // TODO
r.setTitle(null); // TODO
r.setRelevantdate(null); // TODO
r.setDescription(null); // TODO
r.setDateofacceptance(null); // TODO
r.setPublisher(null); // TODO
r.setEmbargoenddate(null); // TODO
r.setSource(null); // TODO
r.setFulltext(null); // TODO
r.setFormat(null); // TODO
r.setContributor(null); // TODO
r.setResourcetype(null); // TODO
r.setCoverage(null); // TODO
r.setRefereed(null); // TODO
r.setContext(null); // TODO
r.setExternalReference(null); // TODO
r.setInstance(null); // TODO
r.setProcessingchargeamount(null); // TODO
r.setProcessingchargecurrency(null); // TODO
}
@Override
public void close() throws IOException {
super.close();
mdstoreClient.close();
}
}

View File

@@ -0,0 +1,164 @@
package eu.dnetlib.dhp.migration;
import java.sql.Array;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;
import org.apache.commons.lang3.StringUtils;
import eu.dnetlib.dhp.schema.oaf.DataInfo;
import eu.dnetlib.dhp.schema.oaf.ExtraInfo;
import eu.dnetlib.dhp.schema.oaf.Field;
import eu.dnetlib.dhp.schema.oaf.Journal;
import eu.dnetlib.dhp.schema.oaf.KeyValue;
import eu.dnetlib.dhp.schema.oaf.OAIProvenance;
import eu.dnetlib.dhp.schema.oaf.OriginDescription;
import eu.dnetlib.dhp.schema.oaf.Qualifier;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
import eu.dnetlib.dhp.utils.DHPUtils;
public class MigrationUtils {
public static KeyValue keyValue(final String k, final String v) {
final KeyValue kv = new KeyValue();
kv.setKey(k);
kv.setValue(v);
return kv;
}
public static List<KeyValue> listKeyValues(final String... s) {
if (s.length % 2 > 0) { throw new RuntimeException("Invalid number of parameters (k,v,k,v,....)"); }
final List<KeyValue> list = new ArrayList<>();
for (int i = 0; i < s.length; i += 2) {
list.add(keyValue(s[i], s[i + 1]));
}
return list;
}
public static <T> Field<T> field(final T value, final DataInfo info) {
final Field<T> field = new Field<>();
field.setValue(value);
field.setDataInfo(info);
return field;
}
public static List<Field<String>> listFields(final DataInfo info, final String... values) {
return Arrays.stream(values).map(v -> field(v, info)).collect(Collectors.toList());
}
public static List<Field<String>> listFields(final DataInfo info, final Array array) {
try {
return listFields(info, (String[]) array.getArray());
} catch (final SQLException e) {
throw new RuntimeException("Invalid SQL array", e);
}
}
public static Qualifier qualifier(final String classid, final String classname, final String schemeid, final String schemename) {
final Qualifier q = new Qualifier();
q.setClassid(classid);
q.setClassname(classname);
q.setSchemeid(schemeid);
q.setSchemename(schemename);
return q;
}
public static StructuredProperty structuredProperty(final String value,
final String classid,
final String classname,
final String schemeid,
final String schemename,
final DataInfo dataInfo) {
final StructuredProperty sp = new StructuredProperty();
sp.setValue(value);
sp.setQualifier(qualifier(classid, classname, schemeid, schemename));
sp.setDataInfo(dataInfo);
return sp;
}
public static ExtraInfo extraInfo(final String name, final String value, final String typology, final String provenance, final String trust) {
final ExtraInfo info = new ExtraInfo();
info.setName(name);
info.setValue(value);
info.setTypology(typology);
info.setProvenance(provenance);
info.setTrust(trust);
return info;
}
public static OAIProvenance oaiIProvenance(final String identifier,
final String baseURL,
final String metadataNamespace,
final Boolean altered,
final String datestamp,
final String harvestDate) {
final OriginDescription desc = new OriginDescription();
desc.setIdentifier(identifier);
desc.setBaseURL(baseURL);
desc.setMetadataNamespace(metadataNamespace);
desc.setAltered(altered);
desc.setDatestamp(datestamp);
desc.setHarvestDate(harvestDate);
final OAIProvenance p = new OAIProvenance();
p.setOriginDescription(desc);
return p;
}
public static Journal journal(final String name,
final String issnPrinted,
final String issnOnline,
final String issnLinking,
final String ep,
final String iss,
final String sp,
final String vol,
final String edition,
final String conferenceplace,
final String conferencedate,
final DataInfo dataInfo) {
final Journal j = new Journal();
j.setName(name);
j.setIssnPrinted(issnPrinted);
j.setIssnOnline(issnOnline);
j.setIssnLinking(issnLinking);
j.setEp(ep);
j.setIss(iss);
j.setSp(sp);
j.setVol(vol);
j.setEdition(edition);
j.setConferenceplace(conferenceplace);
j.setConferencedate(conferencedate);
j.setDataInfo(dataInfo);
return j;
}
public static DataInfo dataInfo(final Boolean deletedbyinference,
final String inferenceprovenance,
final Boolean inferred,
final Boolean invisible,
final Qualifier provenanceaction,
final String trust) {
final DataInfo d = new DataInfo();
d.setDeletedbyinference(deletedbyinference);
d.setInferenceprovenance(inferenceprovenance);
d.setInferred(inferred);
d.setInvisible(invisible);
d.setProvenanceaction(provenanceaction);
d.setTrust(trust);
return d;
}
public static String createOpenaireId(final String prefix, final String originalId) {
final String nsPrefix = StringUtils.substringBefore(originalId, "::");
final String rest = StringUtils.substringAfter(originalId, "::");
return String.format("%s|%s::%s", prefix, nsPrefix, DHPUtils.md5(rest));
}
}
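For illustration, createOpenaireId keeps the namespace prefix before the first '::' and replaces the remainder with its MD5 hash; the entity-type prefixes used above are "10" for datasources, "20" for organizations and "40" for projects. The input below is invented:

// Invented example: yields "10|opendoar____::" + DHPUtils.md5("123")
final String datasourceId = MigrationUtils.createOpenaireId("10", "opendoar____::123");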

View File

@@ -0,0 +1,38 @@
[
{
"paramName": "p",
"paramLongName": "hdfsPath",
"paramDescription": "the path where storing the sequential file",
"paramRequired": true
},
{
"paramName": "n",
"paramLongName": "namenode",
"paramDescription": "the Name Node URI",
"paramRequired": true
},
{
"paramName": "u",
"paramLongName": "hdfsUser",
"paramDescription": "the user wich create the hdfs seq file",
"paramRequired": true
},
{
"paramName": "dburl",
"paramLongName": "postgresUrl",
"paramDescription": "postgres url, example: jdbc:postgresql://localhost:5432/testdb",
"paramRequired": true
},
{
"paramName": "dbuser",
"paramLongName": "postgresUser",
"paramDescription": "postgres user",
"paramRequired": true
},
{
"paramName": "dbpasswd",
"paramLongName": "postgresPassword",
"paramDescription": "postgres password",
"paramRequired": true
}
]
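A hedged sketch of launching MigrateDbEntitiesApplication with these parameters; all values are placeholders, and the short-option syntax (-p, -n, ...) is an assumption based on the paramName fields above, not documented ArgumentApplicationParser behaviour:

package eu.dnetlib.dhp.migration;

public class MigrateDbEntitiesLauncher {

	public static void main(final String[] args) throws Exception {
		// All values are placeholders; the option names are assumed from the
		// paramName fields declared in the JSON file above.
		MigrateDbEntitiesApplication.main(new String[] {
			"-p", "/tmp/migration/db_entities.seq",
			"-n", "hdfs://namenode:8020",
			"-u", "dnet",
			"-dburl", "jdbc:postgresql://localhost:5432/dnet",
			"-dbuser", "dnet",
			"-dbpasswd", "***"
		});
	}
}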

View File

@@ -0,0 +1,50 @@
[
{
"paramName": "p",
"paramLongName": "hdfsPath",
"paramDescription": "the path where storing the sequential file",
"paramRequired": true
},
{
"paramName": "n",
"paramLongName": "namenode",
"paramDescription": "the Name Node URI",
"paramRequired": true
},
{
"paramName": "u",
"paramLongName": "hdfsUser",
"paramDescription": "the user wich create the hdfs seq file",
"paramRequired": true
},
{
"paramName": "mongourl",
"paramLongName": "mongoBaseUrl",
"paramDescription": "mongoDB url, example: mongodb://[username:password@]host[:port]",
"paramRequired": true
},
{
"paramName": "db",
"paramLongName": "mongoDb",
"paramDescription": "mongo database",
"paramRequired": true
},
{
"paramName": "f",
"paramLongName": "mdFormat",
"paramDescription": "metadata format",
"paramRequired": true
},
{
"paramName": "l",
"paramLongName": "mdLayout",
"paramDescription": "metadata layout",
"paramRequired": true
},
{
"paramName": "i",
"paramLongName": "mdInterpretation",
"paramDescription": "metadata interpretation",
"paramRequired": true
}
]

View File

@@ -0,0 +1,16 @@
SELECT
dor.datasource AS datasource,
dor.organization AS organization,
NULL AS startdate,
NULL AS enddate,
false AS inferred,
false AS deletedbyinference,
0.9 AS trust,
NULL AS inferenceprovenance,
'providedBy@@@provided by@@@dnet:datasources_organizations_typologies@@@dnet:datasources_organizations_typologies' AS semantics,
d.provenanceaction || '@@@' || d.provenanceaction || '@@@dnet:provenanceActions@@@dnet:provenanceActions' AS provenanceaction
FROM dsm_datasource_organization dor
LEFT OUTER JOIN dsm_datasources d ON (dor.datasource = d.id)

View File

@@ -0,0 +1,147 @@
SELECT
d.id AS datasourceid,
d.id || array_agg(distinct di.pid) AS identities,
d.officialname AS officialname,
d.englishname AS englishname,
d.contactemail AS contactemail,
CASE
WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility):: TEXT) @> ARRAY ['openaire-cris_1.1'])
THEN
'openaire-cris_1.1@@@OpenAIRE CRIS v1.1@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel'
WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility):: TEXT) @> ARRAY ['driver', 'openaire2.0'])
THEN
'driver-openaire2.0@@@OpenAIRE 2.0+ (DRIVER OA, EC funding)@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel'
WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility) :: TEXT) @> ARRAY ['driver'])
THEN
'driver@@@OpenAIRE Basic (DRIVER OA)@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel'
WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility) :: TEXT) @> ARRAY ['openaire2.0'])
THEN
'openaire2.0@@@OpenAIRE 2.0 (EC funding)@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel'
WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility) :: TEXT) @> ARRAY ['openaire3.0'])
THEN
'openaire3.0@@@OpenAIRE 3.0 (OA, funding)@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel'
WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility) :: TEXT) @> ARRAY ['openaire2.0_data'])
THEN
'openaire2.0_data@@@OpenAIRE Data (funded, referenced datasets)@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel'
WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility) :: TEXT) @> ARRAY ['native'])
THEN
'native@@@proprietary@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel'
WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility) :: TEXT) @> ARRAY ['hostedBy'])
THEN
'hostedBy@@@collected from a compatible aggregator@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel'
WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility) :: TEXT) @> ARRAY ['notCompatible'])
THEN
'notCompatible@@@under validation@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel'
ELSE
'UNKNOWN@@@not available@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel'
END AS openairecompatibility,
d.websiteurl AS websiteurl,
d.logourl AS logourl,
array_agg(DISTINCT CASE WHEN a.protocol = 'oai' and last_aggregation_date is not null THEN a.baseurl ELSE NULL END) AS accessinfopackage,
d.latitude AS latitude,
d.longitude AS longitude,
d.namespaceprefix AS namespaceprefix,
NULL AS odnumberofitems,
NULL AS odnumberofitemsdate,
(SELECT array_agg(s|| '###keywords@@@keywords@@@dnet:subject_classification_typologies@@@dnet:subject_classification_typologies')
FROM UNNEST(
ARRAY(
SELECT trim(s)
FROM unnest(string_to_array(d.subjects, '@@')) AS s)) AS s) AS subjects,
d.description AS description,
NULL AS odpolicies,
ARRAY(SELECT trim(s)
FROM unnest(string_to_array(d.languages, ',')) AS s) AS odlanguages,
ARRAY(SELECT trim(s)
FROM unnest(string_to_array(d.od_contenttypes, '-')) AS s) AS odcontenttypes,
false AS inferred,
false AS deletedbyinference,
0.9 AS trust,
NULL AS inferenceprovenance,
d.dateofcollection AS dateofcollection,
d.dateofvalidation AS dateofvalidation,
-- re3data fields
d.releasestartdate AS releasestartdate,
d.releaseenddate AS releaseenddate,
d.missionstatementurl AS missionstatementurl,
d.dataprovider AS dataprovider,
d.serviceprovider AS serviceprovider,
d.databaseaccesstype AS databaseaccesstype,
d.datauploadtype AS datauploadtype,
d.databaseaccessrestriction AS databaseaccessrestriction,
d.datauploadrestriction AS datauploadrestriction,
d.versioning AS versioning,
d.citationguidelineurl AS citationguidelineurl,
d.qualitymanagementkind AS qualitymanagementkind,
d.pidsystems AS pidsystems,
d.certificates AS certificates,
ARRAY[]::text[] AS policies,
dc.id AS collectedfromid,
dc.officialname AS collectedfromname,
d.typology || '@@@' || CASE
WHEN (d.typology = 'crissystem') THEN 'CRIS System'
WHEN (d.typology = 'datarepository::unknown') THEN 'Data Repository'
WHEN (d.typology = 'aggregator::datarepository') THEN 'Data Repository Aggregator'
WHEN (d.typology = 'infospace') THEN 'Information Space'
WHEN (d.typology = 'pubsrepository::institutional') THEN 'Institutional Repository'
WHEN (d.typology = 'aggregator::pubsrepository::institutional') THEN 'Institutional Repository Aggregator'
WHEN (d.typology = 'pubsrepository::journal') THEN 'Journal'
WHEN (d.typology = 'aggregator::pubsrepository::journals') THEN 'Journal Aggregator/Publisher'
WHEN (d.typology = 'pubsrepository::mock') THEN 'Other'
WHEN (d.typology = 'pubscatalogue::unknown') THEN 'Publication Catalogue'
WHEN (d.typology = 'pubsrepository::unknown') THEN 'Publication Repository'
WHEN (d.typology = 'aggregator::pubsrepository::unknown') THEN 'Publication Repository Aggregator'
WHEN (d.typology = 'entityregistry') THEN 'Registry'
WHEN (d.typology = 'scholarcomminfra') THEN 'Scholarly Comm. Infrastructure'
WHEN (d.typology = 'pubsrepository::thematic') THEN 'Thematic Repository'
WHEN (d.typology = 'websource') THEN 'Web Source'
WHEN (d.typology = 'entityregistry::projects') THEN 'Funder database'
WHEN (d.typology = 'entityregistry::repositories') THEN 'Registry of repositories'
WHEN (d.typology = 'softwarerepository') THEN 'Software Repository'
WHEN (d.typology = 'aggregator::softwarerepository') THEN 'Software Repository Aggregator'
WHEN (d.typology = 'orprepository') THEN 'Repository'
ELSE 'Other'
END || '@@@dnet:datasource_typologies@@@dnet:datasource_typologies' AS datasourcetype,
'sysimport:crosswalk:entityregistry@@@sysimport:crosswalk:entityregistry@@@dnet:provenance_actions@@@dnet:provenance_actions' AS provenanceaction,
CONCAT(d.issn, '@@@', d.eissn, '@@@', d.lissn) AS journal
FROM dsm_datasources d
LEFT OUTER JOIN dsm_datasources dc on (d.collectedfrom = dc.id)
LEFT OUTER JOIN dsm_api a ON (d.id = a.datasource)
LEFT OUTER JOIN dsm_datasourcepids di ON (d.id = di.datasource)
GROUP BY
d.id,
d.officialname,
d.englishname,
d.websiteurl,
d.logourl,
d.contactemail,
d.namespaceprefix,
d.description,
d.latitude,
d.longitude,
d.dateofcollection,
d.dateofvalidation,
d.releasestartdate,
d.releaseenddate,
d.missionstatementurl,
d.dataprovider,
d.serviceprovider,
d.databaseaccesstype,
d.datauploadtype,
d.databaseaccessrestriction,
d.datauploadrestriction,
d.versioning,
d.citationguidelineurl,
d.qualitymanagementkind,
d.pidsystems,
d.certificates,
dc.id,
dc.officialname,
d.issn,
d.eissn,
d.lissn
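Several columns above (openairecompatibility, datasourcetype, provenanceaction) pack a Qualifier into a single string using '@@@' as separator, which is why processDatasource still carries the // COMPLEX TODOs. A minimal sketch (not part of this commit; class and method names are invented) of how such a value could be split back into a Qualifier with the MigrationUtils helpers:

package eu.dnetlib.dhp.migration;

import org.apache.commons.lang3.StringUtils;

import eu.dnetlib.dhp.schema.oaf.Qualifier;

public class QualifierParsing {

	// Sketch only: splits the 'classid@@@classname@@@schemeid@@@schemename' strings
	// produced by this query into a Qualifier, reusing MigrationUtils.qualifier(...).
	public static Qualifier parseQualifier(final String s) {
		if (StringUtils.isBlank(s)) { return null; }
		final String[] arr = s.split("@@@");
		if (arr.length != 4) { return null; }
		return MigrationUtils.qualifier(arr[0].trim(), arr[1].trim(), arr[2].trim(), arr[3].trim());
	}
}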

View File

@@ -0,0 +1,36 @@
SELECT
o.id AS organizationid,
o.legalshortname AS legalshortname,
o.legalname AS legalname,
o.websiteurl AS websiteurl,
o.logourl AS logourl,
o.ec_legalbody AS eclegalbody,
o.ec_legalperson AS eclegalperson,
o.ec_nonprofit AS ecnonprofit,
o.ec_researchorganization AS ecresearchorganization,
o.ec_highereducation AS echighereducation,
o.ec_internationalorganizationeurinterests AS ecinternationalorganizationeurinterests,
o.ec_internationalorganization AS ecinternationalorganization,
o.ec_enterprise AS ecenterprise,
o.ec_smevalidated AS ecsmevalidated,
o.ec_nutscode AS ecnutscode,
o.dateofcollection AS dateofcollection,
o.lastupdate AS dateoftransformation,
false AS inferred,
false AS deletedbyinference,
o.trust AS trust,
'' AS inferenceprovenance,
d.id AS collectedfromid,
d.officialname AS collectedfromname,
o.country || '@@@dnet:countries' AS country,
'sysimport:crosswalk:entityregistry@@@sysimport:crosswalk:entityregistry@@@dnet:provenance_actions@@@dnet:provenance_actions' AS provenanceaction,
ARRAY[]::text[] AS pid
FROM dsm_organizations o
LEFT OUTER JOIN dsm_datasources d ON (d.id = o.collectedfrom)

View File

@@ -0,0 +1,53 @@
SELECT
o.id AS organizationid,
coalesce((array_agg(a.acronym))[1], o.name) AS legalshortname,
o.name AS legalname,
array_agg(DISTINCT n.name) AS "alternativeNames",
(array_agg(u.url))[1] AS websiteurl,
o.modification_date AS dateoftransformation,
false AS inferred,
false AS deletedbyinference,
0.95 AS trust,
'' AS inferenceprovenance,
'openaire____::openorgs' AS collectedfromid,
'OpenOrgs Database' AS collectedfromname,
o.country || '@@@dnet:countries' AS country,
'sysimport:crosswalk:entityregistry@@@sysimport:crosswalk:entityregistry@@@dnet:provenance_actions@@@dnet:provenance_actions' AS provenanceaction,
array_agg(DISTINCT i.otherid || '###' || i.type || '@@@dnet:pid_types') AS pid
FROM organizations o
LEFT OUTER JOIN acronyms a ON (a.id = o.id)
LEFT OUTER JOIN urls u ON (u.id = o.id)
LEFT OUTER JOIN other_ids i ON (i.id = o.id)
LEFT OUTER JOIN other_names n ON (n.id = o.id)
GROUP BY
o.id,
o.name,
o.modification_date,
o.country
UNION ALL
SELECT
'openorgsmesh'||substring(o.id, 13)||'-'||md5(n.name) AS organizationid,
n.name AS legalshortname,
n.name AS legalname,
ARRAY[]::text[] AS "alternativeNames",
(array_agg(u.url))[1] AS websiteurl,
o.modification_date AS dateoftransformation,
false AS inferred,
false AS deletedbyinference,
0.88 AS trust,
'' AS inferenceprovenance,
'openaire____::openorgs' AS collectedfromid,
'OpenOrgs Database' AS collectedfromname,
o.country || '@@@dnet:countries' AS country,
'sysimport:crosswalk:entityregistry@@@sysimport:crosswalk:entityregistry@@@dnet:provenance_actions@@@dnet:provenance_actions' AS provenanceaction,
array_agg(DISTINCT i.otherid || '###' || i.type || '@@@dnet:pid_types') AS pid
FROM other_names n
LEFT OUTER JOIN organizations o ON (n.id = o.id)
LEFT OUTER JOIN urls u ON (u.id = o.id)
LEFT OUTER JOIN other_ids i ON (i.id = o.id)
GROUP BY
o.id, o.modification_date, o.country, n.name

View File

@@ -0,0 +1,16 @@
SELECT
po.project AS project,
po.resporganization AS resporganization,
po.participantnumber AS participantnumber,
po.contribution AS contribution,
NULL AS startdate,
NULL AS enddate,
false AS inferred,
false AS deletedbyinference,
po.trust AS trust,
NULL AS inferenceprovenance,
po.semanticclass || '@@@' || po.semanticclass || '@@@dnet:project_organization_relations@@@dnet:project_organization_relations' AS semantics,
'sysimport:crosswalk:entityregistry@@@sysimport:crosswalk:entityregistry@@@dnet:provenance_actions@@@dnet:provenance_actions' AS provenanceaction
FROM project_organization po

View File

@@ -0,0 +1,87 @@
SELECT
p.id AS projectid,
p.code AS code,
p.websiteurl AS websiteurl,
p.acronym AS acronym,
p.title AS title,
p.startdate AS startdate,
p.enddate AS enddate,
p.call_identifier AS callidentifier,
p.keywords AS keywords,
p.duration AS duration,
p.ec_sc39 AS ecsc39,
p.oa_mandate_for_publications AS oamandatepublications,
p.ec_article29_3 AS ecarticle29_3,
p.dateofcollection AS dateofcollection,
p.lastupdate AS dateoftransformation,
p.inferred AS inferred,
p.deletedbyinference AS deletedbyinference,
p.trust AS trust,
p.inferenceprovenance AS inferenceprovenance,
p.optional1 AS optional1,
p.optional2 AS optional2,
p.jsonextrainfo AS jsonextrainfo,
p.contactfullname AS contactfullname,
p.contactfax AS contactfax,
p.contactphone AS contactphone,
p.contactemail AS contactemail,
p.summary AS summary,
p.currency AS currency,
p.totalcost AS totalcost,
p.fundedamount AS fundedamount,
dc.id AS collectedfromid,
dc.officialname AS collectedfromname,
p.contracttype || '@@@' || p.contracttypename || '@@@' || p.contracttypescheme || '@@@' || p.contracttypescheme AS contracttype,
pac.code || '@@@' || pac.name || '@@@' || pas.code || '@@@' || pas.name AS provenanceaction,
array_agg(DISTINCT i.pid || '###' || i.issuertype) AS pid,
array_agg(DISTINCT s.name || '###' || sc.code || '@@@' || sc.name || '@@@' || ss.code || '@@@' || ss.name) AS subjects,
array_agg(DISTINCT fp.path) AS fundingtree
FROM projects p
LEFT OUTER JOIN class pac ON (pac.code = p.provenanceactionclass)
LEFT OUTER JOIN scheme pas ON (pas.code = p.provenanceactionscheme)
LEFT OUTER JOIN projectpids pp ON (pp.project = p.id)
LEFT OUTER JOIN dsm_identities i ON (i.pid = pp.pid)
LEFT OUTER JOIN dsm_datasources dc ON (dc.id = p.collectedfrom)
LEFT OUTER JOIN project_fundingpath pf ON (pf.project = p.id)
LEFT OUTER JOIN fundingpaths fp ON (fp.id = pf.funding)
LEFT OUTER JOIN project_subject ps ON (ps.project = p.id)
LEFT OUTER JOIN subjects s ON (s.id = ps.subject)
LEFT OUTER JOIN class sc ON (sc.code = s.semanticclass)
LEFT OUTER JOIN scheme ss ON (ss.code = s.semanticscheme)
GROUP BY
p.id,
p.code,
p.websiteurl,
p.acronym,
p.title,
p.startdate,
p.enddate,
p.call_identifier,
p.keywords,
p.duration,
p.ec_sc39,
p.oa_mandate_for_publications,
p.ec_article29_3,
p.dateofcollection,
p.inferred,
p.deletedbyinference,
p.trust,
p.inferenceprovenance,
p.contactfullname,
p.contactfax,
p.contactphone,
p.contactemail,
p.contracttype,
p.summary,
p.currency,
p.totalcost,
p.fundedamount,
dc.id,
dc.officialname,
pac.code, pac.name, pas.code, pas.name;

View File

@@ -0,0 +1,17 @@
SELECT local_id AS id1, oa_original_id AS id2 FROM openaire_simrels WHERE reltype = 'is_similar'
UNION ALL
SELECT
o.id AS id1,
'openorgsmesh'||substring(o.id, 13)||'-'||md5(a.acronym) AS id2
FROM acronyms a
LEFT OUTER JOIN organizations o ON (a.id = o.id)
UNION ALL
SELECT
o.id AS id1,
'openorgsmesh'||substring(o.id, 13)||'-'||md5(n.name) AS id2
FROM other_names n
LEFT OUTER JOIN organizations o ON (n.id = o.id)