forked from D-Net/dnet-hadoop
partial implementation of applications to migrate entities
This commit is contained in:
parent
f7b9a7a9af
commit
81f82b5d34
|
@ -0,0 +1,61 @@
|
|||
package eu.dnetlib.dhp.migration;
|
||||
|
||||
import java.io.Closeable;
|
||||
import java.io.IOException;
|
||||
import java.net.URI;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.io.IntWritable;
|
||||
import org.apache.hadoop.io.SequenceFile;
|
||||
import org.apache.hadoop.io.Text;
|
||||
import org.codehaus.jackson.map.ObjectMapper;
|
||||
|
||||
import eu.dnetlib.dhp.schema.oaf.Oaf;
|
||||
|
||||
public class AbstractMigrateApplication implements Closeable {
|
||||
|
||||
private final AtomicInteger counter = new AtomicInteger(0);
|
||||
|
||||
private final IntWritable key = new IntWritable(counter.get());
|
||||
|
||||
private final Text value = new Text();
|
||||
|
||||
private final ObjectMapper objectMapper = new ObjectMapper();
|
||||
|
||||
private final SequenceFile.Writer writer;
|
||||
|
||||
public AbstractMigrateApplication(final String hdfsPath, final String hdfsNameNode, final String hdfsUser) throws Exception {
|
||||
this.writer = SequenceFile.createWriter(getConf(hdfsNameNode, hdfsUser), SequenceFile.Writer.file(new Path(hdfsPath)), SequenceFile.Writer
|
||||
.keyClass(IntWritable.class), SequenceFile.Writer.valueClass(Text.class));
|
||||
}
|
||||
|
||||
private Configuration getConf(final String hdfsNameNode, final String hdfsUser) throws IOException {
|
||||
final Configuration conf = new Configuration();
|
||||
conf.set("fs.defaultFS", hdfsNameNode);
|
||||
conf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName());
|
||||
conf.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName());
|
||||
System.setProperty("HADOOP_USER_NAME", hdfsUser);
|
||||
System.setProperty("hadoop.home.dir", "/");
|
||||
FileSystem.get(URI.create(hdfsNameNode), conf);
|
||||
return conf;
|
||||
}
|
||||
|
||||
protected void emitOaf(final Oaf oaf) {
|
||||
try {
|
||||
key.set(counter.getAndIncrement());
|
||||
value.set(objectMapper.writeValueAsString(oaf));
|
||||
writer.append(key, value);
|
||||
} catch (final Exception e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
writer.close();
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,58 @@
|
|||
package eu.dnetlib.dhp.migration;
|
||||
|
||||
import java.io.Closeable;
|
||||
import java.io.IOException;
|
||||
import java.sql.Connection;
|
||||
import java.sql.DriverManager;
|
||||
import java.sql.ResultSet;
|
||||
import java.sql.SQLException;
|
||||
import java.sql.Statement;
|
||||
import java.util.function.Consumer;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
|
||||
public class DbClient implements Closeable {
|
||||
|
||||
private static final Log log = LogFactory.getLog(DbClient.class);
|
||||
|
||||
private Connection connection;
|
||||
|
||||
public DbClient(final String address, final String login, final String password) {
|
||||
|
||||
try {
|
||||
Class.forName("org.postgresql.Driver");
|
||||
this.connection = DriverManager.getConnection(address, login, password);
|
||||
this.connection.setAutoCommit(false);
|
||||
} catch (final Exception e) {
|
||||
log.error(e.getClass().getName() + ": " + e.getMessage());
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
log.info("Opened database successfully");
|
||||
}
|
||||
|
||||
public void processResults(final String sql, final Consumer<ResultSet> consumer) {
|
||||
|
||||
try (final Statement stmt = connection.createStatement()) {
|
||||
try (final ResultSet rs = stmt.executeQuery("SELECT * FROM COMPANY;")) {
|
||||
while (rs.next()) {
|
||||
consumer.accept(rs);
|
||||
}
|
||||
} catch (final SQLException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
} catch (final SQLException e1) {
|
||||
throw new RuntimeException(e1);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
try {
|
||||
connection.close();
|
||||
} catch (final SQLException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,87 @@
|
|||
package eu.dnetlib.dhp.migration;
|
||||
|
||||
import java.io.Closeable;
|
||||
import java.io.IOException;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.stream.StreamSupport;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.bson.Document;
|
||||
|
||||
import com.google.common.collect.Iterables;
|
||||
import com.mongodb.MongoClient;
|
||||
import com.mongodb.MongoClientURI;
|
||||
import com.mongodb.client.MongoCollection;
|
||||
import com.mongodb.client.MongoDatabase;
|
||||
|
||||
public class MdstoreClient implements Closeable {
|
||||
|
||||
private final MongoClient client;
|
||||
private final MongoDatabase db;
|
||||
|
||||
private static final String COLL_METADATA = "metadata";
|
||||
private static final String COLL_METADATA_MANAGER = "metadataManager";
|
||||
|
||||
private static final Log log = LogFactory.getLog(MdstoreClient.class);
|
||||
|
||||
public MdstoreClient(final String baseUrl, final String dbName) {
|
||||
this.client = new MongoClient(new MongoClientURI(baseUrl));
|
||||
this.db = getDb(client, dbName);
|
||||
}
|
||||
|
||||
public Map<String, String> validCollections(final String mdFormat, final String mdLayout, final String mdInterpretation) {
|
||||
|
||||
final Map<String, String> transactions = new HashMap<>();
|
||||
for (final Document entry : getColl(db, COLL_METADATA_MANAGER).find()) {
|
||||
final String mdId = entry.getString("mdId");
|
||||
final String currentId = entry.getString("currentId");
|
||||
if (StringUtils.isNoneBlank(mdId, currentId)) {
|
||||
transactions.put(mdId, currentId);
|
||||
}
|
||||
}
|
||||
|
||||
final Map<String, String> res = new HashMap<>();
|
||||
for (final Document entry : getColl(db, COLL_METADATA).find()) {
|
||||
if (entry.getString("format").equals(mdFormat) && entry.getString("layout").equals(mdLayout)
|
||||
&& entry.getString("interpretation").equals(mdInterpretation) && transactions.containsKey(entry.getString("mdId"))) {
|
||||
res.put(entry.getString("mdId"), transactions.get(entry.getString("mdId")));
|
||||
}
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
private MongoDatabase getDb(final MongoClient client, final String dbName) {
|
||||
if (!Iterables.contains(client.listDatabaseNames(), dbName)) {
|
||||
final String err = String.format("Database '%s' not found in %s", dbName, client.getAddress());
|
||||
log.warn(err);
|
||||
throw new RuntimeException(err);
|
||||
}
|
||||
return client.getDatabase(dbName);
|
||||
}
|
||||
|
||||
private MongoCollection<Document> getColl(final MongoDatabase db, final String collName) {
|
||||
if (!Iterables.contains(db.listCollectionNames(), collName)) {
|
||||
final String err = String.format(String.format("Missing collection '%s' in database '%s'", collName, db.getName()));
|
||||
log.warn(err);
|
||||
throw new RuntimeException(err);
|
||||
}
|
||||
return db.getCollection(collName);
|
||||
}
|
||||
|
||||
public Iterable<String> listRecords(final String coll) {
|
||||
return () -> StreamSupport.stream(getColl(db, coll).find().spliterator(), false)
|
||||
.filter(e -> e.containsKey("body"))
|
||||
.map(e -> e.getString("body"))
|
||||
.iterator();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
client.close();
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,390 @@
|
|||
package eu.dnetlib.dhp.migration;
|
||||
|
||||
import java.io.Closeable;
|
||||
import java.io.IOException;
|
||||
import java.sql.ResultSet;
|
||||
import java.util.Arrays;
|
||||
import java.util.function.Consumer;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
|
||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||
import eu.dnetlib.dhp.schema.oaf.DataInfo;
|
||||
import eu.dnetlib.dhp.schema.oaf.Datasource;
|
||||
import eu.dnetlib.dhp.schema.oaf.Organization;
|
||||
import eu.dnetlib.dhp.schema.oaf.Project;
|
||||
import eu.dnetlib.dhp.schema.oaf.Relation;
|
||||
|
||||
public class MigrateDbEntitiesApplication extends AbstractMigrateApplication implements Closeable {
|
||||
|
||||
private static final Log log = LogFactory.getLog(MigrateDbEntitiesApplication.class);
|
||||
|
||||
private final DbClient dbClient;
|
||||
|
||||
public static void main(final String[] args) throws Exception {
|
||||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
|
||||
IOUtils.toString(MigrateDbEntitiesApplication.class.getResourceAsStream("/eu/dnetlib/dhp/migration/migrate_db_entities_parameters.json")));
|
||||
|
||||
parser.parseArgument(args);
|
||||
|
||||
final String dbUrl = parser.get("postgresUrl");
|
||||
final String dbUser = parser.get("postgresUser");
|
||||
final String dbPassword = parser.get("postgresPassword");
|
||||
|
||||
final String hdfsPath = parser.get("hdfsPath");
|
||||
final String hdfsNameNode = parser.get("namenode");
|
||||
final String hdfsUser = parser.get("hdfsUser");
|
||||
|
||||
try (final MigrateDbEntitiesApplication smdbe = new MigrateDbEntitiesApplication(hdfsPath, hdfsNameNode, hdfsUser, dbUrl, dbUser, dbPassword)) {
|
||||
smdbe.execute("queryDatasources.sql", smdbe::processDatasource);
|
||||
smdbe.execute("queryProjects.sql", smdbe::processProject);
|
||||
smdbe.execute("queryOrganizations.sql", smdbe::processOrganization);
|
||||
smdbe.execute("queryDatasourceOrganization.sql", smdbe::processDatasourceOrganization);
|
||||
smdbe.execute("queryProjectOrganization.sql", smdbe::processProjectOrganization);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
public MigrateDbEntitiesApplication(final String hdfsPath, final String hdfsNameNode, final String hdfsUser, final String dbUrl, final String dbUser,
|
||||
final String dbPassword) throws Exception {
|
||||
super(hdfsPath, hdfsNameNode, hdfsUser);
|
||||
this.dbClient = new DbClient(dbUrl, dbUser, dbPassword);
|
||||
}
|
||||
|
||||
public void execute(final String sqlFile, final Consumer<ResultSet> consumer) throws Exception {
|
||||
final String sql = IOUtils.toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/migration/sql/" + sqlFile));
|
||||
dbClient.processResults(sql, consumer);
|
||||
}
|
||||
|
||||
public void processDatasource(final ResultSet rs) {
|
||||
try {
|
||||
|
||||
final DataInfo info = MigrationUtils.dataInfo(null, null, null, null, null, null); // TODO
|
||||
|
||||
final Datasource ds = new Datasource();
|
||||
|
||||
ds.setId(MigrationUtils.createOpenaireId("10", rs.getString("datasourceid")));
|
||||
ds.setOriginalId(Arrays.asList(rs.getString("datasourceid")));
|
||||
ds.setCollectedfrom(MigrationUtils.listKeyValues(rs.getString("collectedfromid"), rs.getString("collectedfromname")));
|
||||
ds.setPid(null); // List<StructuredProperty> // TODO
|
||||
ds.setDateofcollection(rs.getDate("dateofcollection").toString());
|
||||
ds.setDateoftransformation(null); // TODO
|
||||
ds.setExtraInfo(null); // TODO
|
||||
ds.setOaiprovenance(null); // TODO
|
||||
|
||||
ds.setDatasourcetype(null); // Qualifier datasourcetype) {
|
||||
ds.setOpenairecompatibility(null); // Qualifier openairecompatibility) {
|
||||
ds.setOfficialname(MigrationUtils.field(rs.getString("officialname"), info));
|
||||
ds.setEnglishname(MigrationUtils.field(rs.getString("englishname"), info));
|
||||
ds.setWebsiteurl(MigrationUtils.field(rs.getString("websiteurl"), info));
|
||||
ds.setLogourl(MigrationUtils.field(rs.getString("logourl"), info));
|
||||
ds.setContactemail(MigrationUtils.field(rs.getString("contactemail"), info));
|
||||
ds.setNamespaceprefix(MigrationUtils.field(rs.getString("namespaceprefix"), info));
|
||||
ds.setLatitude(MigrationUtils.field(Double.toString(rs.getDouble("latitude")), info));
|
||||
ds.setLongitude(MigrationUtils.field(Double.toString(rs.getDouble("longitude")), info));
|
||||
ds.setDateofvalidation(MigrationUtils.field(rs.getDate("dateofvalidation").toString(), info));
|
||||
ds.setDescription(MigrationUtils.field(rs.getString("description"), info));
|
||||
ds.setSubjects(null); // List<StructuredProperty> subjects) {
|
||||
ds.setOdnumberofitems(MigrationUtils.field(Double.toString(rs.getInt("odnumberofitems")), info));
|
||||
ds.setOdnumberofitemsdate(MigrationUtils.field(rs.getDate("odnumberofitemsdate").toString(), info));
|
||||
ds.setOdpolicies(MigrationUtils.field(rs.getString("odpolicies"), info));
|
||||
ds.setOdlanguages(MigrationUtils.listFields(info, rs.getArray("odlanguages")));
|
||||
ds.setOdcontenttypes(MigrationUtils.listFields(info, rs.getArray("odcontenttypes")));
|
||||
ds.setAccessinfopackage(MigrationUtils.listFields(info, rs.getArray("accessinfopackage")));
|
||||
ds.setReleasestartdate(MigrationUtils.field(rs.getDate("releasestartdate").toString(), info));
|
||||
ds.setReleaseenddate(MigrationUtils.field(rs.getDate("releaseenddate").toString(), info));
|
||||
ds.setMissionstatementurl(MigrationUtils.field(rs.getString("missionstatementurl"), info));
|
||||
ds.setDataprovider(MigrationUtils.field(rs.getBoolean("dataprovider"), info));
|
||||
ds.setServiceprovider(MigrationUtils.field(rs.getBoolean("serviceprovider"), info));
|
||||
ds.setDatabaseaccesstype(MigrationUtils.field(rs.getString("databaseaccesstype"), info));
|
||||
ds.setDatauploadtype(MigrationUtils.field(rs.getString("datauploadtype"), info));
|
||||
ds.setDatabaseaccessrestriction(MigrationUtils.field(rs.getString("databaseaccessrestriction"), info));
|
||||
ds.setDatauploadrestriction(MigrationUtils.field(rs.getString("datauploadrestriction"), info));
|
||||
ds.setVersioning(MigrationUtils.field(rs.getBoolean("versioning"), info));
|
||||
ds.setCitationguidelineurl(MigrationUtils.field(rs.getString("citationguidelineurl"), info));
|
||||
ds.setQualitymanagementkind(MigrationUtils.field(rs.getString("qualitymanagementkind"), info));
|
||||
ds.setPidsystems(MigrationUtils.field(rs.getString("pidsystems"), info));
|
||||
ds.setCertificates(MigrationUtils.field(rs.getString("certificates"), info));
|
||||
ds.setPolicies(null); // List<KeyValue> // TODO
|
||||
ds.setJournal(null); // Journal // TODO
|
||||
|
||||
// rs.getString("datasourceid");
|
||||
rs.getArray("identities");
|
||||
// rs.getString("officialname");
|
||||
// rs.getString("englishname");
|
||||
// rs.getString("contactemail");
|
||||
rs.getString("openairecompatibility"); // COMPLEX ...@@@...
|
||||
// rs.getString("websiteurl");
|
||||
// rs.getString("logourl");
|
||||
// rs.getArray("accessinfopackage");
|
||||
// rs.getDouble("latitude");
|
||||
// rs.getDouble("longitude");
|
||||
// rs.getString("namespaceprefix");
|
||||
// rs.getInt("odnumberofitems"); // NULL
|
||||
// rs.getDate("odnumberofitemsdate"); // NULL
|
||||
rs.getArray("subjects");
|
||||
// rs.getString("description");
|
||||
// rs.getString("odpolicies"); // NULL
|
||||
// rs.getArray("odlanguages");
|
||||
// rs.getArray("odcontenttypes");
|
||||
rs.getBoolean("inferred"); // false
|
||||
rs.getBoolean("deletedbyinference");// false
|
||||
rs.getDouble("trust"); // 0.9
|
||||
rs.getString("inferenceprovenance"); // NULL
|
||||
// rs.getDate("dateofcollection");
|
||||
// rs.getDate("dateofvalidation");
|
||||
// rs.getDate("releasestartdate");
|
||||
// rs.getDate("releaseenddate");
|
||||
// rs.getString("missionstatementurl");
|
||||
// rs.getBoolean("dataprovider");
|
||||
// rs.getBoolean("serviceprovider");
|
||||
// rs.getString("databaseaccesstype");
|
||||
// rs.getString("datauploadtype");
|
||||
// rs.getString("databaseaccessrestriction");
|
||||
// rs.getString("datauploadrestriction");
|
||||
// rs.getBoolean("versioning");
|
||||
// rs.getString("citationguidelineurl");
|
||||
// rs.getString("qualitymanagementkind");
|
||||
// rs.getString("pidsystems");
|
||||
// rs.getString("certificates");
|
||||
rs.getArray("policies");
|
||||
// rs.getString("collectedfromid");
|
||||
// rs.getString("collectedfromname");
|
||||
rs.getString("datasourcetype"); // COMPLEX XXX@@@@....
|
||||
rs.getString("provenanceaction"); // 'sysimport:crosswalk:entityregistry@@@sysimport:crosswalk:entityregistry@@@dnet:provenance_actions@@@dnet:provenance_actions'
|
||||
// AS provenanceaction,
|
||||
rs.getString("journal"); // CONCAT(d.issn, '@@@', d.eissn, '@@@', d.lissn) AS journal
|
||||
|
||||
emitOaf(ds);
|
||||
} catch (final Exception e) {
|
||||
// TODO: handle exception
|
||||
}
|
||||
}
|
||||
|
||||
public void processProject(final ResultSet rs) {
|
||||
try {
|
||||
|
||||
final DataInfo info = MigrationUtils.dataInfo(null, null, null, null, null, null); // TODO
|
||||
|
||||
final Project p = new Project();
|
||||
|
||||
p.setId(MigrationUtils.createOpenaireId("40", rs.getString("projectid")));
|
||||
p.setOriginalId(Arrays.asList(rs.getString("projectid")));
|
||||
p.setCollectedfrom(MigrationUtils.listKeyValues(rs.getString("collectedfromid"), rs.getString("collectedfromname")));
|
||||
p.setPid(null); // List<StructuredProperty> // TODO
|
||||
|
||||
p.setDateofcollection(rs.getDate("dateofcollection").toString());
|
||||
p.setDateoftransformation(rs.getDate("dateoftransformation").toString());
|
||||
p.setExtraInfo(null); // List<ExtraInfo> //TODO
|
||||
p.setOaiprovenance(null); // OAIProvenance /TODO
|
||||
|
||||
p.setWebsiteurl(MigrationUtils.field(rs.getString("websiteurl"), info));
|
||||
p.setCode(MigrationUtils.field(rs.getString("code"), info));
|
||||
p.setAcronym(MigrationUtils.field(rs.getString("acronym"), info));
|
||||
p.setTitle(MigrationUtils.field(rs.getString("title"), info));
|
||||
p.setStartdate(MigrationUtils.field(rs.getDate("startdate").toString(), info));
|
||||
p.setEnddate(MigrationUtils.field(rs.getDate("enddate").toString(), info));
|
||||
p.setCallidentifier(MigrationUtils.field(rs.getString("callidentifier"), info));
|
||||
p.setKeywords(MigrationUtils.field(rs.getString("keywords"), info));
|
||||
p.setDuration(MigrationUtils.field(Integer.toString(rs.getInt("duration")), info));
|
||||
p.setEcsc39(MigrationUtils.field(Boolean.toString(rs.getBoolean("ecsc39")), info));
|
||||
p.setOamandatepublications(MigrationUtils.field(Boolean.toString(rs.getBoolean("oamandatepublications")), info));
|
||||
p.setEcarticle29_3(MigrationUtils.field(Boolean.toString(rs.getBoolean("ecarticle29_3")), info));
|
||||
p.setSubjects(null); // List<StructuredProperty> //TODO
|
||||
p.setFundingtree(null); // List<Field<String>> //TODO
|
||||
p.setContracttype(null); // Qualifier //TODO
|
||||
p.setOptional1(MigrationUtils.field(rs.getString("optional1"), info));
|
||||
p.setOptional2(MigrationUtils.field(rs.getString("optional2"), info));
|
||||
p.setJsonextrainfo(MigrationUtils.field(rs.getString("jsonextrainfo"), info));
|
||||
p.setContactfullname(MigrationUtils.field(rs.getString("contactfullname"), info));
|
||||
p.setContactfax(MigrationUtils.field(rs.getString("contactfax"), info));
|
||||
p.setContactphone(MigrationUtils.field(rs.getString("contactphone"), info));
|
||||
p.setContactemail(MigrationUtils.field(rs.getString("contactemail"), info));
|
||||
p.setSummary(MigrationUtils.field(rs.getString("summary"), info));
|
||||
p.setCurrency(MigrationUtils.field(rs.getString("currency"), info));
|
||||
p.setTotalcost(new Float(rs.getDouble("totalcost")));
|
||||
p.setFundedamount(new Float(rs.getDouble("fundedamount")));
|
||||
|
||||
// rs.getString("projectid");
|
||||
// rs.getString("code");
|
||||
// rs.getString("websiteurl");
|
||||
// rs.getString("acronym");
|
||||
// rs.getString("title");
|
||||
// rs.getDate("startdate");
|
||||
// rs.getDate("enddate");
|
||||
// rs.getString("callidentifier");
|
||||
// rs.getString("keywords");
|
||||
// rs.getInt("duration");
|
||||
// rs.getBoolean("ecsc39");
|
||||
// rs.getBoolean("oamandatepublications");
|
||||
// rs.getBoolean("ecarticle29_3");
|
||||
// rs.getDate("dateofcollection");
|
||||
// rs.getDate("dateoftransformation");
|
||||
rs.getBoolean("inferred");
|
||||
rs.getBoolean("deletedbyinference");
|
||||
rs.getDouble("trust");
|
||||
rs.getString("inferenceprovenance");
|
||||
// rs.getString("optional1");
|
||||
// rs.getString("optional2");
|
||||
rs.getString("jsonextrainfo");
|
||||
// rs.getString("contactfullname");
|
||||
// rs.getString("contactfax");
|
||||
// rs.getString("contactphone");
|
||||
// rs.getString("contactemail");
|
||||
// rs.getString("summary");
|
||||
// rs.getString("currency");
|
||||
// rs.getDouble("totalcost");
|
||||
// rs.getDouble("fundedamount");
|
||||
// rs.getString("collectedfromid");
|
||||
// rs.getString("collectedfromname");
|
||||
rs.getString("contracttype"); // COMPLEX
|
||||
rs.getString("provenanceaction"); // COMPLEX
|
||||
rs.getArray("pid");
|
||||
rs.getArray("subjects");
|
||||
rs.getArray("fundingtree");
|
||||
|
||||
emitOaf(p);
|
||||
|
||||
} catch (final Exception e) {
|
||||
// TODO: handle exception
|
||||
}
|
||||
}
|
||||
|
||||
public void processOrganization(final ResultSet rs) {
|
||||
try {
|
||||
|
||||
final DataInfo info = MigrationUtils.dataInfo(null, null, null, null, null, null); // TODO
|
||||
|
||||
final Organization o = new Organization();
|
||||
|
||||
o.setId(MigrationUtils.createOpenaireId("20", rs.getString("organizationid"))); // String id) {
|
||||
o.setOriginalId(Arrays.asList(rs.getString("organizationid")));
|
||||
o.setCollectedfrom(MigrationUtils.listKeyValues(rs.getString("collectedfromid"), rs.getString("collectedfromname")));
|
||||
o.setPid(null); // List<StructuredProperty> // TODO
|
||||
o.setDateofcollection(rs.getDate("dateofcollection").toString());
|
||||
o.setDateoftransformation(rs.getDate("dateoftransformation").toString());
|
||||
o.setExtraInfo(null); // List<ExtraInfo> // TODO
|
||||
o.setOaiprovenance(null); // OAIProvenance // TODO
|
||||
o.setLegalshortname(MigrationUtils.field("legalshortname", info));
|
||||
o.setLegalname(MigrationUtils.field("legalname", info));
|
||||
o.setAlternativeNames(null); // List<Field<String>> //TODO
|
||||
o.setWebsiteurl(MigrationUtils.field("websiteurl", info));
|
||||
o.setLogourl(MigrationUtils.field("logourl", info));
|
||||
o.setEclegalbody(MigrationUtils.field(Boolean.toString(rs.getBoolean("eclegalbody")), info));
|
||||
o.setEclegalperson(MigrationUtils.field(Boolean.toString(rs.getBoolean("eclegalperson")), info));
|
||||
o.setEcnonprofit(MigrationUtils.field(Boolean.toString(rs.getBoolean("ecnonprofit")), info));
|
||||
o.setEcresearchorganization(MigrationUtils.field(Boolean.toString(rs.getBoolean("ecresearchorganization")), info));
|
||||
o.setEchighereducation(MigrationUtils.field(Boolean.toString(rs.getBoolean("echighereducation")), info));
|
||||
o.setEcinternationalorganizationeurinterests(MigrationUtils
|
||||
.field(Boolean.toString(rs.getBoolean("ecinternationalorganizationeurinterests")), info));
|
||||
o.setEcinternationalorganization(MigrationUtils.field(Boolean.toString(rs.getBoolean("ecinternationalorganization")), info));
|
||||
o.setEcenterprise(MigrationUtils.field(Boolean.toString(rs.getBoolean("ecenterprise")), info));
|
||||
o.setEcsmevalidated(MigrationUtils.field(Boolean.toString(rs.getBoolean("ecsmevalidated")), info));
|
||||
o.setEcnutscode(MigrationUtils.field(Boolean.toString(rs.getBoolean("ecnutscode")), info));
|
||||
o.setCountry(null); // Qualifier country) {
|
||||
|
||||
// rs.getString("organizationid");
|
||||
// rs.getString("legalshortname");
|
||||
// rs.getString("legalname");
|
||||
// rs.getString("websiteurl");
|
||||
// rs.getString("logourl");
|
||||
// rs.getBoolean("eclegalbody");
|
||||
// rs.getBoolean("eclegalperson");
|
||||
// rs.getBoolean("ecnonprofit");
|
||||
// rs.getBoolean("ecresearchorganization");
|
||||
// rs.getBoolean("echighereducation");
|
||||
// rs.getBoolean("ecinternationalorganizationeurinterests");
|
||||
// rs.getBoolean("ecinternationalorganization");
|
||||
// rs.getBoolean("ecenterprise");
|
||||
// rs.getBoolean("ecsmevalidated");
|
||||
// rs.getBoolean("ecnutscode");
|
||||
rs.getDate("dateofcollection");
|
||||
rs.getDate("dateoftransformation");
|
||||
rs.getBoolean("inferred");
|
||||
rs.getBoolean("deletedbyinference");
|
||||
rs.getDouble("trust");
|
||||
rs.getString("inferenceprovenance");
|
||||
// rs.getString("collectedfromid");
|
||||
// rs.getString("collectedfromname");
|
||||
rs.getString("country");
|
||||
rs.getString("provenanceaction");
|
||||
rs.getArray("pid");
|
||||
|
||||
emitOaf(o);
|
||||
} catch (final Exception e) {
|
||||
// TODO: handle exception
|
||||
}
|
||||
}
|
||||
|
||||
public void processDatasourceOrganization(final ResultSet rs) {
|
||||
|
||||
try {
|
||||
final Relation r = new Relation();
|
||||
|
||||
r.setRelType(null); // TODO
|
||||
r.setSubRelType(null); // TODO
|
||||
r.setRelClass(null); // TODO
|
||||
r.setSource(null); // TODO
|
||||
r.setTarget(null); // TODO
|
||||
r.setCollectedFrom(MigrationUtils.listKeyValues("", ""));
|
||||
|
||||
rs.getString("datasource");
|
||||
rs.getString("organization");
|
||||
rs.getDate("startdate"); // NULL
|
||||
rs.getDate("enddate"); // NULL
|
||||
rs.getBoolean("inferred"); // false
|
||||
rs.getBoolean("deletedbyinference"); // false
|
||||
rs.getDouble("trust"); // 0.9
|
||||
rs.getString("inferenceprovenance"); // NULL
|
||||
rs.getString("semantics"); // 'providedBy@@@provided
|
||||
// by@@@dnet:datasources_organizations_typologies@@@dnet:datasources_organizations_typologies' AS
|
||||
// semantics,
|
||||
rs.getString("provenanceaction"); // d.provenanceaction || '@@@' || d.provenanceaction ||
|
||||
// '@@@dnet:provenanceActions@@@dnet:provenanceActions' AS provenanceaction
|
||||
|
||||
emitOaf(r);
|
||||
} catch (final Exception e) {
|
||||
// TODO: handle exception
|
||||
}
|
||||
}
|
||||
|
||||
public void processProjectOrganization(final ResultSet rs) {
|
||||
try {
|
||||
final Relation r = new Relation();
|
||||
|
||||
r.setRelType(null); // TODO
|
||||
r.setSubRelType(null); // TODO
|
||||
r.setRelClass(null); // TODO
|
||||
r.setSource(null); // TODO
|
||||
r.setTarget(null); // TODO
|
||||
r.setCollectedFrom(null);
|
||||
|
||||
rs.getString("project");
|
||||
rs.getString("resporganization");
|
||||
rs.getInt("participantnumber");
|
||||
rs.getDouble("contribution");
|
||||
rs.getDate("startdate");// null
|
||||
rs.getDate("enddate");// null
|
||||
rs.getBoolean("inferred");// false
|
||||
rs.getBoolean("deletedbyinference"); // false
|
||||
rs.getDouble("trust");
|
||||
rs.getString("inferenceprovenance"); // NULL
|
||||
rs.getString("semantics"); // po.semanticclass || '@@@' || po.semanticclass ||
|
||||
// '@@@dnet:project_organization_relations@@@dnet:project_organization_relations' AS semantics,
|
||||
rs.getString("provenanceaction"); // 'sysimport:crosswalk:entityregistry@@@sysimport:crosswalk:entityregistry@@@dnet:provenance_actions@@@dnet:provenance_actions'
|
||||
// AS provenanceaction
|
||||
emitOaf(r);
|
||||
} catch (final Exception e) {
|
||||
// TODO: handle exception
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
super.close();
|
||||
dbClient.close();
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,190 @@
|
|||
package eu.dnetlib.dhp.migration;
|
||||
|
||||
import java.io.Closeable;
|
||||
import java.io.IOException;
|
||||
import java.io.StringReader;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Map.Entry;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.dom4j.Document;
|
||||
import org.dom4j.DocumentException;
|
||||
import org.dom4j.Node;
|
||||
import org.dom4j.io.SAXReader;
|
||||
|
||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||
import eu.dnetlib.dhp.schema.oaf.Dataset;
|
||||
import eu.dnetlib.dhp.schema.oaf.Oaf;
|
||||
import eu.dnetlib.dhp.schema.oaf.OtherResearchProduct;
|
||||
import eu.dnetlib.dhp.schema.oaf.Publication;
|
||||
import eu.dnetlib.dhp.schema.oaf.Relation;
|
||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||
import eu.dnetlib.dhp.schema.oaf.Software;
|
||||
|
||||
public class MigrateMongoMdstoresApplication extends AbstractMigrateApplication implements Closeable {
|
||||
|
||||
private static final Log log = LogFactory.getLog(MigrateMongoMdstoresApplication.class);
|
||||
|
||||
private final MdstoreClient mdstoreClient;
|
||||
|
||||
public static void main(final String[] args) throws Exception {
|
||||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
|
||||
IOUtils.toString(MigrateMongoMdstoresApplication.class.getResourceAsStream("/eu/dnetlib/dhp/migration/migrate_mongo_mstores_parameters.json")));
|
||||
parser.parseArgument(args);
|
||||
|
||||
final String mongoBaseUrl = parser.get("mongoBaseUrl");
|
||||
final String mongoDb = parser.get("mongoDb");
|
||||
|
||||
final String mdFormat = parser.get("mdFormat");
|
||||
final String mdLayout = parser.get("mdLayout");
|
||||
final String mdInterpretation = parser.get("mdInterpretation");
|
||||
|
||||
final String hdfsPath = parser.get("hdfsPath");
|
||||
final String hdfsNameNode = parser.get("namenode");
|
||||
final String hdfsUser = parser.get("hdfsUser");
|
||||
|
||||
try (final MigrateMongoMdstoresApplication mig = new MigrateMongoMdstoresApplication(hdfsPath, hdfsNameNode, hdfsUser, mongoBaseUrl, mongoDb)) {
|
||||
mig.processMdRecords(mdFormat, mdLayout, mdInterpretation);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
public MigrateMongoMdstoresApplication(final String hdfsPath, final String hdfsNameNode, final String hdfsUser, final String mongoBaseUrl,
|
||||
final String mongoDb) throws Exception {
|
||||
super(hdfsPath, hdfsNameNode, hdfsUser);
|
||||
this.mdstoreClient = new MdstoreClient(mongoBaseUrl, mongoDb);
|
||||
|
||||
}
|
||||
|
||||
public void processMdRecords(final String mdFormat, final String mdLayout, final String mdInterpretation) throws DocumentException {
|
||||
|
||||
for (final Entry<String, String> entry : mdstoreClient.validCollections(mdFormat, mdLayout, mdInterpretation).entrySet()) {
|
||||
// final String mdId = entry.getKey();
|
||||
final String currentColl = entry.getValue();
|
||||
|
||||
for (final String xml : mdstoreClient.listRecords(currentColl)) {
|
||||
for (final Oaf oaf : createOafs(xml)) {
|
||||
emitOaf(oaf);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private List<Oaf> createOafs(final String xml) throws DocumentException {
|
||||
final SAXReader reader = new SAXReader();
|
||||
final Document doc = reader.read(new StringReader(xml));
|
||||
|
||||
final String type = doc.valueOf(""); // TODO
|
||||
|
||||
final List<Oaf> oafs = new ArrayList<>();
|
||||
|
||||
switch (type.toLowerCase()) {
|
||||
case "publication":
|
||||
final Publication p = new Publication();
|
||||
populateResultFields(p, doc);
|
||||
p.setJournal(null); // TODO
|
||||
oafs.add(p);
|
||||
break;
|
||||
case "dataset":
|
||||
final Dataset d = new Dataset();
|
||||
populateResultFields(d, doc);
|
||||
d.setStoragedate(null); // TODO
|
||||
d.setDevice(null); // TODO
|
||||
d.setSize(null); // TODO
|
||||
d.setVersion(null); // TODO
|
||||
d.setLastmetadataupdate(null); // TODO
|
||||
d.setMetadataversionnumber(null); // TODO
|
||||
d.setGeolocation(null); // TODO
|
||||
oafs.add(d);
|
||||
break;
|
||||
case "otherresearchproducts":
|
||||
final OtherResearchProduct o = new OtherResearchProduct();
|
||||
populateResultFields(o, doc);
|
||||
o.setContactperson(null); // TODO
|
||||
o.setContactgroup(null); // TODO
|
||||
o.setTool(null); // TODO
|
||||
oafs.add(o);
|
||||
break;
|
||||
case "software":
|
||||
final Software s = new Software();
|
||||
populateResultFields(s, doc);
|
||||
s.setDocumentationUrl(null); // TODO
|
||||
s.setLicense(null); // TODO
|
||||
s.setCodeRepositoryUrl(null); // TODO
|
||||
s.setProgrammingLanguage(null); // TODO
|
||||
oafs.add(s);
|
||||
break;
|
||||
default:
|
||||
log.error("Inavlid type: " + type);
|
||||
break;
|
||||
}
|
||||
|
||||
if (!oafs.isEmpty()) {
|
||||
addRelations(oafs, doc, "//*", "TYPE");
|
||||
addRelations(oafs, doc, "//*", "TYPE");
|
||||
addRelations(oafs, doc, "//*", "TYPE");
|
||||
}
|
||||
|
||||
return oafs;
|
||||
}
|
||||
|
||||
private void addRelations(final List<Oaf> oafs, final Document doc, final String xpath, final String type) {
|
||||
for (final Object o : doc.selectNodes(xpath)) {
|
||||
final Node n = (Node) o;
|
||||
final Relation r = new Relation();
|
||||
r.setRelType(null); // TODO
|
||||
r.setSubRelType(null); // TODO
|
||||
r.setRelClass(null); // TODO
|
||||
r.setSource(null); // TODO
|
||||
r.setTarget(null); // TODO
|
||||
r.setCollectedFrom(null); // TODO
|
||||
oafs.add(r);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private void populateResultFields(final Result r, final Document doc) {
|
||||
r.setDataInfo(null); // TODO
|
||||
r.setLastupdatetimestamp(null); // TODO
|
||||
r.setId(null); // TODO
|
||||
r.setOriginalId(null); // TODO
|
||||
r.setCollectedfrom(null); // TODO
|
||||
r.setPid(null); // TODO
|
||||
r.setDateofcollection(null); // TODO
|
||||
r.setDateoftransformation(null); // TODO
|
||||
r.setExtraInfo(null); // TODO
|
||||
r.setOaiprovenance(null); // TODO
|
||||
r.setAuthor(null); // TODO
|
||||
r.setResulttype(null); // TODO
|
||||
r.setLanguage(null); // TODO
|
||||
r.setCountry(null); // TODO
|
||||
r.setSubject(null); // TODO
|
||||
r.setTitle(null); // TODO
|
||||
r.setRelevantdate(null); // TODO
|
||||
r.setDescription(null); // TODO
|
||||
r.setDateofacceptance(null); // TODO
|
||||
r.setPublisher(null); // TODO
|
||||
r.setEmbargoenddate(null); // TODO
|
||||
r.setSource(null); // TODO
|
||||
r.setFulltext(null); // TODO
|
||||
r.setFormat(null); // TODO
|
||||
r.setContributor(null); // TODO
|
||||
r.setResourcetype(null); // TODO
|
||||
r.setCoverage(null); // TODO
|
||||
r.setRefereed(null); // TODO
|
||||
r.setContext(null); // TODO
|
||||
r.setExternalReference(null); // TODO
|
||||
r.setInstance(null); // TODO
|
||||
r.setProcessingchargeamount(null); // TODO
|
||||
r.setProcessingchargecurrency(null); // TODO
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
super.close();
|
||||
mdstoreClient.close();
|
||||
}
|
||||
}
|
|
@ -0,0 +1,164 @@
|
|||
package eu.dnetlib.dhp.migration;
|
||||
|
||||
import java.sql.Array;
|
||||
import java.sql.SQLException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
|
||||
import eu.dnetlib.dhp.schema.oaf.DataInfo;
|
||||
import eu.dnetlib.dhp.schema.oaf.ExtraInfo;
|
||||
import eu.dnetlib.dhp.schema.oaf.Field;
|
||||
import eu.dnetlib.dhp.schema.oaf.Journal;
|
||||
import eu.dnetlib.dhp.schema.oaf.KeyValue;
|
||||
import eu.dnetlib.dhp.schema.oaf.OAIProvenance;
|
||||
import eu.dnetlib.dhp.schema.oaf.OriginDescription;
|
||||
import eu.dnetlib.dhp.schema.oaf.Qualifier;
|
||||
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
||||
import eu.dnetlib.dhp.utils.DHPUtils;
|
||||
|
||||
public class MigrationUtils {
|
||||
|
||||
public static KeyValue keyValue(final String k, final String v) {
|
||||
final KeyValue kv = new KeyValue();
|
||||
kv.setKey(k);
|
||||
kv.setValue(v);
|
||||
return kv;
|
||||
}
|
||||
|
||||
public static List<KeyValue> listKeyValues(final String... s) {
|
||||
if (s.length % 2 > 0) { throw new RuntimeException("Invalid number of parameters (k,v,k,v,....)"); }
|
||||
|
||||
final List<KeyValue> list = new ArrayList<>();
|
||||
for (int i = 0; i < s.length; i += 2) {
|
||||
list.add(keyValue(s[i], s[i + 1]));
|
||||
}
|
||||
return list;
|
||||
}
|
||||
|
||||
public static <T> Field<T> field(final T value, final DataInfo info) {
|
||||
final Field<T> field = new Field<>();
|
||||
field.setValue(value);
|
||||
field.setDataInfo(info);
|
||||
return field;
|
||||
}
|
||||
|
||||
public static List<Field<String>> listFields(final DataInfo info, final String... values) {
|
||||
return Arrays.stream(values).map(v -> field(v, info)).collect(Collectors.toList());
|
||||
}
|
||||
|
||||
public static List<Field<String>> listFields(final DataInfo info, final Array array) {
|
||||
try {
|
||||
return listFields(info, (String[]) array.getArray());
|
||||
} catch (final SQLException e) {
|
||||
throw new RuntimeException("Invalid SQL array", e);
|
||||
}
|
||||
}
|
||||
|
||||
public static Qualifier qualifier(final String classid, final String classname, final String schemeid, final String schemename) {
|
||||
final Qualifier q = new Qualifier();
|
||||
q.setClassid(classid);
|
||||
q.setClassname(classname);
|
||||
q.setSchemeid(schemeid);
|
||||
q.setSchemename(schemename);
|
||||
return q;
|
||||
}
|
||||
|
||||
public static StructuredProperty structuredProperty(final String value,
|
||||
final String classid,
|
||||
final String classname,
|
||||
final String schemeid,
|
||||
final String schemename,
|
||||
final DataInfo dataInfo) {
|
||||
final StructuredProperty sp = new StructuredProperty();
|
||||
sp.setValue(value);
|
||||
sp.setQualifier(qualifier(classid, classname, schemeid, schemename));
|
||||
sp.setDataInfo(dataInfo);
|
||||
return sp;
|
||||
}
|
||||
|
||||
public static ExtraInfo extraInfo(final String name, final String value, final String typology, final String provenance, final String trust) {
|
||||
final ExtraInfo info = new ExtraInfo();
|
||||
info.setName(name);
|
||||
info.setValue(value);
|
||||
info.setTypology(typology);
|
||||
info.setProvenance(provenance);
|
||||
info.setTrust(trust);
|
||||
return info;
|
||||
}
|
||||
|
||||
public static OAIProvenance oaiIProvenance(final String identifier,
|
||||
final String baseURL,
|
||||
final String metadataNamespace,
|
||||
final Boolean altered,
|
||||
final String datestamp,
|
||||
final String harvestDate) {
|
||||
|
||||
final OriginDescription desc = new OriginDescription();
|
||||
desc.setIdentifier(identifier);
|
||||
desc.setBaseURL(baseURL);
|
||||
desc.setMetadataNamespace(metadataNamespace);
|
||||
desc.setAltered(altered);
|
||||
desc.setDatestamp(datestamp);
|
||||
desc.setHarvestDate(harvestDate);
|
||||
|
||||
final OAIProvenance p = new OAIProvenance();
|
||||
p.setOriginDescription(desc);
|
||||
|
||||
return p;
|
||||
}
|
||||
|
||||
public static Journal journal(final String name,
|
||||
final String issnPrinted,
|
||||
final String issnOnline,
|
||||
final String issnLinking,
|
||||
final String ep,
|
||||
final String iss,
|
||||
final String sp,
|
||||
final String vol,
|
||||
final String edition,
|
||||
final String conferenceplace,
|
||||
final String conferencedate,
|
||||
final DataInfo dataInfo) {
|
||||
final Journal j = new Journal();
|
||||
j.setName(name);
|
||||
j.setIssnPrinted(issnPrinted);
|
||||
j.setIssnOnline(issnOnline);
|
||||
j.setIssnLinking(issnLinking);
|
||||
j.setEp(ep);
|
||||
j.setIss(iss);
|
||||
j.setSp(sp);
|
||||
j.setVol(vol);
|
||||
j.setEdition(edition);
|
||||
j.setConferenceplace(conferenceplace);
|
||||
j.setConferencedate(conferencedate);
|
||||
j.setDataInfo(dataInfo);
|
||||
return j;
|
||||
}
|
||||
|
||||
public static DataInfo dataInfo(final Boolean deletedbyinference,
|
||||
final String inferenceprovenance,
|
||||
final Boolean inferred,
|
||||
final Boolean invisible,
|
||||
final Qualifier provenanceaction,
|
||||
final String trust) {
|
||||
final DataInfo d = new DataInfo();
|
||||
d.setDeletedbyinference(deletedbyinference);
|
||||
d.setInferenceprovenance(inferenceprovenance);
|
||||
d.setInferred(inferred);
|
||||
d.setInvisible(invisible);
|
||||
d.setProvenanceaction(provenanceaction);
|
||||
d.setTrust(trust);
|
||||
return d;
|
||||
}
|
||||
|
||||
public static String createOpenaireId(final String prefix, final String originalId) {
|
||||
final String nsPrefix = StringUtils.substringBefore(originalId, "::");
|
||||
final String rest = StringUtils.substringAfter(originalId, "::");
|
||||
return String.format("%s|%s::%s", prefix, nsPrefix, DHPUtils.md5(rest));
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,38 @@
|
|||
[
|
||||
{
|
||||
"paramName": "p",
|
||||
"paramLongName": "hdfsPath",
|
||||
"paramDescription": "the path where storing the sequential file",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "n",
|
||||
"paramLongName": "namenode",
|
||||
"paramDescription": "the Name Node URI",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "u",
|
||||
"paramLongName": "hdfsUser",
|
||||
"paramDescription": "the user wich create the hdfs seq file",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "dburl",
|
||||
"paramLongName": "postgresUrl",
|
||||
"paramDescription": "postgres url, example: jdbc:postgresql://localhost:5432/testdb",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "dbuser",
|
||||
"paramLongName": "postgresUser",
|
||||
"paramDescription": "postgres user",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "dbpasswd",
|
||||
"paramLongName": "postgresPassword",
|
||||
"paramDescription": "postgres password",
|
||||
"paramRequired": true
|
||||
}
|
||||
]
|
|
@ -0,0 +1,50 @@
|
|||
[
|
||||
{
|
||||
"paramName": "p",
|
||||
"paramLongName": "hdfsPath",
|
||||
"paramDescription": "the path where storing the sequential file",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "n",
|
||||
"paramLongName": "namenode",
|
||||
"paramDescription": "the Name Node URI",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "u",
|
||||
"paramLongName": "hdfsUser",
|
||||
"paramDescription": "the user wich create the hdfs seq file",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "mongourl",
|
||||
"paramLongName": "mongoBaseUrl",
|
||||
"paramDescription": "mongoDB url, example: mongodb://[username:password@]host[:port]",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "db",
|
||||
"paramLongName": "mongoDb",
|
||||
"paramDescription": "mongo database",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "f",
|
||||
"paramLongName": "mdFormat",
|
||||
"paramDescription": "metadata format",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "l",
|
||||
"paramLongName": "mdLayout",
|
||||
"paramDescription": "metadata layout",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "i",
|
||||
"paramLongName": "mdInterpretation",
|
||||
"paramDescription": "metadata interpretation",
|
||||
"paramRequired": true
|
||||
}
|
||||
]
|
|
@ -0,0 +1,16 @@
|
|||
SELECT
|
||||
dor.datasource AS datasource,
|
||||
dor.organization AS organization,
|
||||
NULL AS startdate,
|
||||
NULL AS enddate,
|
||||
false AS inferred,
|
||||
false AS deletedbyinference,
|
||||
0.9 AS trust,
|
||||
NULL AS inferenceprovenance,
|
||||
|
||||
'providedBy@@@provided by@@@dnet:datasources_organizations_typologies@@@dnet:datasources_organizations_typologies' AS semantics,
|
||||
d.provenanceaction || '@@@' || d.provenanceaction || '@@@dnet:provenanceActions@@@dnet:provenanceActions' AS provenanceaction
|
||||
|
||||
FROM dsm_datasource_organization dor
|
||||
LEFT OUTER JOIN dsm_datasources d ON (dor.datasource = d.id)
|
||||
|
|
@ -0,0 +1,147 @@
|
|||
SELECT
|
||||
d.id AS datasourceid,
|
||||
d.id || array_agg(distinct di.pid) AS identities,
|
||||
d.officialname AS officialname,
|
||||
d.englishname AS englishname,
|
||||
d.contactemail AS contactemail,
|
||||
CASE
|
||||
WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility):: TEXT) @> ARRAY ['openaire-cris_1.1'])
|
||||
THEN
|
||||
'openaire-cris_1.1@@@OpenAIRE CRIS v1.1@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel'
|
||||
WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility):: TEXT) @> ARRAY ['driver', 'openaire2.0'])
|
||||
THEN
|
||||
'driver-openaire2.0@@@OpenAIRE 2.0+ (DRIVER OA, EC funding)@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel'
|
||||
WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility) :: TEXT) @> ARRAY ['driver'])
|
||||
THEN
|
||||
'driver@@@OpenAIRE Basic (DRIVER OA)@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel'
|
||||
WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility) :: TEXT) @> ARRAY ['openaire2.0'])
|
||||
THEN
|
||||
'openaire2.0@@@OpenAIRE 2.0 (EC funding)@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel'
|
||||
WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility) :: TEXT) @> ARRAY ['openaire3.0'])
|
||||
THEN
|
||||
'openaire3.0@@@OpenAIRE 3.0 (OA, funding)@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel'
|
||||
WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility) :: TEXT) @> ARRAY ['openaire2.0_data'])
|
||||
THEN
|
||||
'openaire2.0_data@@@OpenAIRE Data (funded, referenced datasets)@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel'
|
||||
WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility) :: TEXT) @> ARRAY ['native'])
|
||||
THEN
|
||||
'native@@@proprietary@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel'
|
||||
WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility) :: TEXT) @> ARRAY ['hostedBy'])
|
||||
THEN
|
||||
'hostedBy@@@collected from a compatible aggregator@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel'
|
||||
WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility) :: TEXT) @> ARRAY ['notCompatible'])
|
||||
THEN
|
||||
'notCompatible@@@under validation@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel'
|
||||
ELSE
|
||||
'UNKNOWN@@@not available@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel'
|
||||
END AS openairecompatibility,
|
||||
d.websiteurl AS websiteurl,
|
||||
d.logourl AS logourl,
|
||||
array_agg(DISTINCT CASE WHEN a.protocol = 'oai' and last_aggregation_date is not null THEN a.baseurl ELSE NULL END) AS accessinfopackage,
|
||||
d.latitude AS latitude,
|
||||
d.longitude AS longitude,
|
||||
d.namespaceprefix AS namespaceprefix,
|
||||
NULL AS odnumberofitems,
|
||||
NULL AS odnumberofitemsdate,
|
||||
|
||||
(SELECT array_agg(s|| '###keywords@@@keywords@@@dnet:subject_classification_typologies@@@dnet:subject_classification_typologies')
|
||||
FROM UNNEST(
|
||||
ARRAY(
|
||||
SELECT trim(s)
|
||||
FROM unnest(string_to_array(d.subjects, '@@')) AS s)) AS s) AS subjects,
|
||||
|
||||
d.description AS description,
|
||||
NULL AS odpolicies,
|
||||
ARRAY(SELECT trim(s)
|
||||
FROM unnest(string_to_array(d.languages, ',')) AS s) AS odlanguages,
|
||||
ARRAY(SELECT trim(s)
|
||||
FROM unnest(string_to_array(d.od_contenttypes, '-')) AS s) AS odcontenttypes,
|
||||
false AS inferred,
|
||||
false AS deletedbyinference,
|
||||
0.9 AS trust,
|
||||
NULL AS inferenceprovenance,
|
||||
d.dateofcollection AS dateofcollection,
|
||||
d.dateofvalidation AS dateofvalidation,
|
||||
-- re3data fields
|
||||
d.releasestartdate AS releasestartdate,
|
||||
d.releaseenddate AS releaseenddate,
|
||||
d.missionstatementurl AS missionstatementurl,
|
||||
d.dataprovider AS dataprovider,
|
||||
d.serviceprovider AS serviceprovider,
|
||||
d.databaseaccesstype AS databaseaccesstype,
|
||||
d.datauploadtype AS datauploadtype,
|
||||
d.databaseaccessrestriction AS databaseaccessrestriction,
|
||||
d.datauploadrestriction AS datauploadrestriction,
|
||||
d.versioning AS versioning,
|
||||
d.citationguidelineurl AS citationguidelineurl,
|
||||
d.qualitymanagementkind AS qualitymanagementkind,
|
||||
d.pidsystems AS pidsystems,
|
||||
d.certificates AS certificates,
|
||||
ARRAY[]::text[] AS policies,
|
||||
dc.id AS collectedfromid,
|
||||
dc.officialname AS collectedfromname,
|
||||
d.typology || '@@@' || CASE
|
||||
WHEN (d.typology = 'crissystem') THEN 'CRIS System'
|
||||
WHEN (d.typology = 'datarepository::unknown') THEN 'Data Repository'
|
||||
WHEN (d.typology = 'aggregator::datarepository') THEN 'Data Repository Aggregator'
|
||||
WHEN (d.typology = 'infospace') THEN 'Information Space'
|
||||
WHEN (d.typology = 'pubsrepository::institutional') THEN 'Institutional Repository'
|
||||
WHEN (d.typology = 'aggregator::pubsrepository::institutional') THEN 'Institutional Repository Aggregator'
|
||||
WHEN (d.typology = 'pubsrepository::journal') THEN 'Journal'
|
||||
WHEN (d.typology = 'aggregator::pubsrepository::journals') THEN 'Journal Aggregator/Publisher'
|
||||
WHEN (d.typology = 'pubsrepository::mock') THEN 'Other'
|
||||
WHEN (d.typology = 'pubscatalogue::unknown') THEN 'Publication Catalogue'
|
||||
WHEN (d.typology = 'pubsrepository::unknown') THEN 'Publication Repository'
|
||||
WHEN (d.typology = 'aggregator::pubsrepository::unknown') THEN 'Publication Repository Aggregator'
|
||||
WHEN (d.typology = 'entityregistry') THEN 'Registry'
|
||||
WHEN (d.typology = 'scholarcomminfra') THEN 'Scholarly Comm. Infrastructure'
|
||||
WHEN (d.typology = 'pubsrepository::thematic') THEN 'Thematic Repository'
|
||||
WHEN (d.typology = 'websource') THEN 'Web Source'
|
||||
WHEN (d.typology = 'entityregistry::projects') THEN 'Funder database'
|
||||
WHEN (d.typology = 'entityregistry::repositories') THEN 'Registry of repositories'
|
||||
WHEN (d.typology = 'softwarerepository') THEN 'Software Repository'
|
||||
WHEN (d.typology = 'aggregator::softwarerepository') THEN 'Software Repository Aggregator'
|
||||
WHEN (d.typology = 'orprepository') THEN 'Repository'
|
||||
ELSE 'Other'
|
||||
END || '@@@dnet:datasource_typologies@@@dnet:datasource_typologies' AS datasourcetype,
|
||||
'sysimport:crosswalk:entityregistry@@@sysimport:crosswalk:entityregistry@@@dnet:provenance_actions@@@dnet:provenance_actions' AS provenanceaction,
|
||||
CONCAT(d.issn, '@@@', d.eissn, '@@@', d.lissn) AS journal
|
||||
|
||||
FROM dsm_datasources d
|
||||
|
||||
LEFT OUTER JOIN dsm_datasources dc on (d.collectedfrom = dc.id)
|
||||
LEFT OUTER JOIN dsm_api a ON (d.id = a.datasource)
|
||||
LEFT OUTER JOIN dsm_datasourcepids di ON (d.id = di.datasource)
|
||||
|
||||
GROUP BY
|
||||
d.id,
|
||||
d.officialname,
|
||||
d.englishname,
|
||||
d.websiteurl,
|
||||
d.logourl,
|
||||
d.contactemail,
|
||||
d.namespaceprefix,
|
||||
d.description,
|
||||
d.latitude,
|
||||
d.longitude,
|
||||
d.dateofcollection,
|
||||
d.dateofvalidation,
|
||||
d.releasestartdate,
|
||||
d.releaseenddate,
|
||||
d.missionstatementurl,
|
||||
d.dataprovider,
|
||||
d.serviceprovider,
|
||||
d.databaseaccesstype,
|
||||
d.datauploadtype,
|
||||
d.databaseaccessrestriction,
|
||||
d.datauploadrestriction,
|
||||
d.versioning,
|
||||
d.citationguidelineurl,
|
||||
d.qualitymanagementkind,
|
||||
d.pidsystems,
|
||||
d.certificates,
|
||||
dc.id,
|
||||
dc.officialname,
|
||||
d.issn,
|
||||
d.eissn,
|
||||
d.lissn
|
|
@ -0,0 +1,36 @@
|
|||
SELECT
|
||||
o.id AS organizationid,
|
||||
o.legalshortname AS legalshortname,
|
||||
o.legalname AS legalname,
|
||||
o.websiteurl AS websiteurl,
|
||||
o.logourl AS logourl,
|
||||
o.ec_legalbody AS eclegalbody,
|
||||
o.ec_legalperson AS eclegalperson,
|
||||
o.ec_nonprofit AS ecnonprofit,
|
||||
o.ec_researchorganization AS ecresearchorganization,
|
||||
o.ec_highereducation AS echighereducation,
|
||||
o.ec_internationalorganizationeurinterests AS ecinternationalorganizationeurinterests,
|
||||
o.ec_internationalorganization AS ecinternationalorganization,
|
||||
o.ec_enterprise AS ecenterprise,
|
||||
o.ec_smevalidated AS ecsmevalidated,
|
||||
o.ec_nutscode AS ecnutscode,
|
||||
o.dateofcollection AS dateofcollection,
|
||||
o.lastupdate AS dateoftransformation,
|
||||
false AS inferred,
|
||||
false AS deletedbyinference,
|
||||
o.trust AS trust,
|
||||
'' AS inferenceprovenance,
|
||||
d.id AS collectedfromid,
|
||||
d.officialname AS collectedfromname,
|
||||
|
||||
o.country || '@@@dnet:countries' AS country,
|
||||
'sysimport:crosswalk:entityregistry@@@sysimport:crosswalk:entityregistry@@@dnet:provenance_actions@@@dnet:provenance_actions' AS provenanceaction,
|
||||
|
||||
ARRAY[]::text[] AS pid
|
||||
FROM dsm_organizations o
|
||||
LEFT OUTER JOIN dsm_datasources d ON (d.id = o.collectedfrom)
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
@ -0,0 +1,53 @@
|
|||
SELECT
|
||||
o.id AS organizationid,
|
||||
coalesce((array_agg(a.acronym))[1], o.name) AS legalshortname,
|
||||
o.name AS legalname,
|
||||
array_agg(DISTINCT n.name) AS "alternativeNames",
|
||||
(array_agg(u.url))[1] AS websiteurl,
|
||||
o.modification_date AS dateoftransformation,
|
||||
false AS inferred,
|
||||
false AS deletedbyinference,
|
||||
0.95 AS trust,
|
||||
'' AS inferenceprovenance,
|
||||
'openaire____::openorgs' AS collectedfromid,
|
||||
'OpenOrgs Database' AS collectedfromname,
|
||||
o.country || '@@@dnet:countries' AS country,
|
||||
'sysimport:crosswalk:entityregistry@@@sysimport:crosswalk:entityregistry@@@dnet:provenance_actions@@@dnet:provenance_actions' AS provenanceaction,
|
||||
array_agg(DISTINCT i.otherid || '###' || i.type || '@@@dnet:pid_types') AS pid
|
||||
FROM organizations o
|
||||
LEFT OUTER JOIN acronyms a ON (a.id = o.id)
|
||||
LEFT OUTER JOIN urls u ON (u.id = o.id)
|
||||
LEFT OUTER JOIN other_ids i ON (i.id = o.id)
|
||||
LEFT OUTER JOIN other_names n ON (n.id = o.id)
|
||||
GROUP BY
|
||||
o.id,
|
||||
o.name,
|
||||
o.modification_date,
|
||||
o.country
|
||||
|
||||
UNION ALL
|
||||
|
||||
SELECT
|
||||
'openorgsmesh'||substring(o.id, 13)||'-'||md5(n.name) AS organizationid,
|
||||
n.name AS legalshortname,
|
||||
n.name AS legalname,
|
||||
ARRAY[]::text[] AS "alternativeNames",
|
||||
(array_agg(u.url))[1] AS websiteurl,
|
||||
o.modification_date AS dateoftransformation,
|
||||
false AS inferred,
|
||||
false AS deletedbyinference,
|
||||
0.88 AS trust,
|
||||
'' AS inferenceprovenance,
|
||||
'openaire____::openorgs' AS collectedfromid,
|
||||
'OpenOrgs Database' AS collectedfromname,
|
||||
o.country || '@@@dnet:countries' AS country,
|
||||
'sysimport:crosswalk:entityregistry@@@sysimport:crosswalk:entityregistry@@@dnet:provenance_actions@@@dnet:provenance_actions' AS provenanceaction,
|
||||
array_agg(DISTINCT i.otherid || '###' || i.type || '@@@dnet:pid_types') AS pid
|
||||
FROM other_names n
|
||||
LEFT OUTER JOIN organizations o ON (n.id = o.id)
|
||||
LEFT OUTER JOIN urls u ON (u.id = o.id)
|
||||
LEFT OUTER JOIN other_ids i ON (i.id = o.id)
|
||||
GROUP BY
|
||||
o.id, o.modification_date, o.country, n.name
|
||||
|
||||
|
|
@ -0,0 +1,16 @@
|
|||
SELECT
|
||||
po.project AS project,
|
||||
po.resporganization AS resporganization,
|
||||
po.participantnumber AS participantnumber,
|
||||
po.contribution AS contribution,
|
||||
NULL AS startdate,
|
||||
NULL AS enddate,
|
||||
false AS inferred,
|
||||
false AS deletedbyinference,
|
||||
po.trust AS trust,
|
||||
NULL AS inferenceprovenance,
|
||||
|
||||
po.semanticclass || '@@@' || po.semanticclass || '@@@dnet:project_organization_relations@@@dnet:project_organization_relations' AS semantics,
|
||||
'sysimport:crosswalk:entityregistry@@@sysimport:crosswalk:entityregistry@@@dnet:provenance_actions@@@dnet:provenance_actions' AS provenanceaction
|
||||
|
||||
FROM project_organization po
|
|
@ -0,0 +1,87 @@
|
|||
SELECT
|
||||
p.id AS projectid,
|
||||
p.code AS code,
|
||||
p.websiteurl AS websiteurl,
|
||||
p.acronym AS acronym,
|
||||
p.title AS title,
|
||||
p.startdate AS startdate,
|
||||
p.enddate AS enddate,
|
||||
p.call_identifier AS callidentifier,
|
||||
p.keywords AS keywords,
|
||||
p.duration AS duration,
|
||||
p.ec_sc39 AS ecsc39,
|
||||
p.oa_mandate_for_publications AS oamandatepublications,
|
||||
p.ec_article29_3 AS ecarticle29_3,
|
||||
p.dateofcollection AS dateofcollection,
|
||||
p.lastupdate AS dateoftransformation,
|
||||
p.inferred AS inferred,
|
||||
p.deletedbyinference AS deletedbyinference,
|
||||
p.trust AS trust,
|
||||
p.inferenceprovenance AS inferenceprovenance,
|
||||
p.optional1 AS optional1,
|
||||
p.optional2 AS optional2,
|
||||
p.jsonextrainfo AS jsonextrainfo,
|
||||
p.contactfullname AS contactfullname,
|
||||
p.contactfax AS contactfax,
|
||||
p.contactphone AS contactphone,
|
||||
p.contactemail AS contactemail,
|
||||
p.summary AS summary,
|
||||
p.currency AS currency,
|
||||
p.totalcost AS totalcost,
|
||||
p.fundedamount AS fundedamount,
|
||||
dc.id AS collectedfromid,
|
||||
dc.officialname AS collectedfromname,
|
||||
p.contracttype || '@@@' || p.contracttypename || '@@@' || p.contracttypescheme || '@@@' || p.contracttypescheme AS contracttype,
|
||||
pac.code || '@@@' || pac.name || '@@@' || pas.code || '@@@' || pas.name AS provenanceaction,
|
||||
array_agg(DISTINCT i.pid || '###' || i.issuertype) AS pid,
|
||||
array_agg(DISTINCT s.name || '###' || sc.code || '@@@' || sc.name || '@@@' || ss.code || '@@@' || ss.name) AS subjects,
|
||||
array_agg(DISTINCT fp.path) AS fundingtree
|
||||
FROM projects p
|
||||
LEFT OUTER JOIN class pac ON (pac.code = p.provenanceactionclass)
|
||||
LEFT OUTER JOIN scheme pas ON (pas.code = p.provenanceactionscheme)
|
||||
|
||||
LEFT OUTER JOIN projectpids pp ON (pp.project = p.id)
|
||||
LEFT OUTER JOIN dsm_identities i ON (i.pid = pp.pid)
|
||||
|
||||
LEFT OUTER JOIN dsm_datasources dc ON (dc.id = p.collectedfrom)
|
||||
|
||||
LEFT OUTER JOIN project_fundingpath pf ON (pf.project = p.id)
|
||||
LEFT OUTER JOIN fundingpaths fp ON (fp.id = pf.funding)
|
||||
|
||||
LEFT OUTER JOIN project_subject ps ON (ps.project = p.id)
|
||||
LEFT OUTER JOIN subjects s ON (s.id = ps.subject)
|
||||
|
||||
LEFT OUTER JOIN class sc ON (sc.code = s.semanticclass)
|
||||
LEFT OUTER JOIN scheme ss ON (ss.code = s.semanticscheme)
|
||||
|
||||
GROUP BY
|
||||
p.id,
|
||||
p.code,
|
||||
p.websiteurl,
|
||||
p.acronym,
|
||||
p.title,
|
||||
p.startdate,
|
||||
p.enddate,
|
||||
p.call_identifier,
|
||||
p.keywords,
|
||||
p.duration,
|
||||
p.ec_sc39,
|
||||
p.oa_mandate_for_publications,
|
||||
p.ec_article29_3,
|
||||
p.dateofcollection,
|
||||
p.inferred,
|
||||
p.deletedbyinference,
|
||||
p.trust,
|
||||
p.inferenceprovenance,
|
||||
p.contactfullname,
|
||||
p.contactfax,
|
||||
p.contactphone,
|
||||
p.contactemail,
|
||||
p.contracttype,
|
||||
p.summary,
|
||||
p.currency,
|
||||
p.totalcost,
|
||||
p.fundedamount,
|
||||
dc.id,
|
||||
dc.officialname,
|
||||
pac.code, pac.name, pas.code, pas.name;
|
|
@ -0,0 +1,17 @@
|
|||
SELECT local_id AS id1, oa_original_id AS id2 FROM openaire_simrels WHERE reltype = 'is_similar'
|
||||
|
||||
UNION ALL
|
||||
|
||||
SELECT
|
||||
o.id AS id1,
|
||||
'openorgsmesh'||substring(o.id, 13)||'-'||md5(a.acronym) AS id2
|
||||
FROM acronyms a
|
||||
LEFT OUTER JOIN organizations o ON (a.id = o.id)
|
||||
|
||||
UNION ALL
|
||||
|
||||
SELECT
|
||||
o.id AS id1,
|
||||
'openorgsmesh'||substring(o.id, 13)||'-'||md5(n.name) AS id2
|
||||
FROM other_names n
|
||||
LEFT OUTER JOIN organizations o ON (n.id = o.id)
|
Loading…
Reference in New Issue