forked from D-Net/dnet-hadoop
This commit is contained in:
parent
44a12d244f
commit
669a509430
|
@ -5,21 +5,12 @@ import java.io.*;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.nio.charset.StandardCharsets;
|
import java.nio.charset.StandardCharsets;
|
||||||
|
|
||||||
import org.apache.commons.io.IOUtils;
|
|
||||||
import org.apache.hadoop.conf.Configuration;
|
|
||||||
import org.apache.hadoop.fs.FSDataInputStream;
|
|
||||||
import org.apache.hadoop.fs.FileSystem;
|
|
||||||
import org.apache.hadoop.fs.Path;
|
|
||||||
import org.apache.http.HttpEntity;
|
import org.apache.http.HttpEntity;
|
||||||
import org.apache.http.HttpResponse;
|
import org.apache.http.HttpResponse;
|
||||||
import org.apache.http.client.HttpClient;
|
import org.apache.http.client.HttpClient;
|
||||||
import org.apache.http.client.methods.HttpPost;
|
import org.apache.http.client.methods.HttpPost;
|
||||||
import org.apache.http.client.methods.HttpPut;
|
import org.apache.http.client.methods.HttpPut;
|
||||||
|
|
||||||
import org.apache.http.entity.ContentType;
|
|
||||||
import org.apache.http.entity.InputStreamEntity;
|
|
||||||
import org.apache.http.entity.StringEntity;
|
import org.apache.http.entity.StringEntity;
|
||||||
|
|
||||||
import org.apache.http.entity.mime.MultipartEntityBuilder;
|
import org.apache.http.entity.mime.MultipartEntityBuilder;
|
||||||
import org.apache.http.impl.client.DefaultHttpClient;
|
import org.apache.http.impl.client.DefaultHttpClient;
|
||||||
import org.apache.http.util.EntityUtils;
|
import org.apache.http.util.EntityUtils;
|
||||||
|
@ -28,6 +19,8 @@ import com.google.gson.Gson;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.oa.graph.dump.zenodo.ZenodoModel;
|
import eu.dnetlib.dhp.oa.graph.dump.zenodo.ZenodoModel;
|
||||||
|
|
||||||
|
//import org.apache.http.entity.mime.MultipartEntityBuilder;
|
||||||
|
|
||||||
public class APIClient implements Serializable {
|
public class APIClient implements Serializable {
|
||||||
|
|
||||||
String urlString;
|
String urlString;
|
||||||
|
@ -81,9 +74,26 @@ public class APIClient implements Serializable {
|
||||||
|
|
||||||
return response.getStatusLine().getStatusCode();
|
return response.getStatusLine().getStatusCode();
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// public int upload(InputStream is, String file_name) throws IOException {
|
||||||
|
// HttpClient client = new DefaultHttpClient();
|
||||||
|
//
|
||||||
|
// HttpPut put = new HttpPut(bucket + "/" + file_name);
|
||||||
|
// put.setHeader("Authorization", "Bearer " + access_token);
|
||||||
|
// put.addHeader("Content-Type", "application/zip");
|
||||||
|
//
|
||||||
|
// HttpEntity data = MultipartEntityBuilder
|
||||||
|
// .create()
|
||||||
|
// // .addPart("file", new ByteArrayInputStream(is));
|
||||||
|
// .addBinaryBody(file_name, is, ContentType.APPLICATION_OCTET_STREAM, file_name)
|
||||||
|
// .build();
|
||||||
|
// put.setEntity(data);
|
||||||
|
//
|
||||||
|
// HttpResponse response = client.execute(put);
|
||||||
|
//
|
||||||
|
// return response.getStatusLine().getStatusCode();
|
||||||
|
// }
|
||||||
|
|
||||||
public int upload(File file, String file_name) throws IOException {
|
public int upload(File file, String file_name) throws IOException {
|
||||||
HttpClient client = new DefaultHttpClient();
|
HttpClient client = new DefaultHttpClient();
|
||||||
|
@ -112,12 +122,11 @@ public class APIClient implements Serializable {
|
||||||
HttpResponse response = client.execute(post);
|
HttpResponse response = client.execute(post);
|
||||||
return response.getStatusLine().getStatusCode();
|
return response.getStatusLine().getStatusCode();
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public int publish() throws IOException {
|
public int publish() throws IOException {
|
||||||
HttpClient client = new DefaultHttpClient();
|
HttpClient client = new DefaultHttpClient();
|
||||||
HttpPost post = new HttpPost(urlString +"/"+ deposition_id +"/actions/publish") ;
|
HttpPost post = new HttpPost(urlString + "/" + deposition_id + "/actions/publish");
|
||||||
post.setHeader("Authorization", "Bearer " + access_token);
|
post.setHeader("Authorization", "Bearer " + access_token);
|
||||||
|
|
||||||
HttpResponse response = client.execute(post);
|
HttpResponse response = client.execute(post);
|
||||||
|
|
|
@ -40,7 +40,7 @@ public class QueryInformationSystem {
|
||||||
this.isLookUp = isLookUpService;
|
this.isLookUp = isLookUpService;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static CommunityMap getMap(List<String> communityMap) {
|
private CommunityMap getMap(List<String> communityMap) {
|
||||||
final CommunityMap map = new CommunityMap();
|
final CommunityMap map = new CommunityMap();
|
||||||
|
|
||||||
communityMap.stream().forEach(xml -> {
|
communityMap.stream().forEach(xml -> {
|
||||||
|
|
|
@ -1,14 +1,11 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.oa.graph.dump;
|
package eu.dnetlib.dhp.oa.graph.dump;
|
||||||
|
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
import java.io.File;
|
||||||
import com.google.gson.Gson;
|
import java.io.Serializable;
|
||||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
|
||||||
import eu.dnetlib.dhp.common.DbClient;
|
import javax.management.Query;
|
||||||
import eu.dnetlib.dhp.oa.graph.dump.zenodo.Creator;
|
|
||||||
import eu.dnetlib.dhp.oa.graph.dump.zenodo.Metadata;
|
|
||||||
import eu.dnetlib.dhp.oa.graph.dump.zenodo.ZenodoModel;
|
|
||||||
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.Relation;
|
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
import org.apache.commons.logging.Log;
|
import org.apache.commons.logging.Log;
|
||||||
import org.apache.commons.logging.LogFactory;
|
import org.apache.commons.logging.LogFactory;
|
||||||
|
@ -18,20 +15,13 @@ import org.apache.hadoop.fs.LocatedFileStatus;
|
||||||
import org.apache.hadoop.fs.Path;
|
import org.apache.hadoop.fs.Path;
|
||||||
import org.apache.hadoop.fs.RemoteIterator;
|
import org.apache.hadoop.fs.RemoteIterator;
|
||||||
|
|
||||||
import java.io.BufferedWriter;
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
import java.io.File;
|
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
|
||||||
import java.io.Serializable;
|
|
||||||
import java.sql.ResultSet;
|
|
||||||
import java.util.Arrays;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.function.Consumer;
|
|
||||||
import java.util.function.Function;
|
|
||||||
|
|
||||||
public class SendToZenodo implements Serializable {
|
public class SendToZenodo implements Serializable {
|
||||||
|
|
||||||
private static final Log log = LogFactory.getLog(SendToZenodo.class);
|
private static final Log log = LogFactory.getLog(SendToZenodo.class);
|
||||||
|
|
||||||
|
|
||||||
public static void main(final String[] args) throws Exception {
|
public static void main(final String[] args) throws Exception {
|
||||||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
|
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
|
||||||
IOUtils
|
IOUtils
|
||||||
|
@ -42,48 +32,56 @@ public class SendToZenodo implements Serializable {
|
||||||
|
|
||||||
parser.parseArgument(args);
|
parser.parseArgument(args);
|
||||||
|
|
||||||
|
|
||||||
final String hdfsPath = parser.get("hdfsPath");
|
final String hdfsPath = parser.get("hdfsPath");
|
||||||
final String hdfsNameNode = parser.get("hdfsNameNode");
|
final String hdfsNameNode = parser.get("hdfsNameNode");
|
||||||
final String access_token = parser.get("accessToken");
|
final String access_token = parser.get("accessToken");
|
||||||
final String connection_url = parser.get("url");
|
final String connection_url = parser.get("connectionUrl");
|
||||||
final String metadata = parser.get("metadata");
|
final String metadata = parser.get("metadata");
|
||||||
|
final String isLookUpUrl = parser.get("isLookUpUrl");
|
||||||
|
|
||||||
|
QueryInformationSystem qis = new QueryInformationSystem();
|
||||||
|
qis.setIsLookUp(ISLookupClientFactory.getLookUpService(isLookUpUrl));
|
||||||
|
CommunityMap communityMap = qis.getCommunityMap();
|
||||||
|
|
||||||
Configuration conf = new Configuration();
|
Configuration conf = new Configuration();
|
||||||
conf.set("fs.defaultFS", hdfsNameNode);
|
conf.set("fs.defaultFS", hdfsNameNode);
|
||||||
|
|
||||||
FileSystem fileSystem = FileSystem.get(conf);
|
FileSystem fileSystem = FileSystem.get(conf);
|
||||||
|
|
||||||
|
RemoteIterator<LocatedFileStatus> fileStatusListIterator = fileSystem
|
||||||
RemoteIterator<LocatedFileStatus> fileStatusListIterator = fileSystem.listFiles(
|
.listFiles(
|
||||||
new Path(hdfsPath), true);
|
new Path(hdfsPath), true);
|
||||||
APIClient apiClient = new APIClient(connection_url, access_token);
|
APIClient apiClient = new APIClient(connection_url, access_token);
|
||||||
apiClient.connect();
|
apiClient.connect();
|
||||||
while(fileStatusListIterator.hasNext()){
|
while (fileStatusListIterator.hasNext()) {
|
||||||
LocatedFileStatus fileStatus = fileStatusListIterator.next();
|
LocatedFileStatus fileStatus = fileStatusListIterator.next();
|
||||||
|
|
||||||
Path p = fileStatus.getPath();
|
Path p = fileStatus.getPath();
|
||||||
String p_string = p.toString();
|
String p_string = p.toString();
|
||||||
String tmp = p_string.substring(0, p_string.lastIndexOf("/") );
|
String tmp = p_string.substring(0, p_string.lastIndexOf("/"));
|
||||||
String community = tmp.substring(tmp.lastIndexOf("/") + 1);
|
String community = tmp.substring(tmp.lastIndexOf("/") + 1);
|
||||||
log.info("Sending information for community: " + community);
|
log.info("Sending information for community: " + community);
|
||||||
fileSystem.copyToLocalFile(p, new Path("/tmp/" + community));
|
String community_name = communityMap.get(community).replace(" ", "_");
|
||||||
|
log.info("Copying information for community: " + community);
|
||||||
|
fileSystem.copyToLocalFile(p, new Path("/tmp/" + community_name));
|
||||||
File f = new File("/tmp/" + community);
|
File f = new File("/tmp/" + community_name);
|
||||||
apiClient.upload(f, community);
|
try {
|
||||||
|
apiClient.upload(f, community_name);
|
||||||
apiClient.sendMretadata(metadata);
|
apiClient.sendMretadata(metadata);
|
||||||
apiClient.publish();
|
apiClient.publish();
|
||||||
|
} catch (Exception e) {
|
||||||
if (f.exists()){
|
if (f.exists()) {
|
||||||
|
log.info("Deleting information for community: " + community);
|
||||||
|
f.delete();
|
||||||
|
}
|
||||||
|
} finally {
|
||||||
|
if (f.exists()) {
|
||||||
|
log.info("Deleting information for community: " + community);
|
||||||
f.delete();
|
f.delete();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
Loading…
Reference in New Issue