@ -9,6 +9,7 @@ import eu.openaire.urls_controller.payloads.responces.AssignmentsResponse;
import eu.openaire.urls_controller.util.ControllerConstants ;
import eu.openaire.urls_controller.util.ControllerConstants ;
import eu.openaire.urls_controller.util.FileUtils ;
import eu.openaire.urls_controller.util.FileUtils ;
import eu.openaire.urls_controller.util.GenericUtils ;
import eu.openaire.urls_controller.util.GenericUtils ;
import org.apache.commons.lang3.StringUtils ;
import org.slf4j.Logger ;
import org.slf4j.Logger ;
import org.slf4j.LoggerFactory ;
import org.slf4j.LoggerFactory ;
import org.springframework.http.HttpStatus ;
import org.springframework.http.HttpStatus ;
@ -306,21 +307,21 @@ public class UrlController {
ImpalaConnector . databaseLock . lock ( ) ;
ImpalaConnector . databaseLock . lock ( ) ;
// Store the workerReport into the database.
// Store the workerReport into the database.
String insertIntoPayloadBaseQuery = "INSERT INTO " + ImpalaConnector . databaseName + ".payload (id, original_url, actual_url, date, mimetype, size, hash, location, provenance) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?) ";
String insertIntoPayloadBaseQuery = "INSERT INTO " + ImpalaConnector . databaseName + ".payload (id, original_url, actual_url, ` date` , mimetype, size, ` hash` , ` location` , provenance) VALUES ";
String insertIntoAttemptBaseQuery = "INSERT INTO " + ImpalaConnector . databaseName + ".attempt (id, original_url, date, status, error_class, error_message) VALUES (?, ?, ?, ?, ?, ?) ";
String insertIntoAttemptBaseQuery = "INSERT INTO " + ImpalaConnector . databaseName + ".attempt (id, original_url, ` date` , status, error_class, error_message) VALUES ";
String tempInsertQueryName = null ;
String tempInsertQueryName = null ;
PreparedStatement preparedInsertPayloadStatement = null , preparedI nsertAttemptStatement = null ;
Statement insertPayloadStatement = null , i nsertAttemptStatement = null ;
try {
try {
tempInsertQueryName = "insertIntoPayloadBaseQuery" ;
tempInsertQueryName = "insertIntoPayloadBaseQuery" ;
preparedInsertPayloadStatement = con . prepareStatement ( insertIntoPayloadBaseQuery ) ;
insertPayloadStatement = con . createStatement ( ) ;
tempInsertQueryName = "insertIntoAttemptBaseQuery" ;
tempInsertQueryName = "insertIntoAttemptBaseQuery" ;
preparedInsertAttemptStatement = con . prepareStatement ( insertIntoAttemptBaseQuery ) ;
insertAttemptStatement = con . createStatement ( ) ;
} catch ( SQLException sqle ) {
} catch ( SQLException sqle ) {
ImpalaConnector . databaseLock . unlock ( ) ;
ImpalaConnector . databaseLock . unlock ( ) ;
String errorMsg = "Problem when creating the prepared statement for \"" + tempInsertQueryName + "\"!\n" ;
String errorMsg = "Problem when creating the statement for \"" + tempInsertQueryName + "\"!\n" ;
logger . error ( errorMsg + sqle . getMessage ( ) ) ;
logger . error ( errorMsg + sqle . getMessage ( ) ) ;
close PreparedStatements( preparedInsertPayloadStatement , preparedI nsertAttemptStatement, con ) ;
close Statements( insertPayloadStatement , i nsertAttemptStatement, con ) ;
return ResponseEntity . status ( HttpStatus . INTERNAL_SERVER_ERROR ) . body ( errorMsg ) ;
return ResponseEntity . status ( HttpStatus . INTERNAL_SERVER_ERROR ) . body ( errorMsg ) ;
}
}
@ -330,13 +331,17 @@ public class UrlController {
ImpalaConnector . databaseLock . unlock ( ) ;
ImpalaConnector . databaseLock . unlock ( ) ;
String errorMsg = "Problem when setting Connection.AutoCommit to \"false\"!\n" ;
String errorMsg = "Problem when setting Connection.AutoCommit to \"false\"!\n" ;
logger . error ( errorMsg + sqle . getMessage ( ) ) ;
logger . error ( errorMsg + sqle . getMessage ( ) ) ;
close PreparedStatements( preparedInsertPayloadStatement , preparedI nsertAttemptStatement, con ) ;
close Statements( insertPayloadStatement , i nsertAttemptStatement, con ) ;
return ResponseEntity . status ( HttpStatus . INTERNAL_SERVER_ERROR ) . body ( errorMsg ) ;
return ResponseEntity . status ( HttpStatus . INTERNAL_SERVER_ERROR ) . body ( errorMsg ) ;
}
}
String payloadErrorMsg = null ;
String payloadErrorMsg = null ;
int failedCount = 0 ;
int failedCount = 0 ;
// TODO - Think about handling this loop with multiple threads.
// The Impala-server will handle the synchronization itself.
// Check online what "statement.setPoolable()" does: does it improve speed? Only with multiple threads, or also with a single thread?
for ( UrlReport urlReport : urlReports ) {
for ( UrlReport urlReport : urlReports ) {
Payload payload = urlReport . getPayload ( ) ;
Payload payload = urlReport . getPayload ( ) ;
if ( payload = = null ) {
if ( payload = = null ) {
@ -345,26 +350,20 @@ public class UrlController {
continue ;
continue ;
}
}
try { // We use a "PreparedStatement" to do insertions, for security reasons.
try {
preparedInsertPayloadStatement . setString ( 1 , payload . getId ( ) ) ;
preparedInsertPayloadStatement . setString ( 2 , payload . getOriginal_url ( ) ) ;
preparedInsertPayloadStatement . setString ( 3 , payload . getActual_url ( ) ) ;
preparedInsertPayloadStatement . setTimestamp ( 4 , payload . getTimestamp_acquired ( ) ) ;
preparedInsertPayloadStatement . setString ( 5 , payload . getMime_type ( ) ) ;
// The column "size" in the table is of type "String" so we cast the Long to String. The Parquet-format in the database does not work well with integers.
// The column "size" in the table is of type "String" so we cast the Long to String. The Parquet-format in the database does not work well with integers.
String stringSize = null ;
String stringSize = null ;
Long size = payload . getSize ( ) ;
Long size = payload . getSize ( ) ;
if ( size ! = null )
if ( size ! = null )
stringSize = String . valueOf ( size ) ;
stringSize = String . valueOf ( size ) ;
preparedInsertPayloadStatement. setString ( 6 , stringSize ) ;
String insertIntoPayloadFullQuery = insertIntoPayloadBaseQuery + "('" + payload . getId ( ) + "','" + payload . getOriginal_url ( ) + "','" + payload . getActual_url ( ) + "','"
preparedInsertPayloadStatement . setString ( 7 , payload . getHash ( ) ) ;
+ payload . getTimestamp_acquired ( ) + "','" + payload . getMime_type ( ) + "','" + stringSize + "','" + payload . getHash ( ) + "','"
preparedInsertPayloadStatement . setString ( 8 , payload . getLocation ( ) ) ;
+ payload . getLocation ( ) + "','" + payload . getProvenance ( ) + "')" ;
preparedInsertPayloadStatement . setString ( 9 , payload . getProvenance ( ) ) ;
preparedInsertPayloadStatement. executeUpdate ( ) ;
insertPayloadStatement. execute ( insertIntoPayloadFullQuery ) ;
} catch ( SQLException sqle ) {
} catch ( SQLException sqle ) {
logger . error ( "Problem when executing the \"insertIntoPayload Base Query\": " + sqle . getMessage ( ) + "\n\n" ) ;
logger . error ( "Problem when executing the \"insertIntoPayload Full Query\": " + sqle . getMessage ( ) + "\n\n" ) ;
}
}
Error error = urlReport . getError ( ) ;
Error error = urlReport . getError ( ) ;
@ -373,14 +372,15 @@ public class UrlController {
error = new Error ( null , null ) ;
error = new Error ( null , null ) ;
}
}
try { // We use a "PreparedStatement" to do insertions, for security reasons.
try {
preparedInsertAttemptStatement . setString ( 1 , payload . getId ( ) ) ;
String errorCause = error . getMessage ( ) ;
preparedInsertAttemptStatement . setString ( 2 , payload . getOriginal_url ( ) ) ;
if ( errorCause ! = null )
preparedInsertAttemptStatement . setTimestamp ( 3 , payload . getTimestamp_acquired ( ) ) ;
errorCause = StringUtils . replace ( errorCause , "'" , "\\'" , - 1 ) ; // Escape single quotes in the error-cause-message.
preparedInsertAttemptStatement . setString ( 4 , urlReport . getStatus ( ) . toString ( ) ) ;
preparedInsertAttemptStatement . setString ( 5 , String . valueOf ( error . getType ( ) ) ) ; // This covers the case of "null".
String insertIntoAttemptFullQuery = insertIntoAttemptBaseQuery + "('" + payload . getId ( ) + "','" + payload . getOriginal_url ( ) + "','"
preparedInsertAttemptStatement . setString ( 6 , error . getMessage ( ) ) ;
+ payload . getTimestamp_acquired ( ) + "','" + urlReport . getStatus ( ) . toString ( ) + "','" + error . getType ( ) + "','" + errorCause + "')" ;
preparedInsertAttemptStatement . executeUpdate ( ) ;
insertAttemptStatement . execute ( insertIntoAttemptFullQuery ) ;
} catch ( SQLException sqle ) {
} catch ( SQLException sqle ) {
logger . error ( "Problem when executing the \"insertIntoAttemptBaseQuery\": " + sqle . getMessage ( ) + "\n\n" ) ;
logger . error ( "Problem when executing the \"insertIntoAttemptBaseQuery\": " + sqle . getMessage ( ) + "\n\n" ) ;
}
}
@ -394,7 +394,7 @@ public class UrlController {
logger . error ( errorMsg + "\n" + sqle . getMessage ( ) ) ;
logger . error ( errorMsg + "\n" + sqle . getMessage ( ) ) ;
return ResponseEntity . status ( HttpStatus . INTERNAL_SERVER_ERROR ) . body ( errorMsg ) ;
return ResponseEntity . status ( HttpStatus . INTERNAL_SERVER_ERROR ) . body ( errorMsg ) ;
} finally {
} finally {
close PreparedStatements( preparedInsertPayloadStatement , preparedI nsertAttemptStatement, null ) ; // Do not close the connection here!
close Statements( insertPayloadStatement , i nsertAttemptStatement, null ) ; // Do not close the connection here!
}
}
logger . debug ( "Finished inserting the payloads and the attempts into the \"payload\" and \"attempt\" tables. Going to merge the parquet files for those tables." ) ;
logger . debug ( "Finished inserting the payloads and the attempts into the \"payload\" and \"attempt\" tables. Going to merge the parquet files for those tables." ) ;
@ -470,17 +470,17 @@ public class UrlController {
}
}
private boolean close PreparedStatements( PreparedStatement preparedStatement1 , PreparedStatement preparedS tatement2, Connection con ) {
private boolean close Statements( Statement statement1 , Statement s tatement2, Connection con ) {
try {
try {
if ( preparedS tatement1 ! = null )
if ( s tatement1 ! = null )
preparedS tatement1. close ( ) ;
s tatement1. close ( ) ;
if ( preparedS tatement2 ! = null )
if ( s tatement2 ! = null )
preparedS tatement2. close ( ) ;
s tatement2. close ( ) ;
if ( con ! = null )
if ( con ! = null )
con . close ( ) ; // It may have already closed and that's fine.
con . close ( ) ; // It may have already closed and that's fine.
return true ;
return true ;
} catch ( SQLException sqle ) {
} catch ( SQLException sqle ) {
logger . error ( "Could not close the connection with the Impala-database.\n" + sqle . getMessage ( ) ) ;
logger . error ( "Could not close the statements or the connection with the Impala-database.\n" + sqle . getMessage ( ) ) ;
return false ;
return false ;
}
}
}
}