- Handle the case where, within a group of related records, the initial record that led to a publication-url failed to have its full-text downloaded. We now make sure that the file-related data for all of those related records is kept "null" and that a special error is written.
- Code optimization.
This commit is contained in:
parent
d682298850
commit
a1f750a0aa
|
@ -156,6 +156,9 @@ public class PublicationsRetrieverPlugin {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Length of "ConnSupportUtils.alreadyDownloadedFromSourceUrlContinuedMessage", computed once when the class is initialized, so that ".length()" is not called again each time a comment is parsed.
// It is added to the index at which that message is found inside a comment, in order to locate where the initial sourceUrl begins.
private static final int lengthOfAlreadyDownloadedFromSourceUrlContinuedMessage = ConnSupportUtils.alreadyDownloadedFromSourceUrlContinuedMessage.length();
|
||||||
|
// Length of "ConnSupportUtils.alreadyDownloadedFromIDMessage", computed once when the class is initialized, so that ".length()" is not called again each time a comment is parsed.
// It is used as the start-index when extracting the initial ID from a comment which begins with that message.
private static final int lengthOfAlreadyDownloadedFromIDMessage = ConnSupportUtils.alreadyDownloadedFromIDMessage.length();
|
||||||
|
|
||||||
public static void addUrlReportsToWorkerReport(Collection<Assignment> assignments)
|
public static void addUrlReportsToWorkerReport(Collection<Assignment> assignments)
|
||||||
{
|
{
|
||||||
// Index the UrlIds with the DatasourceIds for quick-search later.
|
// Index the UrlIds with the DatasourceIds for quick-search later.
|
||||||
|
@ -176,31 +179,40 @@ public class PublicationsRetrieverPlugin {
|
||||||
if ( "true".equals(data.getWasDocumentOrDatasetAccessible()) ) // The reversed order defends against a potential NPE.
|
if ( "true".equals(data.getWasDocumentOrDatasetAccessible()) ) // The reversed order defends against a potential NPE.
|
||||||
{
|
{
|
||||||
status = UrlReport.StatusType.accessible;
|
status = UrlReport.StatusType.accessible;
|
||||||
if ( comment.startsWith(UrlUtils.alreadyDownloadedFromIDMessage, 0) ) {
|
if ( comment.startsWith(ConnSupportUtils.alreadyDownloadedFromIDMessage, 0) ) {
|
||||||
// The file of this docUrl was already downloaded by another docUrl.
|
// The file of this docUrl was already downloaded by another docUrl.
|
||||||
int indexOfAlreadyDownloadedFromSourceUrlMessage = comment.indexOf(UrlUtils.alreadyDownloadedFromSourceUrlContinuedMessage);
|
int indexOfAlreadyDownloadedFromSourceUrlMessage = comment.indexOf(ConnSupportUtils.alreadyDownloadedFromSourceUrlContinuedMessage);
|
||||||
int indexOfAlreadyDownloadedFromSourceUrl = indexOfAlreadyDownloadedFromSourceUrlMessage + UrlUtils.alreadyDownloadedFromSourceUrlContinuedMessage.length();
|
int indexOfAlreadyDownloadedFromSourceUrl = indexOfAlreadyDownloadedFromSourceUrlMessage + lengthOfAlreadyDownloadedFromSourceUrlContinuedMessage;
|
||||||
String initialId = comment.substring(UrlUtils.alreadyDownloadedFromIDMessage.length(), indexOfAlreadyDownloadedFromSourceUrlMessage); // The fileName starts right after the "message".
|
String initialId = comment.substring(lengthOfAlreadyDownloadedFromIDMessage, indexOfAlreadyDownloadedFromSourceUrlMessage); // The fileName starts right after the "message".
|
||||||
String initialSourceUrl = comment.substring(indexOfAlreadyDownloadedFromSourceUrl);
|
String initialSourceUrl = comment.substring(indexOfAlreadyDownloadedFromSourceUrl);
|
||||||
//logger.debug("initialId: " + initialId + " | sourceUrl: " + initialSourceUrl); // DEBUG!
|
//logger.debug("initialId: " + initialId + " | sourceUrl: " + initialSourceUrl); // DEBUG!
|
||||||
// Search that ID and sourceUrl inside the list, if that instance is the first-found one, then get the file-data (there might be duplicate ID-sourceUrl instances, but only one of them has the file-data).
|
// Search that ID and sourceUrl inside the list, if that instance is the first-found one, then get the file-data (there might be duplicate ID-sourceUrl instances, but only one of them has the file-data).
|
||||||
boolean foundAlreadyDownloadedFullText = false;
|
boolean foundAlreadyDownloadedFullText = false;
|
||||||
for ( DataToBeLogged data_2 : FileUtils.dataToBeLoggedList ) {
|
boolean foundIDUrlInWorkerReport = false;
|
||||||
if ( data_2.getUrlId().equals(initialId) && (data_2.getSourceUrl().equals(initialSourceUrl))
|
for ( DataToBeLogged data_2 : FileUtils.dataToBeLoggedList )
|
||||||
&& ! data_2.getComment().startsWith(UrlUtils.alreadyDownloadedFromIDMessage) ) {
|
{
|
||||||
fileLocation = data_2.getComment();
|
if ( ! (data_2.getUrlId().equals(initialId) && (data_2.getSourceUrl().equals(initialSourceUrl))) )
|
||||||
size = data_2.getSize();
|
continue;
|
||||||
hash = data_2.getHash();
|
|
||||||
mimeType = "application/pdf"; // TODO - If support is added for other doc-formats other than "pdf", then make sure the "mime_type" is assigned correctly.
|
foundIDUrlInWorkerReport = true;
|
||||||
foundAlreadyDownloadedFullText = true;
|
String tempFileLocation = data_2.getComment();
|
||||||
break;
|
if ( tempFileLocation.startsWith(ConnSupportUtils.alreadyDownloadedFromIDMessage, 0) || tempFileLocation.startsWith(HttpConnUtils.docFileNotRetrievedMessage, 0) )
|
||||||
}
|
continue;
|
||||||
|
|
||||||
|
fileLocation = tempFileLocation;
|
||||||
|
size = data_2.getSize();
|
||||||
|
hash = data_2.getHash();
|
||||||
|
mimeType = "application/pdf"; // TODO - If support is added for other doc-formats other than "pdf", then make sure the "mime_type" is assigned correctly.
|
||||||
|
foundAlreadyDownloadedFullText = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
// In case the "alreadyDownloaded" full-text is not found, we have an error. All file-related data is "null".
|
||||||
|
if ( !foundAlreadyDownloadedFullText ) {
|
||||||
|
String addErrorMessage = ((!foundIDUrlInWorkerReport) ? " | That ID-sourceUrl was not found inside the WorkerReport!" : " | The file was not downloaded!");
|
||||||
|
error = new Error(Error.ErrorType.couldRetry, comment + addErrorMessage); // We can still try to download it from the found docUrl, in the future.
|
||||||
}
|
}
|
||||||
// In case the "alreadyDownloaded" full-text is not found, we have an error.
|
|
||||||
if ( !foundAlreadyDownloadedFullText )
|
|
||||||
error = new Error(Error.ErrorType.couldRetry, comment + " | That ID-sourceUrl was not found inside the WorkerReport!"); // We can still try to download it from the found docUrl, in the future.
|
|
||||||
}
|
}
|
||||||
else if ( ! comment.contains(HttpConnUtils.docFileNotRetrievedMessage) ) { // If it was downloaded without an error.
|
else if ( ! comment.startsWith(HttpConnUtils.docFileNotRetrievedMessage, 0) ) { // If it was downloaded without an error.
|
||||||
fileLocation = comment; // This is the full-file-path.
|
fileLocation = comment; // This is the full-file-path.
|
||||||
mimeType = "application/pdf";
|
mimeType = "application/pdf";
|
||||||
} else // Else the file was not retrieved, so all file-related data are kept "null".
|
} else // Else the file was not retrieved, so all file-related data are kept "null".
|
||||||
|
|
Loading…
Reference in New Issue