Skip to content

Commit

Permalink
Fix issue #549 Show a message if DownloadFrom/-To value of a crawler …
Browse files Browse the repository at this point in the history
…is in wrong format
  • Loading branch information
thomas694 committed Jul 7, 2024
1 parent 134bdd0 commit cd99347
Show file tree
Hide file tree
Showing 12 changed files with 168 additions and 62 deletions.
40 changes: 26 additions & 14 deletions src/TumblThree/TumblThree.Applications/Crawler/TumblrBlogCrawler.cs
Original file line number Diff line number Diff line change
Expand Up @@ -418,6 +418,11 @@ private async Task CrawlPageAsync(int pageNumber)
incompleteCrawl = true;
HandleTimeoutException(timeoutException, Resources.Crawling);
}
catch (FormatException formatException)
{
Logger.Error("TumblrBlogCrawler:CrawlPageAsync: {0}", formatException);
ShellService.ShowError(formatException, "{0}: {1}", Blog.Name, formatException.Message);
}
catch (Exception e)
{
Logger.Error("TumblrBlogCrawler.CrawlPageAsync: {0}", e);
Expand All @@ -436,24 +441,31 @@ private bool PostWithinTimeSpan(Post post)
return true;
}

long downloadFromUnixTime = 0;
long downloadToUnixTime = long.MaxValue;
if (!string.IsNullOrEmpty(Blog.DownloadFrom))
try
{
DateTime downloadFrom = DateTime.ParseExact(Blog.DownloadFrom, "yyyyMMdd", CultureInfo.InvariantCulture,
DateTimeStyles.None);
downloadFromUnixTime = new DateTimeOffset(downloadFrom).ToUnixTimeSeconds();
}
long downloadFromUnixTime = 0;
long downloadToUnixTime = long.MaxValue;
if (!string.IsNullOrEmpty(Blog.DownloadFrom))
{
DateTime downloadFrom = DateTime.ParseExact(Blog.DownloadFrom, "yyyyMMdd", CultureInfo.InvariantCulture,
DateTimeStyles.None);
downloadFromUnixTime = new DateTimeOffset(downloadFrom).ToUnixTimeSeconds();
}

if (!string.IsNullOrEmpty(Blog.DownloadTo))
if (!string.IsNullOrEmpty(Blog.DownloadTo))
{
DateTime downloadTo = DateTime.ParseExact(Blog.DownloadTo, "yyyyMMdd", CultureInfo.InvariantCulture,
DateTimeStyles.None).AddDays(1);
downloadToUnixTime = new DateTimeOffset(downloadTo).ToUnixTimeSeconds();
}

long postTime = post.UnixTimestamp;
return downloadFromUnixTime <= postTime && postTime < downloadToUnixTime;
}
catch (System.FormatException)
{
DateTime downloadTo = DateTime.ParseExact(Blog.DownloadTo, "yyyyMMdd", CultureInfo.InvariantCulture,
DateTimeStyles.None).AddDays(1);
downloadToUnixTime = new DateTimeOffset(downloadTo).ToUnixTimeSeconds();
throw new FormatException(Resources.BlogValueHasWrongFormat);
}

long postTime = post.UnixTimestamp;
return downloadFromUnixTime <= postTime && postTime < downloadToUnixTime;
}

private bool CheckPostAge(TumblrApiJson response)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,11 @@ private async Task CrawlPageAsync(int pageNumber)
incompleteCrawl = true;
HandleTimeoutException(timeoutException, Resources.Crawling);
}
catch (FormatException formatException)
{
Logger.Error("TumblrHiddenCrawler.CrawlPageAsync: {0}", formatException);
ShellService.ShowError(formatException, "{0}: {1}", Blog.Name, formatException.Message);
}
catch (Exception ex)
{
Logger.Error("TumblrHiddenCrawler.CrawlPageAsync: {0}", ex);
Expand Down Expand Up @@ -307,24 +312,31 @@ private bool PostWithinTimeSpan(Post post)
return true;
}

long downloadFromUnixTime = 0;
long downloadToUnixTime = long.MaxValue;
if (!string.IsNullOrEmpty(Blog.DownloadFrom))
try
{
DateTime downloadFrom = DateTime.ParseExact(Blog.DownloadFrom, "yyyyMMdd", CultureInfo.InvariantCulture,
DateTimeStyles.None);
downloadFromUnixTime = new DateTimeOffset(downloadFrom).ToUnixTimeSeconds();
}
long downloadFromUnixTime = 0;
long downloadToUnixTime = long.MaxValue;
if (!string.IsNullOrEmpty(Blog.DownloadFrom))
{
DateTime downloadFrom = DateTime.ParseExact(Blog.DownloadFrom, "yyyyMMdd", CultureInfo.InvariantCulture,
DateTimeStyles.None);
downloadFromUnixTime = new DateTimeOffset(downloadFrom).ToUnixTimeSeconds();
}

if (!string.IsNullOrEmpty(Blog.DownloadTo))
if (!string.IsNullOrEmpty(Blog.DownloadTo))
{
DateTime downloadTo = DateTime.ParseExact(Blog.DownloadTo, "yyyyMMdd", CultureInfo.InvariantCulture,
DateTimeStyles.None).AddDays(1);
downloadToUnixTime = new DateTimeOffset(downloadTo).ToUnixTimeSeconds();
}

long postTime = Convert.ToInt64(post.Timestamp);
return downloadFromUnixTime <= postTime && postTime < downloadToUnixTime;
}
catch (System.FormatException)
{
DateTime downloadTo = DateTime.ParseExact(Blog.DownloadTo, "yyyyMMdd", CultureInfo.InvariantCulture,
DateTimeStyles.None).AddDays(1);
downloadToUnixTime = new DateTimeOffset(downloadTo).ToUnixTimeSeconds();
throw new FormatException(Resources.BlogValueHasWrongFormat);
}

long postTime = Convert.ToInt64(post.Timestamp);
return downloadFromUnixTime <= postTime && postTime < downloadToUnixTime;
}

private async Task<bool> CheckIfLoggedInAsync()
Expand Down Expand Up @@ -426,7 +438,7 @@ private async Task AddUrlsToDownloadListAsync(TumblrJson response, int crawlerNu
Logger.Verbose("TumblrHiddenCrawler.AddUrlsToDownloadListAsync: {0}", e);
}
}
catch (Exception e)
catch (Exception e) when (!(e is FormatException))
{
Logger.Error("TumblrHiddenCrawler.AddUrlsToDownloadListAsync: {0}", e);
ShellService.ShowError(e, "{0}: Error parsing post!", Blog.Name);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,11 @@ private async Task CrawlPageAsync(int crawlerNumber)
ShellService.ShowError(limitExceededException, "{0}: {1}", Blog.Name, limitExceededException.Message);
}
}
catch (FormatException formatException)
{
Logger.Error("TumblrLikedByCrawler:CrawlPageAsync: {0}", formatException);
ShellService.ShowError(formatException, "{0}: {1}", Blog.Name, formatException.Message);
}
catch (Exception e)
{
Logger.Error("TumblrLikedByCrawler:CrawlPageAsync: {0}", e);
Expand Down Expand Up @@ -761,10 +766,16 @@ private bool CheckIfWithinTimespan(long pagination)
return true;
}

DateTime downloadFrom = DateTime.ParseExact(Blog.DownloadFrom, "yyyyMMdd", CultureInfo.InvariantCulture,
DateTimeStyles.None);
var dateTimeOffset = new DateTimeOffset(downloadFrom);
return pagination >= dateTimeOffset.ToUnixTimeSeconds();
try
{
DateTime downloadFrom = DateTime.ParseExact(Blog.DownloadFrom, "yyyyMMdd", CultureInfo.InvariantCulture, DateTimeStyles.None);
var dateTimeOffset = new DateTimeOffset(downloadFrom);
return pagination >= dateTimeOffset.ToUnixTimeSeconds();
}
catch (System.FormatException)
{
throw new FormatException(Resources.BlogValueHasWrongFormat);
}
}

private async Task GetAlreadyExistingCrawlerDataFilesAsync()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,11 @@ private async Task CrawlPageAsync()
{
HandleTimeoutException(timeoutException, Resources.Crawling);
}
catch (FormatException formatException)
{
Logger.Error("TumblrSearchCrawler.CrawlPageAsync: {0}", formatException);
ShellService.ShowError(formatException, "{0}: {1}", Blog.Name, formatException.Message);
}
catch (Exception e)
{
Logger.Error("TumblrSearchCrawler.CrawlPageAsync: {0}", e);
Expand Down Expand Up @@ -288,7 +293,7 @@ private void DownloadPage(dynamic page)
Logger.Verbose("TumblrSearchCrawler.DownloadPage: {0}", e);
}
}
catch (Exception ex)
catch (Exception ex) when (!(ex is FormatException))
{
Logger.Error("TumblrSearchCrawler.DownloadMedia: {0}", ex);
ShellService.ShowError(ex, "{0}: Error parsing post!", Blog.Name);
Expand All @@ -299,7 +304,7 @@ private void DownloadPage(dynamic page)
{
HandleTimeoutException(timeoutException, Resources.Crawling);
}
catch (Exception e)
catch (Exception e) when (!(e is FormatException))
{
Logger.Error("TumblrSearchCrawler.DownloadPage: {0}", e);
}
Expand Down Expand Up @@ -366,10 +371,17 @@ private bool CheckIfWithinTimespan(long pagination)
return true;
}

DateTime downloadFrom = DateTime.ParseExact(Blog.DownloadFrom, "yyyyMMdd", CultureInfo.InvariantCulture,
DateTimeStyles.None);
var dateTimeOffset = new DateTimeOffset(downloadFrom);
return pagination >= dateTimeOffset.ToUnixTimeSeconds();
try
{
DateTime downloadFrom = DateTime.ParseExact(Blog.DownloadFrom, "yyyyMMdd", CultureInfo.InvariantCulture,
DateTimeStyles.None);
var dateTimeOffset = new DateTimeOffset(downloadFrom);
return pagination >= dateTimeOffset.ToUnixTimeSeconds();
}
catch (System.FormatException)
{
throw new FormatException(Resources.BlogValueHasWrongFormat);
}
}

private void DownloadText(dynamic dynPost, Post post)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -159,9 +159,14 @@ private async Task CrawlPageAsync()
{
HandleTimeoutException(timeoutException, Resources.Crawling);
}
catch (FormatException formatException)
{
Logger.Error("TumblrTagSearchCrawler.CrawlPageAsync: {0}", formatException);
ShellService.ShowError(formatException, "{0}: {1}", Blog.Name, formatException.Message);
}
catch (Exception e)
{
Logger.Error("CrawlPageAsync: {0}", e);
Logger.Error("TumblrTagSearchCrawler.CrawlPageAsync: {0}", e);
}
finally
{
Expand Down Expand Up @@ -222,7 +227,7 @@ private void DownloadMedia(TumblrTaggedSearchApi page)
}
}
}
catch (Exception e)
catch (Exception e) when (!(e is FormatException))
{
Logger.Error("DownloadMedia: {0}", e);
}
Expand Down Expand Up @@ -273,7 +278,7 @@ private void DownloadMedia(IList<TaggedPost> elements)
}
}
}
catch (Exception e)
catch (Exception e) when (!(e is FormatException))
{
Logger.Error("DownloadMedia: {0}", e);
}
Expand All @@ -286,10 +291,16 @@ private bool CheckIfWithinTimespan(long pagination)
return true;
}

DateTime downloadFrom = DateTime.ParseExact(Blog.DownloadFrom, "yyyyMMdd", CultureInfo.InvariantCulture,
DateTimeStyles.None);
var dateTimeOffset = new DateTimeOffset(downloadFrom);
return pagination >= dateTimeOffset.ToUnixTimeSeconds();
try
{
DateTime downloadFrom = DateTime.ParseExact(Blog.DownloadFrom, "yyyyMMdd", CultureInfo.InvariantCulture, DateTimeStyles.None);
var dateTimeOffset = new DateTimeOffset(downloadFrom);
return pagination >= dateTimeOffset.ToUnixTimeSeconds();
}
catch (System.FormatException)
{
throw new FormatException(Resources.BlogValueHasWrongFormat);
}
}

private void DownloadMedia(Content content, Post post, int index) //String id, long timestamp, IList<string> tags
Expand Down
45 changes: 29 additions & 16 deletions src/TumblThree/TumblThree.Applications/Crawler/TwitterCrawler.cs
Original file line number Diff line number Diff line change
Expand Up @@ -638,6 +638,12 @@ private async Task CrawlPageAsync(int pageNo)
retries++;
Thread.Sleep(2000);
}
catch (Exception e) when (e is FormatException)
{
Logger.Error("TwitterCrawler.CrawlPageAsync: {0}", e);
ShellService.ShowError(e, "{0}: {1}", Blog.Name, e.Message);
retries = 450;
}
catch (Exception e)
{
Debug.WriteLine(e.ToString());
Expand Down Expand Up @@ -681,25 +687,32 @@ private bool PostWithinTimeSpan(Tweet post)
return true;
}

long downloadFromUnixTime = 0;
long downloadToUnixTime = long.MaxValue;
if (!string.IsNullOrEmpty(Blog.DownloadFrom))
try
{
DateTime downloadFrom = DateTime.ParseExact(Blog.DownloadFrom, "yyyyMMdd", CultureInfo.InvariantCulture,
DateTimeStyles.None);
downloadFromUnixTime = new DateTimeOffset(downloadFrom).ToUnixTimeSeconds();
}
long downloadFromUnixTime = 0;
long downloadToUnixTime = long.MaxValue;
if (!string.IsNullOrEmpty(Blog.DownloadFrom))
{
DateTime downloadFrom = DateTime.ParseExact(Blog.DownloadFrom, "yyyyMMdd", CultureInfo.InvariantCulture,
DateTimeStyles.None);
downloadFromUnixTime = new DateTimeOffset(downloadFrom).ToUnixTimeSeconds();
}

if (!string.IsNullOrEmpty(Blog.DownloadTo))
if (!string.IsNullOrEmpty(Blog.DownloadTo))
{
DateTime downloadTo = DateTime.ParseExact(Blog.DownloadTo, "yyyyMMdd", CultureInfo.InvariantCulture,
DateTimeStyles.None).AddDays(1);
downloadToUnixTime = new DateTimeOffset(downloadTo).ToUnixTimeSeconds();
}

DateTime createdAt = DateTime.ParseExact(post.Legacy.CreatedAt, twitterDateTemplate, new CultureInfo("en-US"));
long postTime = ((DateTimeOffset)createdAt).ToUnixTimeSeconds();
return downloadFromUnixTime <= postTime && postTime < downloadToUnixTime;
}
catch (System.FormatException)
{
DateTime downloadTo = DateTime.ParseExact(Blog.DownloadTo, "yyyyMMdd", CultureInfo.InvariantCulture,
DateTimeStyles.None).AddDays(1);
downloadToUnixTime = new DateTimeOffset(downloadTo).ToUnixTimeSeconds();
throw new FormatException(Resources.BlogValueHasWrongFormat);
}

DateTime createdAt = DateTime.ParseExact(post.Legacy.CreatedAt, twitterDateTemplate, new CultureInfo("en-US"));
long postTime = ((DateTimeOffset)createdAt).ToUnixTimeSeconds();
return downloadFromUnixTime <= postTime && postTime < downloadToUnixTime;
}

private bool CheckPostAge(TimelineTweets response)
Expand Down Expand Up @@ -772,7 +785,7 @@ private async Task AddUrlsToDownloadListAsync(List<Entry> entries)
Logger.Verbose("TwitterCrawler.AddUrlsToDownloadListAsync: {0}", e);
}
}
catch (Exception e)
catch (Exception e) when (!(e is FormatException))
{
Logger.Error("TwitterCrawler.AddUrlsToDownloadListAsync: {0}", e);
ShellService.ShowError(e, "{0}: Error parsing tweet!", Blog.Name);
Expand Down
14 changes: 14 additions & 0 deletions src/TumblThree/TumblThree.Applications/ErrorExceptions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -115,4 +115,18 @@ public APIException(string message, Exception innerException) : base(message, in

protected APIException(SerializationInfo info, StreamingContext context) : base(info, context) { }
}

/// <summary>
/// Application-level exception signalling that a value has the wrong format.
/// </summary>
/// <remarks>
/// This type shares its simple name with <see cref="System.FormatException"/> but derives
/// directly from <see cref="Exception"/>; a <c>catch (System.FormatException)</c> clause
/// therefore does not catch it, and vice versa.
/// </remarks>
[Serializable]
public class FormatException : Exception
{
    /// <summary>Creates the exception with the framework's default message.</summary>
    public FormatException()
    {
    }

    /// <summary>Creates the exception with the given message.</summary>
    /// <param name="message">Text describing the format problem.</param>
    public FormatException(string message)
        : base(message)
    {
    }

    /// <summary>Creates the exception with the given message and underlying cause.</summary>
    /// <param name="message">Text describing the format problem.</param>
    /// <param name="innerException">The exception that triggered this one.</param>
    public FormatException(string message, Exception innerException)
        : base(message, innerException)
    {
    }

    /// <summary>
    /// Wraps another exception, reusing its message as this exception's message.
    /// </summary>
    /// <param name="innerException">
    /// The exception to wrap; when <c>null</c>, a null message is passed to the base class.
    /// </param>
    public FormatException(Exception innerException)
        : base(innerException == null ? null : innerException.Message, innerException)
    {
    }

    /// <summary>Serialization constructor; forwards both arguments to the base class.</summary>
    /// <param name="info">Serialized object data.</param>
    /// <param name="context">Source/destination context of the serialization stream.</param>
    protected FormatException(SerializationInfo info, StreamingContext context)
        : base(info, context)
    {
    }
}
}

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
Expand Up @@ -507,4 +507,7 @@ Trotzdem fortfahren und schließen?</value>
<data name="NotLoggedInX" xml:space="preserve">
<value>Sie müssen sich bei x.com anmelden zum Laden von {0}. Dazu in den Einstellungen-&gt;Authentifizieren klicken.</value>
</data>
<data name="BlogValueHasWrongFormat" xml:space="preserve">
<value>Ein auf der Detailseite eingegebener Wert hat das falsche Format.</value>
</data>
</root>
Original file line number Diff line number Diff line change
Expand Up @@ -501,4 +501,7 @@ De lo contrario, debe cambiar al menos las entradas de ubicación "Descargar" y
<data name="NotLoggedInX" xml:space="preserve">
<value>Debes iniciar sesión en x.com para descargar {0}. Vaya a Configuración-&gt; Autenticar.</value>
</data>
<data name="BlogValueHasWrongFormat" xml:space="preserve">
<value>Un valor ingresado en la página de detalles tiene el formato incorrecto.</value>
</data>
</root>
Original file line number Diff line number Diff line change
Expand Up @@ -519,4 +519,7 @@ Continuer à fermer quand même ?</value>
<data name="NotLoggedInX" xml:space="preserve">
<value>Vous devez vous connecter à x.com pour télécharger {0}. Accédez aux Paramètres-&gt; Authentification.</value>
</data>
<data name="BlogValueHasWrongFormat" xml:space="preserve">
<value>Une valeur saisie sur la page de détails a un format incorrect.</value>
</data>
</root>
Loading

0 comments on commit cd99347

Please sign in to comment.