Skip to content

Commit

Permalink
Write warcinfo records to the redirect WARC file too
Browse files (browse the repository at this point in the history)
  • Loading branch information
ato committed Aug 6, 2024
1 parent 6f72765 commit 04ca3e6
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 1 deletion.
4 changes: 4 additions & 0 deletions src/au/gov/nla/httrack2warc/Httrack2Warc.java
Original file line number | Diff line number | Diff line change
Expand Up @@ -158,6 +158,10 @@ public void convertDirectory(Path sourceDirectory) throws IOException {
Set<String> processedFiles = new HashSet<>();
LinkRewriter linkRewriter = rewriteLinks ? new LinkRewriter(crawl) : null;

+ if (redirectWriter.warc != warc) {
+ redirectWriter.warc.writeWarcinfoRecord(UUID.randomUUID(), launchInstant, warcInfo);
+ }

crawl.forEach(record -> {
if (isUrlExcluded(record.getUrl())) {
log.info("Excluded {}", record.getUrl());
Expand Down
2 changes: 1 addition & 1 deletion src/au/gov/nla/httrack2warc/RedirectWriter.java
Original file line number | Diff line number | Diff line change
Expand Up @@ -13,7 +13,7 @@
*/
public class RedirectWriter implements Closeable {
private final String prefix;
- private final WarcWriter warc;
+ final WarcWriter warc;

public RedirectWriter(String prefix, WarcWriter warc) {
this.prefix = prefix;
Expand Down

0 comments on commit 04ca3e6

Please sign in to comment.