From e6e1768d7ccc6dce4a4c09e74889ffedb78710e8 Mon Sep 17 00:00:00 2001 From: Mm2PL Date: Fri, 27 Jan 2023 22:28:54 +0100 Subject: [PATCH 1/3] Make it use /list endpoint --- cmd/justgrep/justgrep.go | 52 ++++++--- justlog_api.go | 243 ++++++++++++++++++++++++++++++++++++++- 2 files changed, 280 insertions(+), 15 deletions(-) diff --git a/cmd/justgrep/justgrep.go b/cmd/justgrep/justgrep.go index 0cfd069..4773a6a 100644 --- a/cmd/justgrep/justgrep.go +++ b/cmd/justgrep/justgrep.go @@ -348,7 +348,7 @@ func main() { api = &justgrep.UserJustlogAPI{User: (*args.user)[1:], Channel: channel, URL: justlogUrl, IsId: true} } else { api = &justgrep.UserJustlogAPI{User: *args.user, Channel: channel, URL: justlogUrl} - } + } } else { api = &justgrep.ChannelJustlogAPI{Channel: channel, URL: justlogUrl} } @@ -419,22 +419,42 @@ func searchLogs( ) { nextDate := args.endTime ctx, cancel := context.WithCancel(context.Background()) + defer cancel() var channel string - step := api.GetApproximateOffset() switch api.(type) { default: channel = fmt.Sprintf("[unknown] (%t)", api) - step = time.Hour * 24 case *justgrep.UserJustlogAPI: channel = api.(*justgrep.UserJustlogAPI).Channel case *justgrep.ChannelJustlogAPI: channel = api.(*justgrep.ChannelJustlogAPI).Channel } - totalSteps := float64(args.endTime.Sub(args.startTime) / step) + availableLogs, err := api.GetAvailableLogs(ctx, &httpClient) + if err != nil { + u, _ := url.Parse(*args.url) + fmt.Fprintf( + os.Stderr, + "Failed to fetch available logs on %s: %s", + u.Redacted(), + err, + ) + os.Exit(1) + } - defer cancel() - for { - stepsLeft := float64(nextDate.Sub(args.startTime) / step) + toFetch, err := availableLogs.Snip(args.startTime, args.endTime) + if err != nil { + u, _ := url.Parse(*args.url) + fmt.Fprintf( + os.Stderr, + "Host %s returned a malformed response for logs: %s", + u.Redacted(), + err, + ) + } + + totalSteps := len(toFetch) + for i, entry := range toFetch { + stepsLeft := totalSteps - i if *args.verbose { nowTime := 
time.Now() timeTaken := float64(nowTime.Sub(progress.BeginTime) / time.Second) @@ -447,8 +467,8 @@ func searchLogs( "Processed %.2f MB (%d lines and counting)\n", progress.TotalResults[justgrep.ResultOk], channel, - nextDate.Format("2006-01-02"), - makeProgressBar(totalSteps, stepsLeft), + entry.ToDate().Format("2006-01-02"), + makeProgressBar(float64(totalSteps), float64(stepsLeft)), progress.CountLines/int(timeTaken), float64(progress.CountBytes/1000/1000)/timeTaken, @@ -463,15 +483,21 @@ func searchLogs( Found: progress.TotalResults[justgrep.ResultOk], Channel: channel, NextDate: nextDate.Format(time.RFC3339), - TotalSteps: totalSteps, - LeftSteps: stepsLeft, + TotalSteps: float64(totalSteps), + LeftSteps: float64(stepsLeft), Progress: *progress, }, ) } download := make(chan *justgrep.Message) - var err error - nextDate, err = justgrep.FetchForDate(ctx, api, nextDate, download, progress, &httpClient) + err = justgrep.FetchForLogEntry( + ctx, + api, + entry, + download, + progress, + &httpClient, + ) if err != nil { if *args.progressJson { _ = json.NewEncoder(os.Stderr).Encode( diff --git a/justlog_api.go b/justlog_api.go index 2fb8a55..37ed792 100644 --- a/justlog_api.go +++ b/justlog_api.go @@ -6,15 +6,25 @@ import ( "encoding/json" "errors" "fmt" + "io" + "log" "net/http" + "net/url" "os" + "strconv" + "strings" "time" ) type JustlogAPI interface { MakeURL(date time.Time) string + NextLogFile(currentDate time.Time) time.Time + GetApproximateOffset() time.Duration + + // GetAvailableLogs fetches logs available from justlog + GetAvailableLogs(ctx context.Context, client *http.Client) (LogsList, error) } type ProgressState struct { @@ -77,8 +87,8 @@ func FetchForDate( progress *ProgressState, client *http.Client, ) (time.Time, error) { - url := api.MakeURL(date) - err := fetch(ctx, url, client, output, progress) + u := api.MakeURL(date) + err := fetch(ctx, u, client, output, progress) if err != nil { return time.Time{}, err } else { @@ -86,6 +96,18 @@ func 
FetchForDate( } } +func FetchForLogEntry( + ctx context.Context, + api JustlogAPI, + logs AvailableLogEntry, + output chan *Message, + progress *ProgressState, + client *http.Client, +) error { + _, err := FetchForDate(ctx, api, logs.ToDate(), output, progress, client) + return err +} + type UserJustlogAPI struct { JustlogAPI @@ -175,6 +197,223 @@ func GetChannelsFromJustLog(ctx context.Context, client *http.Client, url string return channels, nil } +// AvailableLogEntry describes an element from justlog's /list api array +type AvailableLogEntry struct { + RawYear string `json:"year"` + RawMonth string `json:"month"` + + // Only for /list without a user + RawDay string `json:"day"` + + Year int + Month int + + // As with RawDay, Day only makes sense for non-user logs, otherwise will be 0 + Day int +} + +func (l *AvailableLogEntry) Parse() error { + if l.Year != 0 { + return nil + } + year, err := strconv.ParseInt(l.RawYear, 10, 64) + if err != nil { + return err + } + month, err := strconv.ParseInt(l.RawMonth, 10, 64) + if err != nil { + return err + } + l.Year = int(year) + l.Month = int(month) + if l.RawDay != "" { + day, err := strconv.ParseInt(l.RawDay, 10, 64) + if err != nil { + return err + } + l.Day = int(day) + } + return nil +} + +// ToDate converts the AvailableLogEntry to a time.Time with 0 seconds past midnight on the day (or the first of the month). 
+// It may panic if Parse() wasn't called before and parsing fails +func (l *AvailableLogEntry) ToDate() time.Time { + if err := l.Parse(); err != nil { + log.Panicf( + "Unexpectidly errored while converting a log entry to a date: %s, "+ + "call justgrep.AvailableLogEntry.Parse() explicitly to avoid this", + err, + ) + } + day := l.Day + if l.Day == 0 { + day = 1 + } + return time.Date( + l.Year, + time.Month(l.Month), + day, + 0, + 0, + 0, + 0, + time.UTC, + ) +} + +type availableLogsResponse struct { + AvailableLogs []AvailableLogEntry `json:"availableLogs"` +} +type LogsList []AvailableLogEntry + +func (l LogsList) EnsureParsed() error { + for i, logs := range l { + err := logs.Parse() + if err != nil { + return err + } + // range yields a copy of each entry, so write the parsed value back into the slice + l[i] = logs + } + return nil +} + +func (l LogsList) Snip(early time.Time, late time.Time) (LogsList, error) { + if early.After(late) { + log.Panicf("THIS SHOULD NOT HAPPEN, early > late: %#v > %#v!!!", early, late) + } + err := l.EnsureParsed() + if err != nil { + return nil, err + } + out := LogsList{} + + for _, logs := range l { + if logs.RawDay != "" { // RawDay is only present on non-user (channel) requests + dayBegin := time.Date( + logs.Year, + time.Month(logs.Month), + logs.Day, + 0, + 0, + 0, + 0, + time.UTC, + ) + dayEnd := dayBegin.AddDate(0, 0, 1).Add(-time.Second) + + if fitsInRange(dayBegin, early, late) || fitsInRange(dayEnd, early, late) { + out = append(out, logs) + } + } else { + monthBegin := time.Date( + logs.Year, + time.Month(logs.Month), + 1, + 0, + 0, + 0, + 0, + time.UTC, + ) + monthEnd := monthBegin.AddDate(0, 1, 0).Add(-time.Second) + if fitsInRange(late, monthBegin, monthEnd) || + fitsInRange(early, monthBegin, monthEnd) || + fitsInRange(monthEnd, early, late) { + out = append(out, logs) + } + } + } + return out, nil +} + +// fitsInRange checks if early <= t <= late (both bounds inclusive) +func fitsInRange(t time.Time, early time.Time, late time.Time) bool { + if t.After(late) { + return false + } + if t.Before(early) { + 
return false + } + return true +} + +func (api ChannelJustlogAPI) GetAvailableLogs(ctx context.Context, client *http.Client) (LogsList, error) { + u, err := url.Parse(api.URL) + if err != nil { + return nil, err + } + list, _ := u.Parse("/list") + q := list.Query() + q.Add("channel", api.Channel) + + list.RawQuery = q.Encode() + + req, err := http.NewRequestWithContext(ctx, "GET", list.String(), nil) + if err != nil { + return nil, err + } + req.Header.Set("User-Agent", UserAgent) + resp, err := client.Do(req) + if err != nil { + return nil, err + } + defer resp.Body.Close() + if resp.StatusCode != 200 { + errorBytes, _ := io.ReadAll(resp.Body) + errorMsg := string(errorBytes) + return nil, fmt.Errorf("%s: %s", resp.Status, strings.Trim(errorMsg, "\r\n")) + } + output := availableLogsResponse{} + err = json.NewDecoder(resp.Body).Decode(&output) + if err != nil { + return nil, err + } + return output.AvailableLogs, nil + +} + +func (api UserJustlogAPI) GetAvailableLogs(ctx context.Context, client *http.Client) (LogsList, error) { + u, err := url.Parse(api.URL) + if err != nil { + return nil, err + } + list, _ := u.Parse("/list") + q := list.Query() + q.Add("channel", api.Channel) + if api.IsId { + q.Add("userid", api.User) + } else { + q.Add("user", api.User) + } + + list.RawQuery = q.Encode() + + req, err := http.NewRequestWithContext(ctx, "GET", list.String(), nil) + if err != nil { + return nil, err + } + req.Header.Set("User-Agent", UserAgent) + resp, err := client.Do(req) + if err != nil { + return nil, err + } + defer resp.Body.Close() + if resp.StatusCode != 200 { + errorBytes, _ := io.ReadAll(resp.Body) + errorMsg := string(errorBytes) + return nil, fmt.Errorf("%s: %s", resp.Status, strings.Trim(errorMsg, "\r\n")) + } + output := availableLogsResponse{} + err = json.NewDecoder(resp.Body).Decode(&output) + if err != nil { + return nil, err + } + return output.AvailableLogs, nil + +} + func (api ChannelJustlogAPI) GetApproximateOffset() time.Duration { return 
time.Hour * 24 } From 17a3dd6da5cb1f016252ff6b11dc4fba8028108e Mon Sep 17 00:00:00 2001 From: Mm2PL Date: Fri, 27 Jan 2023 22:51:13 +0100 Subject: [PATCH 2/3] Add documentation for JustlogAPI interface --- justlog_api.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/justlog_api.go b/justlog_api.go index 37ed792..de630d6 100644 --- a/justlog_api.go +++ b/justlog_api.go @@ -17,10 +17,15 @@ import ( ) type JustlogAPI interface { + // MakeURL creates a URL to download the data from justlog MakeURL(date time.Time) string + // NextLogFile is deprecated. It returns currentDate.Add(api.GetApproximateOffset) NextLogFile(currentDate time.Time) time.Time + // GetApproximateOffset describes roughly how often new files are made in justlog for this api. + // This function shouldn't be treated as anything more than a UI suggestion, use GetAvailableLogs for precise data + // instead GetApproximateOffset() time.Duration // GetAvailableLogs fetches logs available from justlog From 53c91aa0df349a79c99a2427d85b9f9851fa0c47 Mon Sep 17 00:00:00 2001 From: Mm2PL Date: Fri, 27 Jan 2023 22:53:01 +0100 Subject: [PATCH 3/3] Add basic test for LogsList.Snip() --- justlog_api_test.go | 112 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 112 insertions(+) create mode 100644 justlog_api_test.go diff --git a/justlog_api_test.go b/justlog_api_test.go new file mode 100644 index 0000000..01e1a19 --- /dev/null +++ b/justlog_api_test.go @@ -0,0 +1,112 @@ +package justgrep + +import ( + "testing" + "time" +) + +func TestLogsList_Snip(t *testing.T) { + l := LogsList{ + AvailableLogEntry{ + RawYear: "2022", + RawMonth: "12", + }, + AvailableLogEntry{ + RawYear: "2022", + RawMonth: "11", + }, + AvailableLogEntry{ + RawYear: "2022", + RawMonth: "10", + }, + AvailableLogEntry{ + RawYear: "2022", + RawMonth: "9", + }, + AvailableLogEntry{ + RawYear: "2022", + RawMonth: "8", + }, + AvailableLogEntry{ + RawYear: "2022", + RawMonth: "7", + }, + AvailableLogEntry{ + RawYear: "2022", + 
RawMonth: "6", + }, + AvailableLogEntry{ + RawYear: "2022", + RawMonth: "5", + }, + AvailableLogEntry{ + RawYear: "2022", + RawMonth: "4", + }, + AvailableLogEntry{ + RawYear: "2022", + RawMonth: "3", + }, + AvailableLogEntry{ + RawYear: "2022", + RawMonth: "2", + }, + AvailableLogEntry{ + RawYear: "2022", + RawMonth: "1", + }, + } + err := l.EnsureParsed() + assert(t, "err", err, nil) + + have, err := l.Snip( + time.Date(2022, 4, 1, 0, 0, 0, 0, time.UTC), + time.Date(2022, 10, 1, 0, 0, 0, 0, time.UTC), + ) + assert(t, "err", err, nil) + expect := LogsList{ + AvailableLogEntry{ + RawYear: "2022", + RawMonth: "10", + }, + AvailableLogEntry{ + RawYear: "2022", + RawMonth: "9", + }, + AvailableLogEntry{ + RawYear: "2022", + RawMonth: "8", + }, + AvailableLogEntry{ + RawYear: "2022", + RawMonth: "7", + }, + AvailableLogEntry{ + RawYear: "2022", + RawMonth: "6", + }, + AvailableLogEntry{ + RawYear: "2022", + RawMonth: "5", + }, + AvailableLogEntry{ + RawYear: "2022", + RawMonth: "4", + }, + } + expect.EnsureParsed() + if len(have) != len(expect) { + t.Errorf( + "assertion on LogsListSnip failed: length doesn't match, have %d, expected %d: %q vs %q", + len(have), + len(expect), + have, + expect, + ) + } + for i, elem := range have { + if elem != expect[i] { + t.Errorf("assertion on LogsListSnip[%d] failed: have %#v, expected %#v", i, elem, expect[i]) + } + } +}