diff --git a/analyze/analyze.go b/analyze/analyze.go
index af72cd3..800a3ea 100644
--- a/analyze/analyze.go
+++ b/analyze/analyze.go
@@ -26,7 +26,7 @@ import (
 	"strconv"
 	"time"
 
-	"github.com/spencerkimball/stargazers/fetch"
+	"github.com/netdata/stargazers/fetch"
 )
 
 const (
@@ -49,22 +49,6 @@ func (slice Stargazers) Swap(i, j int) {
 	slice[i], slice[j] = slice[j], slice[i]
 }
 
-type Contributors []*fetch.Stargazer
-
-func (slice Contributors) Len() int {
-	return len(slice)
-}
-
-func (slice Contributors) Less(i, j int) bool {
-	iC, _, _ := slice[i].TotalCommits()
-	jC, _, _ := slice[j].TotalCommits()
-	return iC > jC /* descending order */
-}
-
-func (slice Contributors) Swap(i, j int) {
-	slice[i], slice[j] = slice[j], slice[i]
-}
-
 type RepoCount struct {
 	name  string
 	count int
@@ -89,18 +73,6 @@ func RunAll(c *fetch.Context, sg []*fetch.Stargazer, rs map[string]*fetch.Repo)
 	if err := RunCumulativeStars(c, sg); err != nil {
 		return err
 	}
-	if err := RunCorrelatedRepos(c, "starred", sg, rs); err != nil {
-		return err
-	}
-	if err := RunCorrelatedRepos(c, "subscribed", sg, rs); err != nil {
-		return err
-	}
-	if err := RunFollowers(c, sg); err != nil {
-		return err
-	}
-	if err := RunCommitters(c, sg, rs); err != nil {
-		return err
-	}
 	if err := RunAttributesByTime(c, sg, rs); err != nil {
 		return err
 	}
@@ -163,89 +135,6 @@ func RunCumulativeStars(c *fetch.Context, sg []*fetch.Stargazer) error {
 	return nil
 }
 
-// RunCorrelatedRepos creates a map from repo name to count of
-// repos for repo lists of each stargazer.
-func RunCorrelatedRepos(c *fetch.Context, listType string, sg []*fetch.Stargazer, rs map[string]*fetch.Repo) error {
-	log.Printf("running correlated starred repos analysis")
-
-	// Open file and prepare.
-	f, err := createFile(c, fmt.Sprintf("correlated_%s_repos.csv", listType))
-	if err != nil {
-		return fmt.Errorf("failed to create file: %s", err)
-	}
-	defer f.Close()
-	w := csv.NewWriter(f)
-	if err := w.Write([]string{"Repository", "URL", "Count", "Committers", "Commits", "Additions", "Deletions"}); err != nil {
-		return fmt.Errorf("failed to write to CSV: %s", err)
-	}
-	// Compute counts.
-	counts := map[string]int{}
-	for _, s := range sg {
-		repos := s.Starred
-		if listType == "subscribed" {
-			repos = s.Subscribed
-		}
-		for _, rName := range repos {
-			counts[rName]++
-		}
-	}
-	// Sort repos by count.
-	repos := RepoCounts{}
-	for rName, count := range counts {
-		repos = append(repos, &RepoCount{name: rName, count: count})
-	}
-	sort.Sort(repos)
-	// Output repos by count (respecting minimum threshold).
-	for i, r := range repos {
-		if i > nMostCorrelated {
-			break
-		}
-		c, a, d := rs[r.name].TotalCommits()
-		url := fmt.Sprintf("https://github.com/%s", rs[r.name].FullName)
-		if err := w.Write([]string{r.name, url, strconv.Itoa(r.count), strconv.Itoa(len(rs[r.name].Statistics)),
-			strconv.Itoa(c), strconv.Itoa(a), strconv.Itoa(d)}); err != nil {
-			return fmt.Errorf("failed to write to CSV: %s", err)
-		}
-	}
-	w.Flush()
-	log.Printf("wrote correlated %s repos analysis to %s", listType, f.Name())
-
-	// Open histogram file.
-	fHist, err := createFile(c, fmt.Sprintf("correlated_%s_repos_hist.csv", listType))
-	if err != nil {
-		return fmt.Errorf("failed to create file: %s", err)
-	}
-	defer fHist.Close()
-	wHist := csv.NewWriter(fHist)
-	if err := wHist.Write([]string{"Correlation", "Count"}); err != nil {
-		return fmt.Errorf("failed to write to CSV: %s", err)
-	}
-	lastCorrelation := 0
-	count := 0
-	for _, r := range repos {
-		if lastCorrelation != r.count {
-			if count > 0 {
-				if err := wHist.Write([]string{strconv.Itoa(lastCorrelation), strconv.Itoa(count)}); err != nil {
-					return fmt.Errorf("failed to write to CSV: %s", err)
-				}
-			}
-			lastCorrelation = r.count
-			count = 1
-		} else {
-			count++
-		}
-	}
-	if count > 0 {
-		if err := wHist.Write([]string{strconv.Itoa(lastCorrelation), strconv.Itoa(count)}); err != nil {
-			return fmt.Errorf("failed to write to CSV: %s", err)
-		}
-	}
-	wHist.Flush()
-	log.Printf("wrote correlated %s repos histogram to %s", listType, fHist.Name())
-
-	return nil
-}
-
 // RunFollowers computes the size of follower networks, as well as
 // the count of shared followers.
 func RunFollowers(c *fetch.Context, sg []*fetch.Stargazer) error {
@@ -258,7 +147,7 @@ func RunFollowers(c *fetch.Context, sg []*fetch.Stargazer) error {
 	}
 	defer f.Close()
 	w := csv.NewWriter(f)
-	if err := w.Write([]string{"Name", "Login", "URL", "Avatar URL", "Company", "Location", "Followers", "Shared Followers"}); err != nil {
+	if err := w.Write([]string{"Email", "Name", "Login", "URL", "Avatar URL", "Company", "Location", "Followers", "Shared Followers"}); err != nil {
 		return fmt.Errorf("failed to write to CSV: %s", err)
 	}
 
@@ -279,7 +168,7 @@ func RunFollowers(c *fetch.Context, sg []*fetch.Stargazer) error {
 			}
 		}
 		url := fmt.Sprintf("https://github.com/%s", s.Login)
-		if err := w.Write([]string{s.Name, s.Login, url, s.AvatarURL, s.Company, s.Location, strconv.Itoa(s.User.Followers), strconv.Itoa(sharedCount)}); err != nil {
+		if err := w.Write([]string{s.Email, s.Name, s.Login, url, s.AvatarURL, s.Company, s.Location, strconv.Itoa(s.User.Followers), strconv.Itoa(sharedCount)}); err != nil {
 			return fmt.Errorf("failed to write to CSV: %s", err)
 		}
 	}
@@ -289,121 +178,6 @@ func RunFollowers(c *fetch.Context, sg []*fetch.Stargazer) error {
 	}
 	w.Flush()
 	log.Printf("wrote followers analysis to %s", f.Name())
 
 	return nil
 }
 
-// RunCommitters lists stargazers by commits to subscribed repos, from
-// most prolific committer to least.
-func RunCommitters(c *fetch.Context, sg []*fetch.Stargazer, rs map[string]*fetch.Repo) error {
-	log.Printf("running committers analysis")
-
-	// Open file and prepare.
-	f, err := createFile(c, "committers.csv")
-	if err != nil {
-		return fmt.Errorf("failed to create file: %s", err)
-	}
-	defer f.Close()
-	w := csv.NewWriter(f)
-	if err := w.Write([]string{"Login", "Email", "Commits", "Additions", "Deletions"}); err != nil {
-		return fmt.Errorf("failed to write to CSV: %s", err)
-	}
-
-	// Sort the stargazers.
-	slice := Contributors(sg)
-	sort.Sort(slice)
-
-	// Now accumulate by days.
-	for _, s := range slice {
-		c, a, d := s.TotalCommits()
-		if c == 0 {
-			break
-		}
-		if err := w.Write([]string{s.Login, s.Email, strconv.Itoa(c), strconv.Itoa(a), strconv.Itoa(d)}); err != nil {
-			return fmt.Errorf("failed to write to CSV: %s", err)
-		}
-	}
-	w.Flush()
-	log.Printf("wrote committers analysis to %s", f.Name())
-
-	return nil
-}
-
-// RunCumulativeStars creates a table of date and cumulative
-// star count for the provided stargazers.
-func RunAttributesByTime(c *fetch.Context, sg []*fetch.Stargazer, rs map[string]*fetch.Repo) error {
-	log.Printf("running stargazer attributes by time analysis")
-
-	// Open file and prepare.
-	f, err := createFile(c, "attributes_by_time.csv")
-	if err != nil {
-		return fmt.Errorf("failed to create file: %s", err)
-	}
-	defer f.Close()
-	w := csv.NewWriter(f)
-	if err := w.Write([]string{"Date", "New Stars", "Avg Age", "Avg Followers", "Avg Commits"}); err != nil {
-		return fmt.Errorf("failed to write to CSV: %s", err)
-	}
-
-	output := func(day int64, count, age, followers, commits int) error {
-		t := time.Unix(day*60*60*24, 0)
-		avgAge := fmt.Sprintf("%.2f", float64(age)/float64(count))
-		avgFollowers := fmt.Sprintf("%.2f", float64(followers)/float64(count))
-		avgCommits := fmt.Sprintf("%.2f", float64(commits)/float64(count))
-		if err := w.Write([]string{t.Format("01/02/2006"), strconv.Itoa(count), avgAge, avgFollowers, avgCommits}); err != nil {
-			return fmt.Errorf("failed to write to CSV: %s", err)
-		}
-		return nil
-	}
-
-	const daySeconds = 60 * 60 * 24
-
-	// Sort the stargazers.
-	slice := Stargazers(sg)
-	sort.Sort(slice)
-
-	// Accumulation factor means the count of days over which to average each sample.
-	factor := int64(7) // weekly
-
-	// Now accumulate by days.
-	firstDay := int64(0)
-	lastDay := int64(0)
-	count, age, followers, commits := 0, 0, 0, 0
-	for _, s := range slice {
-		t, err := time.Parse(time.RFC3339, s.StarredAt)
-		if err != nil {
-			return err
-		}
-		day := t.Unix() / daySeconds
-		if firstDay == 0 {
-			firstDay = day
-		}
-		if day != lastDay && (day-firstDay)%factor == 0 {
-			if count > 0 {
-				if err := output(lastDay, count, age, followers, commits); err != nil {
-					return err
-				}
-			}
-			lastDay = day
-			count = 1
-			age = int(s.Age() / daySeconds)
-			followers = len(s.Followers)
-			commits, _, _ = s.TotalCommits()
-		} else {
-			count++
-			age += int(s.Age() / daySeconds)
-			followers += len(s.Followers)
-			c, _, _ := s.TotalCommits()
-			commits += c
-		}
-	}
-	if count > 0 {
-		if err := output(lastDay, count, age, followers, commits); err != nil {
-			return err
-		}
-	}
-	w.Flush()
-	log.Printf("wrote stargazer attributes by time analysis to %s", f.Name())
-
-	return nil
-}
-
 func createFile(c *fetch.Context, baseName string) (*os.File, error) {
 	filename := filepath.Join(c.CacheDir, c.Repo, baseName)
 	f, err := os.Create(filename)
diff --git a/cmd/analyze.go b/cmd/analyze.go
index b69fd52..4d64cc2 100644
--- a/cmd/analyze.go
+++ b/cmd/analyze.go
@@ -20,8 +20,8 @@ import (
 	"errors"
 	"log"
 
-	"github.com/spencerkimball/stargazers/analyze"
-	"github.com/spencerkimball/stargazers/fetch"
+	"github.com/netdata/stargazers/analyze"
+	"github.com/netdata/stargazers/fetch"
 	"github.com/spf13/cobra"
 )
 
diff --git a/cmd/clear.go b/cmd/clear.go
index 9bcdcaf..d4d95bd 100644
--- a/cmd/clear.go
+++ b/cmd/clear.go
@@ -20,7 +20,7 @@ import (
 	"errors"
 	"log"
 
-	"github.com/spencerkimball/stargazers/fetch"
+	"github.com/netdata/stargazers/fetch"
 	"github.com/spf13/cobra"
 )
 
diff --git a/cmd/fetch.go b/cmd/fetch.go
index 1ac3120..61b8d23 100644
--- a/cmd/fetch.go
+++ b/cmd/fetch.go
@@ -20,7 +20,7 @@ import (
 	"errors"
 	"log"
 
-	"github.com/spencerkimball/stargazers/fetch"
+	"github.com/netdata/stargazers/fetch"
 	"github.com/spf13/cobra"
 )
 
diff --git a/fetch/cache.go b/fetch/cache.go
index 7f72771..9c120e0 100644
--- a/fetch/cache.go
+++ b/fetch/cache.go
@@ -48,6 +48,7 @@ func getCache(c *Context, req *http.Request) (*http.Response, error) {
 		return nil, err
 	}
 	log.Printf("found %q in response cache", req.URL.String())
+	log.Printf("AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA")
 	return resp, err
 }
 
@@ -90,7 +91,8 @@ func putCache(c *Context, req *http.Request, resp *http.Response) error {
 // of the configured cache dir, with any access token stripped out.
 func cacheEntryFilename(c *Context, url string) string {
 	newUrl := strings.Replace(url, fmt.Sprintf("access_token=%s", c.Token), "", 1)
-	return filepath.Join(c.CacheDir, c.Repo, sanitize.BaseName(newUrl))
+	log.Printf("filepath %s", filepath.Join(c.CacheDir, c.Repo, c.requestType, sanitize.BaseName(newUrl)))
+	return filepath.Join(c.CacheDir, c.Repo, c.requestType, sanitize.BaseName(newUrl))
 }
 
 // clearEntry clears a specified cache entry.
diff --git a/fetch/fetch.go b/fetch/fetch.go
index e1c2b10..4a111ea 100644
--- a/fetch/fetch.go
+++ b/fetch/fetch.go
@@ -59,7 +59,7 @@ func (e *httpError) Error() string {
 }
 
 // linkRE provides parsing of the "Link" HTTP header directive.
-var linkRE = regexp.MustCompile(`^<(.*)>; rel="next", <(.*)>; rel="last".*`)
+var linkRE = regexp.MustCompile(`^.*<(.*)>; rel="next", <(.*)>; rel="last".*`)
 
 // fetchURL fetches the specified URL. The cache (specified in
 // c.CacheDir) is consulted first and if not found, the specified URL
diff --git a/fetch/query.go b/fetch/query.go
index 7016dfa..40ef439 100644
--- a/fetch/query.go
+++ b/fetch/query.go
@@ -24,7 +24,6 @@ import (
 	"os"
 	"path/filepath"
 	"strconv"
-	"strings"
 	"time"
 )
 
@@ -51,6 +50,8 @@ type Context struct {
 	CacheDir string // Cache directory
 
 	acceptHeader string // Optional Accept: header value
+
+	requestType string // Current request type (easiest way to add subdirs to the cached files)
 }
 
 type User struct {
@@ -159,35 +160,10 @@ type Repo struct {
 	Statistics map[string]*Contribution `json:"statistics"`
 }
 
-// meetsThresholds returns whether the repo meets any of the minimal
-// thresholds to qualify for contributor statistics querying.
-func (r *Repo) meetsThresholds() bool {
-	return r.StargazersCount > minStargazers || r.ForksCount > minForks || r.OpenIssues > minOpenIssues
-}
-
-// TotalCommits returns the total commits as well as additions
-// and deletions.
-func (r *Repo) TotalCommits() (int, int, int) {
-	c, a, d := 0, 0, 0
-	for _, contrib := range r.Statistics {
-		c += contrib.Commits
-		a += contrib.Additions
-		d += contrib.Deletions
-	}
-	return c, a, d
-}
-
 // Stargazer holds all information and further query URLs for a stargazer.
 type Stargazer struct {
 	User `json:"user"`
 	StarredAt string `json:"starred_at"`
-
-	Followers []*User `json:"follower_list"`
-	Starred []string `json:"starred"` // Slice of repos by full name
-	Subscribed []string `json:"subscribed"` // Slice of repos by full name
-
-	// Contributions to subscribed repos (by repo FullName).
-	Contributions map[string]*Contribution `json:"contributions"`
 }
 
 // Age returns the age (time from current time to created at
@@ -202,50 +178,23 @@ func (s *Stargazer) Age() int64 {
 	return curDay - createT.Unix()
 }
 
-// TotalCommits returns the total commits as well as additions and
-// deletions, ranged over all tracked contributions.
-func (s *Stargazer) TotalCommits() (int, int, int) {
-	c, a, d := 0, 0, 0
-	for _, contrib := range s.Contributions {
-		c += contrib.Commits
-		a += contrib.Additions
-		d += contrib.Deletions
-	}
-	return c, a, d
-}
-
 // QueryAll recursively descends into GitHub API endpoints, starting
 // with the list of stargazers for the repo.
 func QueryAll(c *Context) error {
+	// Unique map of repos by repo full name.
+	rs := map[string]*Repo{}
+
 	// Query all stargazers for the repo.
+	c.requestType = "stargazers"
 	sg, err := QueryStargazers(c)
 	if err != nil {
 		return err
 	}
 	// Query stargazer user info for all stargazers.
+	c.requestType = "userinfo"
 	if err = QueryUserInfo(c, sg); err != nil {
 		return err
 	}
-	// Query followers for all stargazers.
-	if err = QueryFollowers(c, sg); err != nil {
-		return err
-	}
-
-	// Unique map of repos by repo full name.
-	rs := map[string]*Repo{}
-
-	// Query starred repos for all stargazers.
-	if err = QueryStarred(c, sg, rs); err != nil {
-		return err
-	}
-	// Query subscribed repos for all stargazers.
-	if err = QuerySubscribed(c, sg, rs); err != nil {
-		return err
-	}
-	// Query contributions to subscribed repos for all stargazers.
-	if err = QueryContributions(c, sg, rs); err != nil {
-		return err
-	}
 	return SaveState(c, sg, rs)
 }
 
@@ -286,164 +235,6 @@ func QueryUserInfo(c *Context, sg []*Stargazer) error {
 	return nil
 }
 
-// QueryFollowers queries each stargazers list of followers.
-func QueryFollowers(c *Context, sg []*Stargazer) error {
-	log.Printf("querying followers for each of %s stargazers...", format(len(sg)))
-	total := 0
-	fmt.Printf("*** 0 followers for 0 stargazers")
-	uniqueFollowers := map[int]struct{}{}
-	for i, s := range sg {
-		var err error
-		url := fmt.Sprintf("%s", s.FollowersURL)
-		for len(url) > 0 {
-			fetched := []*User{}
-			url, err = fetchURL(c, url, &fetched, false /* don't refresh followers */)
-			if err != nil {
-				return err
-			}
-			for _, u := range fetched {
-				uniqueFollowers[u.ID] = struct{}{}
-			}
-			s.Followers = append(s.Followers, fetched...)
-			total += len(fetched)
-			fmt.Printf("\r*** %s followers (%s unique) for %s stargazers",
-				format(total), format(len(uniqueFollowers)), format(i+1))
-		}
-	}
-	fmt.Printf("\n")
-	return nil
-}
-
-// QueryStarred queries all starred repos for each stargazer.
-func QueryStarred(c *Context, sg []*Stargazer, rs map[string]*Repo) error {
-	log.Printf("querying starred repos for each of %s stargazers...", format(len(sg)))
-	starred := 0
-	fmt.Printf("*** 0 starred repos for 0 stargazers")
-	uniqueStarred := map[int]struct{}{}
-	for i, s := range sg {
-		var err error
-		url := s.StarredURL
-		url = strings.Replace(url, "{/owner}{/repo}", "", 1)
-		for len(url) > 0 && len(s.Starred) < maxStarred {
-			fetched := []*Repo{}
-			url, err = fetchURL(c, url, &fetched, false /* don't refresh starred repos */)
-			if err != nil {
-				return err
-			}
-			for _, r := range fetched {
-				if _, ok := rs[r.FullName]; !ok {
-					rs[r.FullName] = r
-				}
-				uniqueStarred[r.ID] = struct{}{}
-				s.Starred = append(s.Starred, r.FullName)
-			}
-			starred += len(fetched)
-			fmt.Printf("\r*** %s starred repos (%s unique) for %s stargazers",
-				format(starred), format(len(uniqueStarred)), format(i+1))
-		}
-	}
-	fmt.Printf("\n")
-	return nil
-}
-
-// QuerySubscribed queries all subscribed repos for each stargazer.
-func QuerySubscribed(c *Context, sg []*Stargazer, rs map[string]*Repo) error {
-	log.Printf("querying subscribed repos for each of %s stargazers...", format(len(sg)))
-	subscribed := 0
-	fmt.Printf("*** 0 subscribed repos for 0 stargazers")
-	uniqueSubscribed := map[int]struct{}{}
-	for i, s := range sg {
-		var err error
-		url := s.SubscriptionsURL
-		for len(url) > 0 && len(s.Subscribed) < maxSubscribed {
-			fetched := []*Repo{}
-			url, err = fetchURL(c, url, &fetched, false /* don't refresh subscribed repos */)
-			if err != nil {
-				return err
-			}
-			for _, r := range fetched {
-				if _, ok := rs[r.FullName]; !ok {
-					rs[r.FullName] = r
-				}
-				uniqueSubscribed[r.ID] = struct{}{}
-				s.Subscribed = append(s.Subscribed, r.FullName)
-			}
-			subscribed += len(fetched)
-			fmt.Printf("\r*** %s subscribed repos (%s unique) for %s stargazers",
-				format(subscribed), format(len(uniqueSubscribed)), format(i+1))
-		}
-	}
-	fmt.Printf("\n")
-	return nil
-}
-
-// QueryContributions queries all contributions to subscribed repos
-// for each stargazer.
-func QueryContributions(c *Context, sg []*Stargazer, rs map[string]*Repo) error {
-	log.Printf("querying contributions to subscribed repos for each of %s stargazers...", format(len(sg)))
-	authors := map[string]struct{}{}
-	for _, s := range sg {
-		authors[s.Login] = struct{}{}
-	}
-	commits := 0
-	subscribed := 0
-	qualifying := 0
-	uniqueRepos := map[int]struct{}{}
-	fmt.Printf("*** 0 commits from 0 repos (0 qual, 0 total) for 0 stargazers")
-	for i, s := range sg {
-		for _, rName := range s.Subscribed {
-			r, ok := rs[rName]
-			if !ok {
-				log.Fatalf("missing %s repo", rName)
-			}
-			subscribed++
-			if !r.meetsThresholds() {
-				continue
-			}
-			if _, ok := uniqueRepos[r.ID]; !ok {
-				uniqueRepos[r.ID] = struct{}{}
-			}
-			qualifying++
-			if r.Statistics == nil {
-				if err := QueryStatistics(c, r, authors); err != nil {
-					return err
-				}
-			}
-			if contrib, ok := r.Statistics[s.Login]; ok {
-				commits += int(contrib.Commits)
-				if s.Contributions == nil {
-					s.Contributions = map[string]*Contribution{}
-				}
-				s.Contributions[r.FullName] = contrib
-			}
-			fmt.Printf("\r*** %s commits from %s repos (%s qual, %s total) for %s stargazers",
-				format(commits), format(len(uniqueRepos)), format(qualifying), format(subscribed), format(i+1))
-		}
-	}
-	fmt.Printf("\n")
-	return nil
-}
-
-// QueryStatistics queries contributor stats for the specified repo.
-func QueryStatistics(c *Context, r *Repo, authors map[string]struct{}) error {
-	r.Statistics = map[string]*Contribution{}
-	var err error
-	url := fmt.Sprintf("%srepos/%s/stats/contributors", githubAPI, r.FullName)
-	for len(url) > 0 {
-		fetched := []*Contributor{}
-		url, err = fetchURL(c, url, &fetched, false /* don't refresh */)
-		if err != nil {
-			return err
-		}
-		for _, c := range fetched {
-			if _, ok := authors[c.Author.Login]; ok {
-				r.Statistics[c.Author.Login] = makeContribution(c)
-			}
-		}
-	}
-	return nil
-}
-
 // SaveState writes all queried stargazer and repo data.
 func SaveState(c *Context, sg []*Stargazer, rs map[string]*Repo) error {
 	log.Printf("saving state")
diff --git a/main.go b/main.go
index b63087f..80accc6 100644
--- a/main.go
+++ b/main.go
@@ -23,7 +23,7 @@ import (
 	"reflect"
 	"strings"
 
-	"github.com/spencerkimball/stargazers/cmd"
+	"github.com/netdata/stargazers/cmd"
 	"github.com/spf13/cobra"
 	"github.com/spf13/cobra/doc"
 )
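The fetch/fetch.go hunk above loosens linkRE by prepending ".*" to the pattern, presumably so it still matches a "Link" header in which other relations appear before rel="next". The snippet below is a minimal, self-contained sketch of that behavior; it is not part of the patch, and the header value (including the example.test URLs and the leading rel="prev" entry) is made up for illustration.

    package main

    import (
    	"fmt"
    	"regexp"
    )

    // Same pattern as the updated linkRE in fetch/fetch.go: the leading ".*"
    // lets entries such as rel="prev" precede the rel="next" entry.
    var linkRE = regexp.MustCompile(`^.*<(.*)>; rel="next", <(.*)>; rel="last".*`)

    func main() {
    	// Illustrative Link header value only; not taken from the GitHub API.
    	link := `<https://example.test/stargazers?page=1>; rel="prev", ` +
    		`<https://example.test/stargazers?page=3>; rel="next", ` +
    		`<https://example.test/stargazers?page=9>; rel="last"`
    	if m := linkRE.FindStringSubmatch(link); m != nil {
    		fmt.Println("next:", m[1]) // https://example.test/stargazers?page=3
    		fmt.Println("last:", m[2]) // https://example.test/stargazers?page=9
    	}
    }

Even though the leading ".*" and the group patterns are greedy, the submatches still resolve to the URLs immediately enclosing rel="next" and rel="last", because those literal markers each occur only once in the header.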