Changes for netdata #15

Open · wants to merge 1 commit into base: master

analyze/analyze.go (3 additions, 229 deletions)

@@ -26,7 +26,7 @@ import (
	"strconv"
	"time"

-	"github.com/spencerkimball/stargazers/fetch"
+	"github.com/netdata/stargazers/fetch"
)

const (
@@ -49,22 +49,6 @@ func (slice Stargazers) Swap(i, j int) {
	slice[i], slice[j] = slice[j], slice[i]
}

-type Contributors []*fetch.Stargazer
-
-func (slice Contributors) Len() int {
-	return len(slice)
-}
-
-func (slice Contributors) Less(i, j int) bool {
-	iC, _, _ := slice[i].TotalCommits()
-	jC, _, _ := slice[j].TotalCommits()
-	return iC > jC /* descending order */
-}
-
-func (slice Contributors) Swap(i, j int) {
-	slice[i], slice[j] = slice[j], slice[i]
-}
-
type RepoCount struct {
	name string
	count int
@@ -89,18 +73,6 @@ func RunAll(c *fetch.Context, sg []*fetch.Stargazer, rs map[string]*fetch.Repo) error {
	if err := RunCumulativeStars(c, sg); err != nil {
		return err
	}
-	if err := RunCorrelatedRepos(c, "starred", sg, rs); err != nil {
-		return err
-	}
-	if err := RunCorrelatedRepos(c, "subscribed", sg, rs); err != nil {
-		return err
-	}
-	if err := RunFollowers(c, sg); err != nil {
-		return err
-	}
-	if err := RunCommitters(c, sg, rs); err != nil {
-		return err
-	}
	if err := RunAttributesByTime(c, sg, rs); err != nil {
		return err
	}
@@ -163,89 +135,6 @@ func RunCumulativeStars(c *fetch.Context, sg []*fetch.Stargazer) error {
	return nil
}

-// RunCorrelatedRepos creates a map from repo name to count of
-// repos for repo lists of each stargazer.
-func RunCorrelatedRepos(c *fetch.Context, listType string, sg []*fetch.Stargazer, rs map[string]*fetch.Repo) error {
-	log.Printf("running correlated starred repos analysis")
-
-	// Open file and prepare.
-	f, err := createFile(c, fmt.Sprintf("correlated_%s_repos.csv", listType))
-	if err != nil {
-		return fmt.Errorf("failed to create file: %s", err)
-	}
-	defer f.Close()
-	w := csv.NewWriter(f)
-	if err := w.Write([]string{"Repository", "URL", "Count", "Committers", "Commits", "Additions", "Deletions"}); err != nil {
-		return fmt.Errorf("failed to write to CSV: %s", err)
-	}
-	// Compute counts.
-	counts := map[string]int{}
-	for _, s := range sg {
-		repos := s.Starred
-		if listType == "subscribed" {
-			repos = s.Subscribed
-		}
-		for _, rName := range repos {
-			counts[rName]++
-		}
-	}
-	// Sort repos by count.
-	repos := RepoCounts{}
-	for rName, count := range counts {
-		repos = append(repos, &RepoCount{name: rName, count: count})
-	}
-	sort.Sort(repos)
-	// Output repos by count (respecting minimum threshold).
-	for i, r := range repos {
-		if i > nMostCorrelated {
-			break
-		}
-		c, a, d := rs[r.name].TotalCommits()
-		url := fmt.Sprintf("https://github.com/%s", rs[r.name].FullName)
-		if err := w.Write([]string{r.name, url, strconv.Itoa(r.count), strconv.Itoa(len(rs[r.name].Statistics)),
-			strconv.Itoa(c), strconv.Itoa(a), strconv.Itoa(d)}); err != nil {
-			return fmt.Errorf("failed to write to CSV: %s", err)
-		}
-	}
-	w.Flush()
-	log.Printf("wrote correlated %s repos analysis to %s", listType, f.Name())
-
-	// Open histogram file.
-	fHist, err := createFile(c, fmt.Sprintf("correlated_%s_repos_hist.csv", listType))
-	if err != nil {
-		return fmt.Errorf("failed to create file: %s", err)
-	}
-	defer fHist.Close()
-	wHist := csv.NewWriter(fHist)
-	if err := wHist.Write([]string{"Correlation", "Count"}); err != nil {
-		return fmt.Errorf("failed to write to CSV: %s", err)
-	}
-	lastCorrelation := 0
-	count := 0
-	for _, r := range repos {
-		if lastCorrelation != r.count {
-			if count > 0 {
-				if err := wHist.Write([]string{strconv.Itoa(lastCorrelation), strconv.Itoa(count)}); err != nil {
-					return fmt.Errorf("failed to write to CSV: %s", err)
-				}
-			}
-			lastCorrelation = r.count
-			count = 1
-		} else {
-			count++
-		}
-	}
-	if count > 0 {
-		if err := wHist.Write([]string{strconv.Itoa(lastCorrelation), strconv.Itoa(count)}); err != nil {
-			return fmt.Errorf("failed to write to CSV: %s", err)
-		}
-	}
-	wHist.Flush()
-	log.Printf("wrote correlated %s repos histogram to %s", listType, fHist.Name())
-
-	return nil
-}
-
// RunFollowers computes the size of follower networks, as well as
// the count of shared followers.
func RunFollowers(c *fetch.Context, sg []*fetch.Stargazer) error {
@@ -258,7 +147,7 @@ func RunFollowers(c *fetch.Context, sg []*fetch.Stargazer) error {
	}
	defer f.Close()
	w := csv.NewWriter(f)
-	if err := w.Write([]string{"Name", "Login", "URL", "Avatar URL", "Company", "Location", "Followers", "Shared Followers"}); err != nil {
+	if err := w.Write([]string{"Email", "Name", "Login", "URL", "Avatar URL", "Company", "Location", "Followers", "Shared Followers"}); err != nil {
		return fmt.Errorf("failed to write to CSV: %s", err)
	}

@@ -279,7 +168,7 @@ func RunFollowers(c *fetch.Context, sg []*fetch.Stargazer) error {
			}
		}
		url := fmt.Sprintf("https://github.com/%s", s.Login)
-		if err := w.Write([]string{s.Name, s.Login, url, s.AvatarURL, s.Company, s.Location, strconv.Itoa(s.User.Followers), strconv.Itoa(sharedCount)}); err != nil {
+		if err := w.Write([]string{s.Email, s.Name, s.Login, url, s.AvatarURL, s.Company, s.Location, strconv.Itoa(s.User.Followers), strconv.Itoa(sharedCount)}); err != nil {
			return fmt.Errorf("failed to write to CSV: %s", err)
		}
	}
@@ -289,121 +178,6 @@ func RunFollowers(c *fetch.Context, sg []*fetch.Stargazer) error {
	return nil
}

-// RunCommitters lists stargazers by commits to subscribed repos, from
-// most prolific committer to least.
-func RunCommitters(c *fetch.Context, sg []*fetch.Stargazer, rs map[string]*fetch.Repo) error {
-	log.Printf("running committers analysis")
-
-	// Open file and prepare.
-	f, err := createFile(c, "committers.csv")
-	if err != nil {
-		return fmt.Errorf("failed to create file: %s", err)
-	}
-	defer f.Close()
-	w := csv.NewWriter(f)
-	if err := w.Write([]string{"Login", "Email", "Commits", "Additions", "Deletions"}); err != nil {
-		return fmt.Errorf("failed to write to CSV: %s", err)
-	}
-
-	// Sort the stargazers.
-	slice := Contributors(sg)
-	sort.Sort(slice)
-
-	// Now accumulate by days.
-	for _, s := range slice {
-		c, a, d := s.TotalCommits()
-		if c == 0 {
-			break
-		}
-		if err := w.Write([]string{s.Login, s.Email, strconv.Itoa(c), strconv.Itoa(a), strconv.Itoa(d)}); err != nil {
-			return fmt.Errorf("failed to write to CSV: %s", err)
-		}
-	}
-	w.Flush()
-	log.Printf("wrote committers analysis to %s", f.Name())
-
-	return nil
-}
-
-// RunCumulativeStars creates a table of date and cumulative
-// star count for the provided stargazers.
-func RunAttributesByTime(c *fetch.Context, sg []*fetch.Stargazer, rs map[string]*fetch.Repo) error {
-	log.Printf("running stargazer attributes by time analysis")
-
-	// Open file and prepare.
-	f, err := createFile(c, "attributes_by_time.csv")
-	if err != nil {
-		return fmt.Errorf("failed to create file: %s", err)
-	}
-	defer f.Close()
-	w := csv.NewWriter(f)
-	if err := w.Write([]string{"Date", "New Stars", "Avg Age", "Avg Followers", "Avg Commits"}); err != nil {
-		return fmt.Errorf("failed to write to CSV: %s", err)
-	}
-
-	output := func(day int64, count, age, followers, commits int) error {
-		t := time.Unix(day*60*60*24, 0)
-		avgAge := fmt.Sprintf("%.2f", float64(age)/float64(count))
-		avgFollowers := fmt.Sprintf("%.2f", float64(followers)/float64(count))
-		avgCommits := fmt.Sprintf("%.2f", float64(commits)/float64(count))
-		if err := w.Write([]string{t.Format("01/02/2006"), strconv.Itoa(count), avgAge, avgFollowers, avgCommits}); err != nil {
-			return fmt.Errorf("failed to write to CSV: %s", err)
-		}
-		return nil
-	}
-
-	const daySeconds = 60 * 60 * 24
-
-	// Sort the stargazers.
-	slice := Stargazers(sg)
-	sort.Sort(slice)
-
-	// Accumulation factor means the count of days over which to average each sample.
-	factor := int64(7) // weekly
-
-	// Now accumulate by days.
-	firstDay := int64(0)
-	lastDay := int64(0)
-	count, age, followers, commits := 0, 0, 0, 0
-	for _, s := range slice {
-		t, err := time.Parse(time.RFC3339, s.StarredAt)
-		if err != nil {
-			return err
-		}
-		day := t.Unix() / daySeconds
-		if firstDay == 0 {
-			firstDay = day
-		}
-		if day != lastDay && (day-firstDay)%factor == 0 {
-			if count > 0 {
-				if err := output(lastDay, count, age, followers, commits); err != nil {
-					return err
-				}
-			}
-			lastDay = day
-			count = 1
-			age = int(s.Age() / daySeconds)
-			followers = len(s.Followers)
-			commits, _, _ = s.TotalCommits()
-		} else {
-			count++
-			age += int(s.Age() / daySeconds)
-			followers += len(s.Followers)
-			c, _, _ := s.TotalCommits()
-			commits += c
-		}
-	}
-	if count > 0 {
-		if err := output(lastDay, count, age, followers, commits); err != nil {
-			return err
-		}
-	}
-	w.Flush()
-	log.Printf("wrote stargazer attributes by time analysis to %s", f.Name())
-
-	return nil
-}
-
func createFile(c *fetch.Context, baseName string) (*os.File, error) {
	filename := filepath.Join(c.CacheDir, c.Repo, baseName)
	f, err := os.Create(filename)
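
The two one-line changes to RunFollowers above add an Email column to followers.csv, in both the header row and each stargazer row. A minimal sketch of the resulting layout, with entirely made-up values (only the column order comes from the diff):

package main

import (
	"encoding/csv"
	"os"
)

func main() {
	w := csv.NewWriter(os.Stdout)
	// Header order after this PR: Email comes first.
	w.Write([]string{"Email", "Name", "Login", "URL", "Avatar URL", "Company", "Location", "Followers", "Shared Followers"})
	// A hypothetical row, mirroring the s.Email, s.Name, s.Login, ... order in the diff.
	w.Write([]string{"jane@example.com", "Jane Doe", "janedoe", "https://github.com/janedoe",
		"https://avatars.githubusercontent.com/u/0", "Acme Corp", "Berlin", "42", "7"})
	w.Flush() // write errors ignored for brevity in this sketch
}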

cmd/analyze.go (2 additions, 2 deletions)

@@ -20,8 +20,8 @@ import (
	"errors"
	"log"

-	"github.com/spencerkimball/stargazers/analyze"
-	"github.com/spencerkimball/stargazers/fetch"
+	"github.com/netdata/stargazers/analyze"
+	"github.com/netdata/stargazers/fetch"
	"github.com/spf13/cobra"
)

cmd/clear.go (1 addition, 1 deletion)

@@ -20,7 +20,7 @@ import (
	"errors"
	"log"

-	"github.com/spencerkimball/stargazers/fetch"
+	"github.com/netdata/stargazers/fetch"
	"github.com/spf13/cobra"
)

cmd/fetch.go (1 addition, 1 deletion)

@@ -20,7 +20,7 @@ import (
	"errors"
	"log"

-	"github.com/spencerkimball/stargazers/fetch"
+	"github.com/netdata/stargazers/fetch"
	"github.com/spf13/cobra"
)

fetch/cache.go (3 additions, 1 deletion)

@@ -48,6 +48,7 @@ func getCache(c *Context, req *http.Request) (*http.Response, error) {
		return nil, err
	}
	log.Printf("found %q in response cache", req.URL.String())
+	log.Printf("AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA")

	return resp, err
}
@@ -90,7 +91,8 @@ func putCache(c *Context, req *http.Request, resp *http.Response) error {
// of the configured cache dir, with any access token stripped out.
func cacheEntryFilename(c *Context, url string) string {
	newUrl := strings.Replace(url, fmt.Sprintf("access_token=%s", c.Token), "", 1)
-	return filepath.Join(c.CacheDir, c.Repo, sanitize.BaseName(newUrl))
+	log.Printf("filepath %s", filepath.Join(c.CacheDir, c.Repo, c.requestType, sanitize.BaseName(newUrl)))
+	return filepath.Join(c.CacheDir, c.Repo, c.requestType, sanitize.BaseName(newUrl))
}

// clearEntry clears a specified cache entry.
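
The cacheEntryFilename change above inserts c.requestType as an extra directory level between the repo and the sanitized URL, so cached responses for different request types land in separate subdirectories instead of one flat directory per repo. A minimal sketch of the new path construction, assuming the requestType field is added to Context elsewhere in this PR (its definition is not part of this diff) and that sanitize.BaseName comes from github.com/kennygrant/sanitize:

package main

import (
	"fmt"
	"path/filepath"
	"strings"

	"github.com/kennygrant/sanitize" // assumed source of sanitize.BaseName
)

// ctx stands in for fetch.Context; requestType mirrors the unexported
// field this PR presumably introduces outside this diff.
type ctx struct {
	CacheDir    string
	Repo        string
	Token       string
	requestType string
}

// cacheEntryFilename follows the post-change version in fetch/cache.go.
func cacheEntryFilename(c *ctx, url string) string {
	newUrl := strings.Replace(url, fmt.Sprintf("access_token=%s", c.Token), "", 1)
	return filepath.Join(c.CacheDir, c.Repo, c.requestType, sanitize.BaseName(newUrl))
}

func main() {
	c := &ctx{CacheDir: "./cache", Repo: "netdata/netdata", Token: "t0k3n", requestType: "starred"}
	// Prints something like: cache/netdata/netdata/starred/https-api-github-com-users-foo-starred
	fmt.Println(cacheEntryFilename(c, "https://api.github.com/users/foo/starred?access_token=t0k3n"))
}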
fetch/fetch.go (1 addition, 1 deletion)

@@ -59,7 +59,7 @@ func (e *httpError) Error() string {
}

// linkRE provides parsing of the "Link" HTTP header directive.
-var linkRE = regexp.MustCompile(`^<(.*)>; rel="next", <(.*)>; rel="last".*`)
+var linkRE = regexp.MustCompile(`^.*<(.*)>; rel="next", <(.*)>; rel="last".*`)

// fetchURL fetches the specified URL. The cache (specified in
// c.CacheDir) is consulted first and if not found, the specified URL
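
The linkRE change relaxes the anchor from ^< to ^.*<, presumably so the pattern still extracts the right URLs when another directive precedes the rel="next" entry in the Link header. With the old pattern such a header still matched, but the greedy first capture group swallowed everything up to the real next URL. A minimal sketch of the difference, using made-up pagination URLs:

package main

import (
	"fmt"
	"regexp"
)

var (
	oldLinkRE = regexp.MustCompile(`^<(.*)>; rel="next", <(.*)>; rel="last".*`)
	newLinkRE = regexp.MustCompile(`^.*<(.*)>; rel="next", <(.*)>; rel="last".*`)
)

func main() {
	// A hypothetical Link header in which rel="prev" precedes rel="next".
	header := `<https://api.github.com/repos/x/y/stargazers?page=1>; rel="prev", ` +
		`<https://api.github.com/repos/x/y/stargazers?page=3>; rel="next", ` +
		`<https://api.github.com/repos/x/y/stargazers?page=9>; rel="last"`

	// Old pattern: the greedy group captures `...page=1>; rel="prev", <...page=3`,
	// which is not a usable URL.
	fmt.Println(oldLinkRE.FindStringSubmatch(header)[1])

	// New pattern: the leading .* absorbs the prev segment, so the captures
	// land on the actual next and last URLs.
	fmt.Println(newLinkRE.FindStringSubmatch(header)[1]) // ...page=3
}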