diff --git a/tools/src/cmd/git-stats/main.go b/tools/src/cmd/git-stats/main.go
new file mode 100644
index 0000000000..f97b60441c
--- /dev/null
+++ b/tools/src/cmd/git-stats/main.go
@@ -0,0 +1,410 @@
+// Copyright 2022 The Tint Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// git-stats gathers statistics about changes made to a git repo.
+package main
+
+import (
+	"flag"
+	"fmt"
+	"os"
+	"os/exec"
+	"regexp"
+	"runtime"
+	"sort"
+	"strings"
+	"sync"
+	"text/tabwriter"
+	"time"
+
+	"dawn.googlesource.com/dawn/tools/src/container"
+	"dawn.googlesource.com/dawn/tools/src/git"
+)
+
+// Flags
+var (
+	repo       = flag.String("repo", ".", "path to git directory")
+	afterFlag  = flag.String("after", "", "start date")
+	beforeFlag = flag.String("before", "", "end date")
+	daysFlag   = flag.Int("days", 182, "interval in days (used if --after is not specified)")
+)
+
+// main entry point
+func main() {
+	flag.Parse()
+	if err := run(); err != nil {
+		fmt.Fprintln(os.Stderr, err)
+		os.Exit(1)
+	}
+}
+
+// Date format strings
+const (
+	yyyymmdd = "2006-01-02"
+	yyyymm   = "2006-01"
+)
+
+// Returns true if the file with the given path should be included for addition / deletion stats.
+func shouldConsiderLinesOfFile(path string) bool {
+	for _, ignore := range []string{
+		"Doxyfile",
+		"package-lock.json",
+		"src/tint/builtin_table.inl",
+		"src/tint/resolver/intrinsic_table.inl",
+		"test/tint/",
+		"third_party/gn/webgpu-cts/test_list.txt",
+		"third_party/khronos/",
+		"webgpu-cts/",
+		"src/external/petamoriken",
+	} {
+		if strings.HasPrefix(path, ignore) {
+			return false
+		}
+	}
+	return true
+}
+
+// Returns true if the commit with the given hash should be included for addition / deletion stats.
+func shouldConsiderLinesOfCommit(hash string) bool {
+	for _, ignore := range []string{
+		"41e4d9a34c1d9dcb2eef3ff39ff9c1f987bfa02a", // Consistent formatting for Dawn/Tint.
+		"e87ac76f7ddf9237f3022cda90224bd0691fb318", // Merge tint -> dawn
+		"b0acbd436dbd499505a3fa8bf89e69231ec4d1e0", // Fix build/namespaces issues
+	} {
+		if hash == ignore {
+			return false
+		}
+	}
+	return true
+}
+
+// Regular expression used to parse the email from an author string. Example:
+//   Bob Bobson <bob@example.com>
+//   ____________^^^^^^^^^^^^^^^_
+var reEmail = regexp.MustCompile(`<([^>]+)>`)
+
+// run gathers statistics for the commits made between the dates given by the
+// --after, --before and --days flags, and prints a summary of them to stdout.
+// An error is returned if a date flag cannot be parsed, or if git or the repo
+// cannot be opened. Failures while reading the git log or the per-commit
+// statistics are raised as panics from the worker goroutines.
+func run() error {
+	// Parse the --after and --before flags
+	var after, before time.Time
+	var err error
+	if *beforeFlag != "" {
+		before, err = time.Parse(yyyymmdd, *beforeFlag)
+		if err != nil {
+			return fmt.Errorf("Couldn't parse before date: %w", err)
+		}
+	} else {
+		before = time.Now()
+	}
+	if *afterFlag != "" {
+		after, err = time.Parse(yyyymmdd, *afterFlag)
+		if err != nil {
+			return fmt.Errorf("Couldn't parse after date: %w", err)
+		}
+	} else {
+		after = before.Add(-time.Hour * time.Duration(24**daysFlag))
+	}
+
+	// Find 'git'
+	gitExe, err := exec.LookPath("git")
+	if err != nil {
+		return err
+	}
+
+	// Create the git.Git wrapper
+	g, err := git.New(gitExe)
+	if err != nil {
+		return err
+	}
+
+	// Open the repo
+	r, err := g.Open(*repo)
+	if err != nil {
+		return err
+	}
+
+	// Information obtained about a single commit
+	type CommitStat struct {
+		author     string
+		commit     *git.CommitInfo
+		insertions int
+		deletions  int
+		fileDeltas container.Map[string, int]
+	}
+
+	// Kick a goroutine to gather all the commits in the git log between
+	// 'after' and 'before', streaming the commits to the 'commits' chan.
+	// This chan will be closed by the goroutine when all commits have been
+	// gathered.
+	commits := make(chan git.CommitInfo, 256)
+	go func() {
+		log, err := r.LogBetween(after, before, &git.LogBetweenOptions{})
+		if err != nil {
+			panic(fmt.Errorf("failed to gather commits: %w", err))
+		}
+		for _, commit := range log {
+			commits <- commit
+		}
+		close(commits)
+	}()
+
+	// Kick 'numWorkers' goroutines to gather the commit statistics of the
+	// commits in the 'commits' chan, streaming the commit statistics to the
+	// 'commitStats' chan.
+	commitStats := make(chan CommitStat, 256)
+	numWorkers := runtime.NumCPU()
+	wg := sync.WaitGroup{}
+	wg.Add(numWorkers)
+	for worker := 0; worker < numWorkers; worker++ {
+		go func() {
+			defer wg.Done()
+			for commit := range commits {
+				// Take a per-iteration copy of the commit, as a pointer to it is
+				// retained by the CommitStat below.
+				commit := commit
+				// Use the whole author string if it holds no '<email>' part,
+				// instead of panicking on the missing regex match.
+				email := commit.Author
+				if match := reEmail.FindStringSubmatch(commit.Author); match != nil {
+					email = match[1]
+				}
+				stats, err := r.Stats(commit, nil)
+				if err != nil {
+					panic(fmt.Errorf("failed to get stats for commit '%v': %w", commit.Hash, err))
+				}
+
+				s := CommitStat{
+					author:     email,
+					commit:     &commit,
+					fileDeltas: container.NewMap[string, int](),
+				}
+				if shouldConsiderLinesOfCommit(commit.Hash.String()) {
+					for file, stats := range stats {
+						if shouldConsiderLinesOfFile(file) {
+							s.insertions += stats.Insertions
+							s.deletions += stats.Deletions
+							s.fileDeltas[file] = stats.Insertions + stats.Deletions
+						}
+					}
+				}
+				commitStats <- s
+			}
+		}()
+	}
+
+	// Kick a helper goroutine that waits for all the goroutines that feed the
+	// 'commitStats' chan to complete, and then closes the 'commitStats' chan.
+	go func() {
+		wg.Wait()
+		close(commitStats)
+	}()
+
+	// CommitDelta holds the sum of line additions and deletions for a given
+	// commit.
+	type CommitDelta struct {
+		commit *git.CommitInfo
+		delta  int
+	}
+
+	// Stream in the commit statistics from the 'commitStats' chan, and collect
+	// statistics by author and by file.
+	statsByAuthor := container.NewMap[string, AuthorStats]()
+	fileDeltas := container.NewMap[string, int]()
+	commitDeltas := []CommitDelta{}
+	for cs := range commitStats {
+		as := statsByAuthor[cs.author]
+		as.insertions += cs.insertions
+		as.deletions += cs.deletions
+		as.commits++
+		if as.commitsByMonth == nil {
+			as.commitsByMonth = container.NewMap[string, int]()
+		}
+		month := cs.commit.Date.Format(yyyymm)
+		as.commitsByMonth[month] = as.commitsByMonth[month] + 1
+		statsByAuthor[cs.author] = as
+
+		commitDelta := 0
+		for path, delta := range cs.fileDeltas {
+			fileDeltas[path] = fileDeltas[path] + delta
+			commitDelta += delta
+		}
+		commitDeltas = append(commitDeltas, CommitDelta{cs.commit, commitDelta})
+	}
+
+	// Transform the 'statsByAuthor' map, so that authors that have statistics
+	// for both a @google.com and @chromium.org account have all their
+	// statistics merged into the @google.com account.
+	for google, g := range statsByAuthor {
+		if strings.HasSuffix(google, "@google.com") {
+			combined := strings.TrimSuffix(google, "@google.com")
+			chromium := combined + "@chromium.org"
+			if c, hasChromium := statsByAuthor[chromium]; hasChromium {
+				statsByAuthor[google] = combine(g, c)
+				delete(statsByAuthor, chromium)
+			}
+		}
+	}
+
+	// Print those stats!
+
+	fmt.Printf("Between %v and %v:\n", after, before)
+
+	// Print the top 10 most modified files.
+	// This is helpful to identify files that are automatically generated, which
+	// we should exclude from the statistics.
+	{
+		type FileDelta struct {
+			file  string
+			delta int
+		}
+		l := make([]FileDelta, 0, len(fileDeltas))
+		for file, delta := range fileDeltas {
+			l = append(l, FileDelta{file, delta})
+		}
+		sort.Slice(l, func(i, j int) bool { return l[i].delta > l[j].delta })
+		n := len(l)
+		if n > 10 {
+			n = 10
+		}
+		fmt.Println()
+		fmt.Printf("Top %v most modified files:\n", n)
+		fmt.Println()
+		tw := tabwriter.NewWriter(os.Stdout, 0, 0, 0, ' ', 0)
+		fmt.Fprintln(tw, " delta\t | file")
+		for _, fd := range l[:n] {
+			fmt.Fprintln(tw,
+				" ", fd.delta,
+				"\t |", fd.file)
+		}
+		tw.Flush()
+	}
+
+	// Print the top 10 largest commits.
+	// This is helpful to identify commits that may contain a large bulk
+	// refactor, which we should exclude from the statistics.
+	{
+		sort.Slice(commitDeltas, func(i, j int) bool {
+			return commitDeltas[i].delta > commitDeltas[j].delta
+		})
+		n := len(commitDeltas)
+		if n > 10 {
+			n = 10
+		}
+		fmt.Println()
+		fmt.Printf("Top %v largest commits:\n", n)
+		fmt.Println()
+		tw := tabwriter.NewWriter(os.Stdout, 0, 0, 0, ' ', 0)
+		fmt.Fprintln(tw,
+			" delta\t | author\t | hash\t | description")
+		for _, fd := range commitDeltas[:n] {
+			fmt.Fprintln(tw,
+				" ", fd.delta,
+				"\t |", fd.commit.Author,
+				"\t |", fd.commit.Hash.String()[:6],
+				"\t |", fd.commit.Subject)
+		}
+		tw.Flush()
+	}
+
+	// Print the contributions by author.
+	{
+		fmt.Println()
+		fmt.Println("Total contributions by author:")
+		tw := tabwriter.NewWriter(os.Stdout, 0, 0, 0, ' ', 0)
+		fmt.Println()
+		fmt.Fprintln(tw, " author\t | commits\t | added\t | removed")
+		for _, author := range statsByAuthor.Keys() {
+			s := statsByAuthor[author]
+			fmt.Fprintln(tw,
+				" "+author,
+				"\t |", s.commits,
+				"\t |", s.insertions,
+				"\t |", s.deletions)
+		}
+		tw.Flush()
+	}
+
+	// Print the per-author contributions by month.
+	{
+		allMonths := container.NewSet[string]()
+		for _, author := range statsByAuthor {
+			for month := range author.commitsByMonth {
+				allMonths.Add(month)
+			}
+		}
+
+		months := allMonths.List()
+
+		fmt.Println()
+		fmt.Println("Commits by author by month:")
+		tw := tabwriter.NewWriter(os.Stdout, 0, 0, 0, ' ', 0)
+		fmt.Println()
+		fmt.Fprintf(tw, " author")
+		for _, month := range months {
+			fmt.Fprint(tw, "\t | ", month)
+		}
+		fmt.Fprintln(tw)
+
+		for _, author := range statsByAuthor.Keys() {
+			fmt.Fprint(tw, " ", author)
+			cbm := statsByAuthor[author].commitsByMonth
+			for _, month := range months {
+				fmt.Fprint(tw, "\t | ", cbm[month])
+			}
+			fmt.Fprintln(tw)
+		}
+		tw.Flush()
+	}
+
+	return nil
+}
+
+// AuthorStats holds the statistics accumulated for a single author.
+type AuthorStats struct {
+	commits        int
+	commitsByMonth container.Map[string, int]
+	insertions     int
+	deletions      int
+}
+
+// combine returns a new AuthorStats, with the summed statistics of 'a' and 'b'.
+func combine(a, b AuthorStats) AuthorStats {
+	out := AuthorStats{
+		commits:    a.commits + b.commits,
+		insertions: a.insertions + b.insertions,
+		deletions:  a.deletions + b.deletions,
+	}
+	out.commitsByMonth = container.NewMap[string, int]()
+	for month, commits := range a.commitsByMonth {
+		out.commitsByMonth[month] = commits
+	}
+	for month, commits := range b.commitsByMonth {
+		out.commitsByMonth[month] = out.commitsByMonth[month] + commits
+	}
+	return out
+}
+
+// today returns the current time.
+func today() time.Time {
+	return time.Now()
+}
+
+// date returns t formatted as a yyyy-mm-dd string.
+func date(t time.Time) string {
+	return t.Format(yyyymmdd)
+}
diff --git a/tools/src/git/git.go b/tools/src/git/git.go
index f08169da6c..0889b44095 100644
--- a/tools/src/git/git.go
+++ b/tools/src/git/git.go
@@ -24,6 +24,7 @@ import (
 	"os"
 	"os/exec"
 	"path/filepath"
+	"strconv"
 	"strings"
 	"time"
 )
@@ -299,14 +300,7 @@ type LogOptions struct {
 	Timeout time.Duration
 }
 
-// CommitInfo describes a single git commit
-type CommitInfo struct {
-	Hash        Hash
-	Date        time.Time
-	Author      string
-	Subject     string
-	Description string
-}
+const logPrettyFormatArg =
"--pretty=format:ǁ%Hǀ%cIǀ%an <%ae>ǀ%sǀ%b"
 
 // Log returns the list of commits between two references (inclusive).
 // The first returned commit is the most recent.
@@ -322,7 +316,7 @@ func (r Repository) Log(opt *LogOptions) ([]CommitInfo, error) {
 	if opt.From != "" {
 		rng = opt.From + "^.." + rng
 	}
-	args = append(args, rng, "--pretty=format:ǁ%Hǀ%cIǀ%an <%ae>ǀ%sǀ%b")
+	args = append(args, rng, logPrettyFormatArg)
 	out, err := r.run(nil, opt.Timeout, args...)
 	if err != nil {
 		return nil, err
@@ -330,6 +324,100 @@ func (r Repository) Log(opt *LogOptions) ([]CommitInfo, error) {
 	return parseLog(out)
 }
 
+// LogBetweenOptions holds optional settings for Repository.LogBetween
+type LogBetweenOptions struct {
+	// Timeout for the operation
+	Timeout time.Duration
+}
+
+// LogBetween returns the list of commits between two timestamps.
+// The first returned commit is the most recent.
+func (r Repository) LogBetween(since, until time.Time, opt *LogBetweenOptions) ([]CommitInfo, error) {
+	if opt == nil {
+		opt = &LogBetweenOptions{}
+	}
+	args := []string{"log",
+		"--since", since.Format(time.RFC3339),
+		"--until", until.Format(time.RFC3339),
+		logPrettyFormatArg,
+	}
+	out, err := r.run(nil, opt.Timeout, args...)
+	if err != nil {
+		return nil, err
+	}
+	return parseLog(out)
+}
+
+// FileStats describes the changes to a given file in a commit
+type FileStats struct {
+	Insertions int
+	Deletions  int
+}
+
+// CommitStats is a map of file to FileStats
+type CommitStats map[string]FileStats
+
+// StatsOptions holds optional settings for Repository.Stats
+type StatsOptions struct {
+	// Timeout for the operation
+	Timeout time.Duration
+}
+
+// Stats returns the per-file insertion and deletion counts for the given
+// commit, relative to the commit's first parent.
+// A count that git prints as "-" (as it does for binary files) is recorded
+// as zero.
+func (r Repository) Stats(commit CommitInfo, opt *StatsOptions) (CommitStats, error) {
+	if opt == nil {
+		opt = &StatsOptions{}
+	}
+
+	hash := commit.Hash.String()
+	// Diff from the parent to the commit, so that lines added by the commit
+	// are counted as insertions and lines it removed as deletions.
+	args := []string{"diff", "--numstat", hash + "^", hash}
+	out, err := r.run(nil, opt.Timeout, args...)
+	if err != nil {
+		return nil, err
+	}
+	stats := CommitStats{}
+	for _, line := range strings.Split(out, "\n") {
+		// Skip blank lines, such as the one left by a trailing newline.
+		if line == "" {
+			continue
+		}
+		parts := strings.Split(line, "\t")
+		if len(parts) != 3 {
+			return nil, fmt.Errorf("failed to parse stat line: '%v'", line)
+		}
+		insertions, deletions := 0, 0
+		if parts[0] != "-" {
+			insertions, err = strconv.Atoi(parts[0])
+			if err != nil {
+				return nil, fmt.Errorf("failed to stat insertions '%v': %w", parts[0], err)
+			}
+		}
+		if parts[1] != "-" {
+			deletions, err = strconv.Atoi(parts[1])
+			if err != nil {
+				return nil, fmt.Errorf("failed to stat deletions '%v': %w", parts[1], err)
+			}
+		}
+		file := parts[2]
+		stats[file] = FileStats{Insertions: insertions, Deletions: deletions}
+	}
+	return stats, nil
+}
+
+// CommitInfo describes a single git commit
+type CommitInfo struct {
+	Hash        Hash
+	Date        time.Time
+	Author      string
+	Subject     string
+	Description string
+}
+
 // Optional settings for Repository.ConfigOptions
 type ConfigOptions struct {
 	// Timeout for the operation