Add tools/src/cmd/git-stats

Can be run with:
./tools/run git-stats

Change-Id: Ie46be01ff3318ff50acdb13eb809d5304f867f04
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/111180
Auto-Submit: Ben Clayton <bclayton@google.com>
Commit-Queue: David Neto <dneto@google.com>
Kokoro: Kokoro <noreply+kokoro@google.com>
Reviewed-by: David Neto <dneto@google.com>
This commit is contained in:
Ben Clayton 2022-11-23 21:08:52 +00:00 committed by Dawn LUCI CQ
parent 7c6e229a18
commit d6800098e7
2 changed files with 486 additions and 9 deletions

View File

@ -0,0 +1,395 @@
// Copyright 2022 The Tint Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// git-stats gathers statistics about changes made to a git repo.
package main
import (
"flag"
"fmt"
"os"
"os/exec"
"regexp"
"runtime"
"sort"
"strings"
"sync"
"text/tabwriter"
"time"
"dawn.googlesource.com/dawn/tools/src/container"
"dawn.googlesource.com/dawn/tools/src/git"
)
// Command line flags.
//
// The --after and --before dates are parsed by run() using the "2006-01-02"
// layout, so the help text spells out the expected YYYY-MM-DD format and the
// defaults that apply when a flag is omitted.
var (
	repo       = flag.String("repo", ".", "path to git directory")
	afterFlag  = flag.String("after", "", "start date, formatted as YYYY-MM-DD (defaults to --days before the end date)")
	beforeFlag = flag.String("before", "", "end date, formatted as YYYY-MM-DD (defaults to now)")
	daysFlag   = flag.Int("days", 182, "interval in days (used if --after is not specified)")
)
// main parses the command line flags and hands over to run(). Any error
// returned by run() is written to stderr and the process exits with status 1.
func main() {
	flag.Parse()

	err := run()
	if err == nil {
		return
	}

	fmt.Fprintln(os.Stderr, err)
	os.Exit(1)
}
// Date format strings, expressed as Go time layouts (the reference date
// 2006-01-02 stands in for year-month-day).
const (
	// Year, month and day. Used to parse the --after and --before flags.
	yyyymmdd = "2006-01-02"
	// Year and month. Used as the key when bucketing commits by month.
	yyyymm = "2006-01"
)
// shouldConsiderLinesOfFile reports whether the line additions / deletions of
// the file at 'path' should be counted in the statistics.
// A file is excluded when its path begins with any of the prefixes listed
// below; every other path (including the empty path) is included.
func shouldConsiderLinesOfFile(path string) bool {
	ignoredPrefixes := [...]string{
		"Doxyfile",
		"package-lock.json",
		"src/tint/builtin_table.inl",
		"src/tint/resolver/intrinsic_table.inl",
		"test/tint/",
		"third_party/gn/webgpu-cts/test_list.txt",
		"third_party/khronos/",
		"webgpu-cts/",
		"src/external/petamoriken",
	}

	considered := true
	for i := 0; i < len(ignoredPrefixes) && considered; i++ {
		considered = !strings.HasPrefix(path, ignoredPrefixes[i])
	}
	return considered
}
// shouldConsiderLinesOfCommit reports whether the line additions / deletions
// of the commit with the given full hash should be counted in the statistics.
// Only exact matches against the hashes listed below are excluded.
func shouldConsiderLinesOfCommit(hash string) bool {
	switch hash {
	case
		"41e4d9a34c1d9dcb2eef3ff39ff9c1f987bfa02a", // Consistent formatting for Dawn/Tint.
		"e87ac76f7ddf9237f3022cda90224bd0691fb318", // Merge tint -> dawn
		"b0acbd436dbd499505a3fa8bf89e69231ec4d1e0": // Fix build/namespaces issues
		return false
	default:
		return true
	}
}
// reEmail is the regular expression used to parse the email from an author
// string. The first capture group holds the text found between the first
// '<' and the next '>'. Example:
//
//	Bob Bobson <bob@bobmail.com>
//	____________^^^^^^^^^^^^^^^_
var reEmail = regexp.MustCompile(`<([^>]+)>`)
// run gathers statistics for the commits made to the git repository at --repo
// between the --after and --before dates, and prints them to stdout:
//   - the 10 most modified files,
//   - the 10 largest commits,
//   - the total contributions by author,
//   - the number of commits by author, by month.
//
// If --before is omitted the interval ends now. If --after is omitted the
// interval starts --days days before the end.
//
// An error is returned if a date flag cannot be parsed, if git cannot be found
// or the repository cannot be opened, or if gathering the log or the
// statistics of any commit fails.
func run() error {
	// Parse the --after and --before flags
	var after, before time.Time
	var err error
	if *beforeFlag != "" {
		before, err = time.Parse(yyyymmdd, *beforeFlag)
		if err != nil {
			return fmt.Errorf("Couldn't parse before date: %w", err)
		}
	} else {
		before = time.Now()
	}
	if *afterFlag != "" {
		after, err = time.Parse(yyyymmdd, *afterFlag)
		if err != nil {
			return fmt.Errorf("Couldn't parse after date: %w", err)
		}
	} else {
		after = before.Add(-time.Hour * time.Duration(24**daysFlag))
	}

	// Find 'git'
	gitExe, err := exec.LookPath("git")
	if err != nil {
		return err
	}

	// Create the git.Git wrapper
	g, err := git.New(gitExe)
	if err != nil {
		return err
	}

	// Open the repo
	r, err := g.Open(*repo)
	if err != nil {
		return err
	}

	// Information obtained about a single commit
	type CommitStat struct {
		author     string
		commit     *git.CommitInfo
		insertions int
		deletions  int
		fileDeltas container.Map[string, int]
	}

	// Gather all the commits in the git log between 'after' and 'before'.
	// LogBetween returns the complete list in one go, so it is called here
	// (rather than on a goroutine) to allow a failure to be returned to the
	// caller instead of crashing the process.
	commitLog, err := r.LogBetween(after, before, &git.LogBetweenOptions{})
	if err != nil {
		return fmt.Errorf("failed to gather commits: %w", err)
	}

	// Kick a goroutine to stream the commits to the 'commits' chan.
	// This chan will be closed by the goroutine when all commits have been
	// sent.
	commits := make(chan git.CommitInfo, 256)
	go func() {
		defer close(commits)
		for _, commit := range commitLog {
			commits <- commit
		}
	}()

	// The first error encountered by any of the workers. Guarded by
	// 'workerErrMutex' while the workers are running.
	var (
		workerErrMutex sync.Mutex
		workerErr      error
	)

	// Kick 'numWorkers' goroutines to gather the commit statistics of the
	// commits in the 'commits' chan, streaming the commit statistics to the
	// 'commitStats' chan.
	commitStats := make(chan CommitStat, 256)
	numWorkers := runtime.NumCPU()
	wg := sync.WaitGroup{}
	wg.Add(numWorkers)
	for worker := 0; worker < numWorkers; worker++ {
		go func() {
			defer wg.Done()
			for commit := range commits {
				commit := commit

				// Identify the author by email. If the author string holds no
				// '<email>', fall back to the whole author string instead of
				// indexing into a nil match.
				author := commit.Author
				if match := reEmail.FindStringSubmatch(commit.Author); match != nil {
					author = match[1]
				}

				stats, err := r.Stats(commit, nil)
				if err != nil {
					workerErrMutex.Lock()
					if workerErr == nil {
						workerErr = fmt.Errorf("failed to get stats for commit '%v': %w", commit.Hash, err)
					}
					workerErrMutex.Unlock()
					// Keep draining 'commits' so that the feeding goroutine
					// can never block on a chan that nobody reads.
					continue
				}

				s := CommitStat{
					author:     author,
					commit:     &commit,
					fileDeltas: container.NewMap[string, int](),
				}
				if shouldConsiderLinesOfCommit(commit.Hash.String()) {
					for file, fileStats := range stats {
						if shouldConsiderLinesOfFile(file) {
							s.insertions += fileStats.Insertions
							s.deletions += fileStats.Deletions
							s.fileDeltas[file] = fileStats.Insertions + fileStats.Deletions
						}
					}
				}
				commitStats <- s
			}
		}()
	}

	// Kick a helper goroutine that waits for all the goroutines that feed the
	// 'commitStats' chan to complete, and then closes the 'commitStats' chan.
	go func() {
		wg.Wait()
		close(commitStats)
	}()

	// CommitDelta holds the sum of line additions and deletions for a given
	// commit.
	type CommitDelta struct {
		commit *git.CommitInfo
		delta  int
	}

	// Stream in the commit statistics from the 'commitStats' chan, and collect
	// statistics by author and by file.
	statsByAuthor := container.NewMap[string, AuthorStats]()
	fileDeltas := container.NewMap[string, int]()
	commitDeltas := []CommitDelta{}
	for cs := range commitStats {
		as := statsByAuthor[cs.author]
		as.insertions += cs.insertions
		as.deletions += cs.deletions
		as.commits++
		if as.commitsByMonth == nil {
			as.commitsByMonth = container.NewMap[string, int]()
		}
		month := cs.commit.Date.Format(yyyymm)
		as.commitsByMonth[month] = as.commitsByMonth[month] + 1
		statsByAuthor[cs.author] = as

		commitDelta := 0
		for path, delta := range cs.fileDeltas {
			fileDeltas[path] = fileDeltas[path] + delta
			commitDelta += delta
		}
		commitDeltas = append(commitDeltas, CommitDelta{cs.commit, commitDelta})
	}

	// 'commitStats' is only closed once every worker has finished, so
	// 'workerErr' can be read without holding the mutex from here on.
	if workerErr != nil {
		return workerErr
	}

	// Transform the 'statsByAuthor' map, so that authors that have statistics
	// for both a @google.com and @chromium.org account have all their
	// statistics merged into the @google.com account.
	for google, googleStats := range statsByAuthor {
		if strings.HasSuffix(google, "@google.com") {
			combined := strings.TrimSuffix(google, "@google.com")
			chromium := combined + "@chromium.org"
			if chromiumStats, hasChromium := statsByAuthor[chromium]; hasChromium {
				statsByAuthor[google] = combine(googleStats, chromiumStats)
				delete(statsByAuthor, chromium)
			}
		}
	}

	// Print those stats!
	fmt.Printf("Between %v and %v:\n", after, before)

	// Print the top 10 most modified files.
	// This is helpful to identify files that are automatically generated, which
	// we should exclude from the statistics.
	{
		type FileDelta struct {
			file  string
			delta int
		}
		l := make([]FileDelta, 0, len(fileDeltas))
		for file, delta := range fileDeltas {
			l = append(l, FileDelta{file, delta})
		}
		// Largest delta first. Ties are broken by file path so that the output
		// does not depend on map iteration order.
		sort.Slice(l, func(i, j int) bool {
			if l[i].delta != l[j].delta {
				return l[i].delta > l[j].delta
			}
			return l[i].file < l[j].file
		})
		n := len(l)
		if n > 10 {
			n = 10
		}
		fmt.Println()
		fmt.Printf("Top %v most modified files:\n", n)
		fmt.Println()
		tw := tabwriter.NewWriter(os.Stdout, 0, 0, 0, ' ', 0)
		fmt.Fprintln(tw, " delta\t | file")
		for _, fd := range l[:n] {
			fmt.Fprintln(tw,
				" ", fd.delta,
				"\t |", fd.file)
		}
		tw.Flush()
	}

	// Print the top 10 largest commits.
	// This is helpful to identify commits that may contain a large bulk
	// refactor, which we should exclude from the statistics.
	{
		// Largest delta first. Ties are broken by commit hash so that the
		// output does not depend on the order in which the workers finished.
		sort.Slice(commitDeltas, func(i, j int) bool {
			if commitDeltas[i].delta != commitDeltas[j].delta {
				return commitDeltas[i].delta > commitDeltas[j].delta
			}
			return commitDeltas[i].commit.Hash.String() < commitDeltas[j].commit.Hash.String()
		})
		n := len(commitDeltas)
		if n > 10 {
			n = 10
		}
		fmt.Println()
		fmt.Printf("Top %v largest commits:\n", n)
		fmt.Println()
		tw := tabwriter.NewWriter(os.Stdout, 0, 0, 0, ' ', 0)
		fmt.Fprintln(tw,
			" delta\t | author\t | hash\t | description")
		for _, fd := range commitDeltas[:n] {
			fmt.Fprintln(tw,
				" ", fd.delta,
				"\t |", fd.commit.Author,
				"\t |", fd.commit.Hash.String()[:6],
				"\t |", fd.commit.Subject)
		}
		tw.Flush()
	}

	// Print the contributions by author.
	{
		fmt.Println()
		fmt.Println("Total contributions by author:")
		tw := tabwriter.NewWriter(os.Stdout, 0, 0, 0, ' ', 0)
		fmt.Println()
		fmt.Fprintln(tw, " author\t | commits\t | added\t | removed")
		for _, author := range statsByAuthor.Keys() {
			s := statsByAuthor[author]
			fmt.Fprintln(tw,
				" "+author,
				"\t |", s.commits,
				"\t |", s.insertions,
				"\t |", s.deletions)
		}
		tw.Flush()
	}

	// Print the per-author contributions by month.
	{
		allMonths := container.NewSet[string]()
		for _, author := range statsByAuthor {
			for month := range author.commitsByMonth {
				allMonths.Add(month)
			}
		}
		months := allMonths.List()
		fmt.Println()
		fmt.Println("Commits by author by month:")
		tw := tabwriter.NewWriter(os.Stdout, 0, 0, 0, ' ', 0)
		fmt.Println()
		fmt.Fprintf(tw, " author")
		for _, month := range months {
			fmt.Fprint(tw, "\t | ", month)
		}
		fmt.Fprintln(tw)
		for _, author := range statsByAuthor.Keys() {
			fmt.Fprint(tw, " ", author)
			cbm := statsByAuthor[author].commitsByMonth
			for _, month := range months {
				fmt.Fprint(tw, "\t | ", cbm[month])
			}
			fmt.Fprintln(tw)
		}
		tw.Flush()
	}

	return nil
}
// AuthorStats holds the statistics accumulated for a single author.
type AuthorStats struct {
	// Number of commits made by the author.
	commits int
	// Number of commits made by the author, keyed by month.
	// run() uses the commit date formatted with the 'yyyymm' layout as the key.
	commitsByMonth container.Map[string, int]
	// Sum of the line insertions of the author's commits.
	insertions int
	// Sum of the line deletions of the author's commits.
	deletions int
}
// combine merges the statistics of 'a' and 'b' into a freshly built
// AuthorStats: the commit, insertion and deletion counts are added together,
// and the per-month commit counts are summed month by month. Neither 'a' nor
// 'b' is modified.
func combine(a, b AuthorStats) AuthorStats {
	byMonth := container.NewMap[string, int]()
	for _, src := range []container.Map[string, int]{a.commitsByMonth, b.commitsByMonth} {
		for month, count := range src {
			byMonth[month] += count
		}
	}
	return AuthorStats{
		commits:        a.commits + b.commits,
		commitsByMonth: byMonth,
		insertions:     a.insertions + b.insertions,
		deletions:      a.deletions + b.deletions,
	}
}
// today returns the current local time, as reported by time.Now().
func today() time.Time {
	now := time.Now()
	return now
}
// date formats 't' using the 'yyyymmdd' layout (year-month-day).
func date(t time.Time) string {
	formatted := t.Format(yyyymmdd)
	return formatted
}

View File

@ -24,6 +24,7 @@ import (
"os"
"os/exec"
"path/filepath"
"strconv"
"strings"
"time"
)
@ -299,14 +300,7 @@ type LogOptions struct {
Timeout time.Duration
}
// CommitInfo describes a single git commit
// NOTE(review): presumably filled in by parseLog (not visible here) from git
// log output in the '%H', '%cI', '%an <%ae>', '%s', '%b' layout - confirm.
type CommitInfo struct {
	// Hash of the commit
	Hash Hash
	// Date of the commit
	Date time.Time
	// Author of the commit
	Author string
	// Subject line of the commit message
	Subject string
	// Description of the commit
	Description string
}
// logPrettyFormatArg is the '--pretty' argument passed to 'git log' by both
// Log() and LogBetween(), so that the two produce output in the same layout
// for parseLog().
// Per the git pretty-format placeholders, each commit is printed as: commit
// hash (%H), committer date in strict ISO 8601 (%cI), author name and email
// (%an <%ae>), subject (%s) and body (%b).
// NOTE(review): 'ǁ' appears to mark the start of each commit and 'ǀ' to
// separate its fields - confirm against parseLog().
const logPrettyFormatArg = "--pretty=format:ǁ%Hǀ%cIǀ%an <%ae>ǀ%sǀ%b"
// Log returns the list of commits between two references (inclusive).
// The first returned commit is the most recent.
@ -322,7 +316,7 @@ func (r Repository) Log(opt *LogOptions) ([]CommitInfo, error) {
if opt.From != "" {
rng = opt.From + "^.." + rng
}
args = append(args, rng, "--pretty=format:ǁ%Hǀ%cIǀ%an <%ae>ǀ%sǀ%b")
args = append(args, rng, logPrettyFormatArg)
out, err := r.run(nil, opt.Timeout, args...)
if err != nil {
return nil, err
@ -330,6 +324,94 @@ func (r Repository) Log(opt *LogOptions) ([]CommitInfo, error) {
return parseLog(out)
}
// LogBetweenOptions holds the optional settings for Repository.LogBetween.
// A nil *LogBetweenOptions is treated by LogBetween as the zero value.
type LogBetweenOptions struct {
	// Timeout for the operation
	Timeout time.Duration
}
// LogBetween returns the list of commits made between the 'since' and 'until'
// timestamps, as reported by 'git log --since ... --until ...'. Both
// timestamps are passed to git in RFC 3339 form.
// The first returned commit is the most recent.
// 'opt' may be nil, in which case a zero timeout is passed to the git
// invocation.
func (r Repository) LogBetween(since, until time.Time, opt *LogBetweenOptions) ([]CommitInfo, error) {
	var timeout time.Duration
	if opt != nil {
		timeout = opt.Timeout
	}

	out, err := r.run(nil, timeout,
		"log",
		"--since", since.Format(time.RFC3339),
		"--until", until.Format(time.RFC3339),
		logPrettyFormatArg,
	)
	if err != nil {
		return nil, err
	}
	return parseLog(out)
}
// FileStats describes the changes to a given file in a commit
type FileStats struct {
	// Number of inserted lines, as parsed from 'git diff --numstat'.
	// Left at 0 when git prints '-' instead of a count.
	Insertions int
	// Number of deleted lines, as parsed from 'git diff --numstat'.
	// Left at 0 when git prints '-' instead of a count.
	Deletions int
}
// CommitStats is a map of file to FileStats.
// The key is the file column of a 'git diff --numstat' output line, exactly as
// printed by git.
type CommitStats map[string]FileStats
// StatsOptions holds the optional settings for Repository.Stats.
// A nil *StatsOptions is treated by Stats as the zero value.
type StatsOptions struct {
	// Timeout for the operation
	Timeout time.Duration
}
// Stats returns the statistics for a given change: the number of inserted and
// deleted lines of each file touched by 'commit', relative to the commit's
// first parent.
// Files for which git reports '-' instead of a line count (binary files) are
// included with zero insertions and deletions.
// 'opt' may be nil, in which case the default options are used.
// An error is returned if the git invocation fails, or if its output cannot be
// parsed.
// NOTE(review): '<hash>^' needs to resolve, so a commit without a parent is
// presumably reported as an error by git - confirm.
func (r Repository) Stats(commit CommitInfo, opt *StatsOptions) (CommitStats, error) {
	if opt == nil {
		opt = &StatsOptions{}
	}

	hash := commit.Hash.String()
	// 'git diff A B' describes the changes needed to get from A to B, so the
	// parent has to come first for the commit's additions to be counted as
	// insertions (and not as deletions).
	args := []string{"diff", "--numstat", hash + "^", hash}
	out, err := r.run(nil, opt.Timeout, args...)
	if err != nil {
		return nil, err
	}

	stats := CommitStats{}
	for _, line := range strings.Split(out, "\n") {
		// Skip blank lines, such as the one following a trailing newline.
		if line == "" {
			continue
		}
		// Each line has the form: <insertions> \t <deletions> \t <file>
		parts := strings.Split(line, "\t")
		if len(parts) != 3 {
			return nil, fmt.Errorf("failed to parse stat line: '%v'", line)
		}
		insertions, deletions := 0, 0
		if parts[0] != "-" {
			insertions, err = strconv.Atoi(parts[0])
			if err != nil {
				return nil, fmt.Errorf("failed to stat insertions '%v': %w", parts[0], err)
			}
		}
		if parts[1] != "-" {
			deletions, err = strconv.Atoi(parts[1])
			if err != nil {
				return nil, fmt.Errorf("failed to stat deletions '%v': %w", parts[1], err)
			}
		}
		file := parts[2]
		stats[file] = FileStats{Insertions: insertions, Deletions: deletions}
	}
	return stats, nil
}
// CommitInfo describes a single git commit
// NOTE(review): presumably filled in by parseLog (not visible here) from git
// log output in the layout described by logPrettyFormatArg - confirm.
type CommitInfo struct {
	// Hash of the commit
	Hash Hash
	// Date of the commit
	Date time.Time
	// Author of the commit
	Author string
	// Subject line of the commit message
	Subject string
	// Description of the commit
	Description string
}
// Optional settings for Repository.ConfigOptions
type ConfigOptions struct {
// Timeout for the operation