Add tools/src/cmd/git-stats
Can be run with: ./tools/run git-stats Change-Id: Ie46be01ff3318ff50acdb13eb809d5304f867f04 Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/111180 Auto-Submit: Ben Clayton <bclayton@google.com> Commit-Queue: David Neto <dneto@google.com> Kokoro: Kokoro <noreply+kokoro@google.com> Reviewed-by: David Neto <dneto@google.com>
This commit is contained in:
parent
7c6e229a18
commit
d6800098e7
|
@ -0,0 +1,395 @@
|
|||
// Copyright 2022 The Tint Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// git-stats gathers statistics about changes made to a git repo.
|
||||
package main
|
||||
|
||||
import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"os"
|
||||
"os/exec"
|
||||
"regexp"
|
||||
"runtime"
|
||||
"sort"
|
||||
"strings"
|
||||
"sync"
|
||||
"text/tabwriter"
|
||||
"time"
|
||||
|
||||
"dawn.googlesource.com/dawn/tools/src/container"
|
||||
"dawn.googlesource.com/dawn/tools/src/git"
|
||||
)
|
||||
|
||||
// Flags
|
||||
var (
|
||||
repo = flag.String("repo", ".", "path to git directory")
|
||||
afterFlag = flag.String("after", "", "start date")
|
||||
beforeFlag = flag.String("before", "", "end date")
|
||||
daysFlag = flag.Int("days", 182, "interval in days (used if --after is not specified)")
|
||||
)
|
||||
|
||||
// main entry point
|
||||
func main() {
|
||||
flag.Parse()
|
||||
if err := run(); err != nil {
|
||||
fmt.Fprintln(os.Stderr, err)
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
||||
|
||||
// Date format strings
|
||||
const (
|
||||
yyyymmdd = "2006-01-02"
|
||||
yyyymm = "2006-01"
|
||||
)
|
||||
|
||||
// Returns true if the file with the given path should be included for addition / deletion stats.
|
||||
func shouldConsiderLinesOfFile(path string) bool {
|
||||
for _, ignore := range []string{
|
||||
"Doxyfile",
|
||||
"package-lock.json",
|
||||
"src/tint/builtin_table.inl",
|
||||
"src/tint/resolver/intrinsic_table.inl",
|
||||
"test/tint/",
|
||||
"third_party/gn/webgpu-cts/test_list.txt",
|
||||
"third_party/khronos/",
|
||||
"webgpu-cts/",
|
||||
"src/external/petamoriken",
|
||||
} {
|
||||
if strings.HasPrefix(path, ignore) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// Returns true if the commit with the given hash should be included for addition / deletion stats.
|
||||
func shouldConsiderLinesOfCommit(hash string) bool {
|
||||
for _, ignore := range []string{
|
||||
"41e4d9a34c1d9dcb2eef3ff39ff9c1f987bfa02a", // Consistent formatting for Dawn/Tint.
|
||||
"e87ac76f7ddf9237f3022cda90224bd0691fb318", // Merge tint -> dawn
|
||||
"b0acbd436dbd499505a3fa8bf89e69231ec4d1e0", // Fix build/namespaces issues
|
||||
} {
|
||||
if hash == ignore {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// Regular expression used to parse the email from an author string. Example:
|
||||
// Bob Bobson <bob@bobmail.com>
|
||||
// ____________^^^^^^^^^^^^^^^_
|
||||
var reEmail = regexp.MustCompile(`<([^>]+)>`)
|
||||
|
||||
func run() error {
|
||||
// Parse the --after and --before flags
|
||||
var after, before time.Time
|
||||
var err error
|
||||
if *beforeFlag != "" {
|
||||
before, err = time.Parse(yyyymmdd, *beforeFlag)
|
||||
if err != nil {
|
||||
return fmt.Errorf("Couldn't parse before date: %w", err)
|
||||
}
|
||||
} else {
|
||||
before = time.Now()
|
||||
}
|
||||
if *afterFlag != "" {
|
||||
after, err = time.Parse(yyyymmdd, *afterFlag)
|
||||
if err != nil {
|
||||
return fmt.Errorf("Couldn't parse after date: %w", err)
|
||||
}
|
||||
} else {
|
||||
after = before.Add(-time.Hour * time.Duration(24**daysFlag))
|
||||
}
|
||||
|
||||
// Find 'git'
|
||||
gitExe, err := exec.LookPath("git")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Create the git.Git wrapper
|
||||
g, err := git.New(gitExe)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Open the repo
|
||||
r, err := g.Open(*repo)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Information obtained about a single commit
|
||||
type CommitStat struct {
|
||||
author string
|
||||
commit *git.CommitInfo
|
||||
insertions int
|
||||
deletions int
|
||||
fileDeltas container.Map[string, int]
|
||||
}
|
||||
|
||||
// Kick a goroutine to gather all the commits in the git log between
|
||||
// 'after' and 'before', streaming the commits to the 'commits' chan.
|
||||
// This chan will be closed by the goroutine when all commits have been
|
||||
// gathered.
|
||||
commits := make(chan git.CommitInfo, 256)
|
||||
go func() {
|
||||
log, err := r.LogBetween(after, before, &git.LogBetweenOptions{})
|
||||
if err != nil {
|
||||
panic(fmt.Errorf("failed to gather commits: %w", err))
|
||||
}
|
||||
for _, commit := range log {
|
||||
commits <- commit
|
||||
}
|
||||
close(commits)
|
||||
}()
|
||||
|
||||
// Kick 'numWorkers' goroutines to gather the commit statistics of the
|
||||
// commits in the 'commits' chan, streaming the commit statistics to the
|
||||
// 'commitStats' chan.
|
||||
commitStats := make(chan CommitStat, 256)
|
||||
numWorkers := runtime.NumCPU()
|
||||
wg := sync.WaitGroup{}
|
||||
wg.Add(numWorkers)
|
||||
for worker := 0; worker < numWorkers; worker++ {
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
for commit := range commits {
|
||||
commit := commit
|
||||
email := reEmail.FindStringSubmatch(commit.Author)[1]
|
||||
stats, err := r.Stats(commit, nil)
|
||||
if err != nil {
|
||||
panic(fmt.Errorf("failed to get stats for commit '%v': %w", commit.Hash, err))
|
||||
}
|
||||
|
||||
s := CommitStat{
|
||||
author: email,
|
||||
commit: &commit,
|
||||
fileDeltas: container.NewMap[string, int](),
|
||||
}
|
||||
if shouldConsiderLinesOfCommit(commit.Hash.String()) {
|
||||
for file, stats := range stats {
|
||||
if shouldConsiderLinesOfFile(file) {
|
||||
s.insertions += stats.Insertions
|
||||
s.deletions += stats.Deletions
|
||||
s.fileDeltas[file] = stats.Insertions + stats.Deletions
|
||||
}
|
||||
}
|
||||
}
|
||||
commitStats <- s
|
||||
}
|
||||
}()
|
||||
}
|
||||
|
||||
// Kick a helper goroutine that waits for all the goroutines that feed the
|
||||
// 'commitStats' chan to complete, and then closes the 'commitStats' chan.
|
||||
go func() {
|
||||
wg.Wait()
|
||||
close(commitStats)
|
||||
}()
|
||||
|
||||
// CommitDelta holds the sum of line additions and deletions for a given
|
||||
// commit.
|
||||
type CommitDelta struct {
|
||||
commit *git.CommitInfo
|
||||
delta int
|
||||
}
|
||||
|
||||
// Stream in the commit statistics from the 'commitStats' chan, and collect
|
||||
// statistics by author and by file.
|
||||
statsByAuthor := container.NewMap[string, AuthorStats]()
|
||||
fileDeltas := container.NewMap[string, int]()
|
||||
commitDeltas := []CommitDelta{}
|
||||
for cs := range commitStats {
|
||||
as := statsByAuthor[cs.author]
|
||||
as.insertions += cs.insertions
|
||||
as.deletions += cs.deletions
|
||||
as.commits++
|
||||
if as.commitsByMonth == nil {
|
||||
as.commitsByMonth = container.NewMap[string, int]()
|
||||
}
|
||||
month := cs.commit.Date.Format(yyyymm)
|
||||
as.commitsByMonth[month] = as.commitsByMonth[month] + 1
|
||||
statsByAuthor[cs.author] = as
|
||||
|
||||
commitDelta := 0
|
||||
for path, delta := range cs.fileDeltas {
|
||||
fileDeltas[path] = fileDeltas[path] + delta
|
||||
commitDelta += delta
|
||||
}
|
||||
commitDeltas = append(commitDeltas, CommitDelta{cs.commit, commitDelta})
|
||||
}
|
||||
|
||||
// Transform the 'statsByAuthor' map, so that authors that have statistics
|
||||
// for both a @google.com and @chromium.org account have all their
|
||||
// statistics merged into the @google.com account.
|
||||
for google, g := range statsByAuthor {
|
||||
if strings.HasSuffix(google, "@google.com") {
|
||||
combined := strings.TrimSuffix(google, "@google.com")
|
||||
chromium := combined + "@chromium.org"
|
||||
if c, hasChromium := statsByAuthor[chromium]; hasChromium {
|
||||
statsByAuthor[google] = combine(g, c)
|
||||
delete(statsByAuthor, chromium)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Print those stats!
|
||||
|
||||
fmt.Printf("Between %v and %v:\n", after, before)
|
||||
|
||||
// Print the top 10 most modified files.
|
||||
// This is helpful to identify files that are automatically generated, which
|
||||
// we should exclude from the statistics.
|
||||
{
|
||||
type FileDelta struct {
|
||||
file string
|
||||
delta int
|
||||
}
|
||||
l := make([]FileDelta, 0, len(fileDeltas))
|
||||
for file, delta := range fileDeltas {
|
||||
l = append(l, FileDelta{file, delta})
|
||||
}
|
||||
sort.Slice(l, func(i, j int) bool { return l[i].delta > l[j].delta })
|
||||
n := len(l)
|
||||
if n > 10 {
|
||||
n = 10
|
||||
}
|
||||
fmt.Println()
|
||||
fmt.Printf("Top %v most modified files:\n", n)
|
||||
fmt.Println()
|
||||
tw := tabwriter.NewWriter(os.Stdout, 0, 0, 0, ' ', 0)
|
||||
fmt.Fprintln(tw, " delta\t | file")
|
||||
for _, fd := range l[:n] {
|
||||
fmt.Fprintln(tw,
|
||||
" ", fd.delta,
|
||||
"\t |", fd.file)
|
||||
}
|
||||
tw.Flush()
|
||||
}
|
||||
|
||||
// Print the top 10 largest commits.
|
||||
// This is helpful to identify commits that may contain a large bulk
|
||||
// refactor, which we should exclude from the statistics.
|
||||
{
|
||||
sort.Slice(commitDeltas, func(i, j int) bool {
|
||||
return commitDeltas[i].delta > commitDeltas[j].delta
|
||||
})
|
||||
n := len(commitDeltas)
|
||||
if n > 10 {
|
||||
n = 10
|
||||
}
|
||||
fmt.Println()
|
||||
fmt.Printf("Top %v largest commits:\n", n)
|
||||
fmt.Println()
|
||||
tw := tabwriter.NewWriter(os.Stdout, 0, 0, 0, ' ', 0)
|
||||
fmt.Fprintln(tw,
|
||||
" delta\t | author\t | hash\t | description")
|
||||
for _, fd := range commitDeltas[:n] {
|
||||
fmt.Fprintln(tw,
|
||||
" ", fd.delta,
|
||||
"\t |", fd.commit.Author,
|
||||
"\t |", fd.commit.Hash.String()[:6],
|
||||
"\t |", fd.commit.Subject)
|
||||
}
|
||||
tw.Flush()
|
||||
}
|
||||
|
||||
// Print the contributions by author.
|
||||
{
|
||||
fmt.Println()
|
||||
fmt.Println("Total contributions by author:")
|
||||
tw := tabwriter.NewWriter(os.Stdout, 0, 0, 0, ' ', 0)
|
||||
fmt.Println()
|
||||
fmt.Fprintln(tw, " author\t | commits\t | added\t | removed")
|
||||
for _, author := range statsByAuthor.Keys() {
|
||||
s := statsByAuthor[author]
|
||||
fmt.Fprintln(tw,
|
||||
" "+author,
|
||||
"\t |", s.commits,
|
||||
"\t |", s.insertions,
|
||||
"\t |", s.deletions)
|
||||
}
|
||||
tw.Flush()
|
||||
}
|
||||
|
||||
// Print the per-author contributions by month.
|
||||
{
|
||||
allMonths := container.NewSet[string]()
|
||||
for _, author := range statsByAuthor {
|
||||
for month := range author.commitsByMonth {
|
||||
allMonths.Add(month)
|
||||
}
|
||||
}
|
||||
|
||||
months := allMonths.List()
|
||||
|
||||
fmt.Println()
|
||||
fmt.Println("Commits by author by month:")
|
||||
tw := tabwriter.NewWriter(os.Stdout, 0, 0, 0, ' ', 0)
|
||||
fmt.Println()
|
||||
fmt.Fprintf(tw, " author")
|
||||
for _, month := range months {
|
||||
fmt.Fprint(tw, "\t | ", month)
|
||||
}
|
||||
fmt.Fprintln(tw)
|
||||
|
||||
for _, author := range statsByAuthor.Keys() {
|
||||
fmt.Fprint(tw, " ", author)
|
||||
cbm := statsByAuthor[author].commitsByMonth
|
||||
for _, month := range months {
|
||||
fmt.Fprint(tw, "\t | ", cbm[month])
|
||||
}
|
||||
fmt.Fprintln(tw)
|
||||
}
|
||||
tw.Flush()
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
type AuthorStats struct {
|
||||
commits int
|
||||
commitsByMonth container.Map[string, int]
|
||||
insertions int
|
||||
deletions int
|
||||
}
|
||||
|
||||
// combine returns a new AuthorStats, with the summed statistics of 'a' and 'b'.
|
||||
func combine(a, b AuthorStats) AuthorStats {
|
||||
out := AuthorStats{
|
||||
commits: a.commits + b.commits,
|
||||
insertions: a.insertions + b.insertions,
|
||||
deletions: a.deletions + b.deletions,
|
||||
}
|
||||
out.commitsByMonth = container.NewMap[string, int]()
|
||||
for month, commits := range a.commitsByMonth {
|
||||
out.commitsByMonth[month] = commits
|
||||
}
|
||||
for month, commits := range b.commitsByMonth {
|
||||
out.commitsByMonth[month] = out.commitsByMonth[month] + commits
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
func today() time.Time {
|
||||
return time.Now()
|
||||
}
|
||||
|
||||
func date(t time.Time) string {
|
||||
return t.Format(yyyymmdd)
|
||||
}
|
|
@ -24,6 +24,7 @@ import (
|
|||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
@ -299,14 +300,7 @@ type LogOptions struct {
|
|||
Timeout time.Duration
|
||||
}
|
||||
|
||||
// CommitInfo describes a single git commit
|
||||
type CommitInfo struct {
|
||||
Hash Hash
|
||||
Date time.Time
|
||||
Author string
|
||||
Subject string
|
||||
Description string
|
||||
}
|
||||
const logPrettyFormatArg = "--pretty=format:ǁ%Hǀ%cIǀ%an <%ae>ǀ%sǀ%b"
|
||||
|
||||
// Log returns the list of commits between two references (inclusive).
|
||||
// The first returned commit is the most recent.
|
||||
|
@ -322,7 +316,7 @@ func (r Repository) Log(opt *LogOptions) ([]CommitInfo, error) {
|
|||
if opt.From != "" {
|
||||
rng = opt.From + "^.." + rng
|
||||
}
|
||||
args = append(args, rng, "--pretty=format:ǁ%Hǀ%cIǀ%an <%ae>ǀ%sǀ%b")
|
||||
args = append(args, rng, logPrettyFormatArg)
|
||||
out, err := r.run(nil, opt.Timeout, args...)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
|
@ -330,6 +324,94 @@ func (r Repository) Log(opt *LogOptions) ([]CommitInfo, error) {
|
|||
return parseLog(out)
|
||||
}
|
||||
|
||||
// Optional settings for Repository.LogBetween
|
||||
type LogBetweenOptions struct {
|
||||
// Timeout for the operation
|
||||
Timeout time.Duration
|
||||
}
|
||||
|
||||
// LogBetween returns the list of commits between two timestamps
|
||||
// The first returned commit is the most recent.
|
||||
func (r Repository) LogBetween(since, until time.Time, opt *LogBetweenOptions) ([]CommitInfo, error) {
|
||||
if opt == nil {
|
||||
opt = &LogBetweenOptions{}
|
||||
}
|
||||
args := []string{"log",
|
||||
"--since", since.Format(time.RFC3339),
|
||||
"--until", until.Format(time.RFC3339),
|
||||
logPrettyFormatArg,
|
||||
}
|
||||
out, err := r.run(nil, opt.Timeout, args...)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return parseLog(out)
|
||||
}
|
||||
|
||||
// FileStats describes the changes to a given file in a commit
|
||||
type FileStats struct {
|
||||
Insertions int
|
||||
Deletions int
|
||||
}
|
||||
|
||||
// CommitStats is a map of file to FileStats
|
||||
type CommitStats map[string]FileStats
|
||||
|
||||
// Optional settings for Repository.Stats
|
||||
type StatsOptions struct {
|
||||
// Timeout for the operation
|
||||
Timeout time.Duration
|
||||
}
|
||||
|
||||
// StatsOptions returns the statistics for a given change
|
||||
func (r Repository) Stats(commit CommitInfo, opt *StatsOptions) (CommitStats, error) {
|
||||
if opt == nil {
|
||||
opt = &StatsOptions{}
|
||||
}
|
||||
|
||||
hash := commit.Hash.String()
|
||||
args := []string{"diff", "--numstat", hash, hash + "^"}
|
||||
out, err := r.run(nil, opt.Timeout, args...)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
stats := CommitStats{}
|
||||
for _, line := range strings.Split(out, "\n") {
|
||||
if out == "" {
|
||||
continue
|
||||
}
|
||||
parts := strings.Split(line, "\t")
|
||||
if len(parts) != 3 {
|
||||
return nil, fmt.Errorf("failed to parse stat line: '%v'", line)
|
||||
}
|
||||
insertions, deletions := 0, 0
|
||||
if parts[0] != "-" {
|
||||
insertions, err = strconv.Atoi(parts[0])
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to stat insertions '%v': %w", parts[0], err)
|
||||
}
|
||||
}
|
||||
if parts[1] != "-" {
|
||||
deletions, err = strconv.Atoi(parts[1])
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to stat deletions '%v': %w", parts[1], err)
|
||||
}
|
||||
}
|
||||
file := parts[2]
|
||||
stats[file] = FileStats{Insertions: insertions, Deletions: deletions}
|
||||
}
|
||||
return stats, nil
|
||||
}
|
||||
|
||||
// CommitInfo describes a single git commit
|
||||
type CommitInfo struct {
|
||||
Hash Hash
|
||||
Date time.Time
|
||||
Author string
|
||||
Subject string
|
||||
Description string
|
||||
}
|
||||
|
||||
// Optional settings for Repository.ConfigOptions
|
||||
type ConfigOptions struct {
|
||||
// Timeout for the operation
|
||||
|
|
Loading…
Reference in New Issue