diff --git a/GEMINI.md b/AGENTS.md similarity index 100% rename from GEMINI.md rename to AGENTS.md diff --git a/cmd/pierre/main.go b/cmd/pierre/main.go index 566f2e2..059d33a 100644 --- a/cmd/pierre/main.go +++ b/cmd/pierre/main.go @@ -37,7 +37,14 @@ type LLMConfig struct { Model string `help:"Model to use" env:"LLM_MODEL"` } +type ReviewConfig struct { + MaxChunkChars int `help:"Maximum diff chunk size in characters (default 60000)" default:"60000"` + Guidelines []string `help:"Project-specific review guidelines"` + DisableComments bool `help:"Do not post comments to the Git provider (dry‑run mode)"` +} + type Config struct { + Review ReviewConfig `embed:"" prefix:"review-"` GitProvider string `help:"Git provider (bitbucket or gitea)" env:"GIT_PROVIDER"` Bitbucket BitbucketConfig `embed:"" prefix:"bitbucket-"` Gitea GiteaConfig `embed:"" prefix:"gitea-"` @@ -117,7 +124,7 @@ func main() { log.Fatalf("Error initializing AI: %v", err) } - pierreService := pierre.New(ai, git) + pierreService := pierre.New(ai, git, cfg.Review.MaxChunkChars, cfg.Review.Guidelines, cfg.Review.DisableComments) if err := pierreService.MakeReview(context.Background(), cfg.Repo.Owner, cfg.Repo.Repo, cfg.Repo.PRID); err != nil { log.Fatalf("Error during review: %v", err) } diff --git a/go.mod b/go.mod index 9fee8d2..5bd5c91 100644 --- a/go.mod +++ b/go.mod @@ -7,7 +7,9 @@ require ( github.com/alecthomas/kong v1.14.0 github.com/alecthomas/kong-yaml v0.2.0 github.com/google/generative-ai-go v0.20.1 + github.com/google/go-cmp v0.7.0 github.com/ollama/ollama v0.16.0 + github.com/sashabaranov/go-openai v1.41.2 google.golang.org/api v0.186.0 ) @@ -34,7 +36,6 @@ require ( github.com/googleapis/gax-go/v2 v2.12.5 // indirect github.com/hashicorp/go-version v1.7.0 // indirect github.com/mailru/easyjson v0.7.7 // indirect - github.com/sashabaranov/go-openai v1.41.2 // indirect github.com/wk8/go-ordered-map/v2 v2.1.8 // indirect go.opencensus.io v0.24.0 // indirect go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.51.0 // indirect diff --git a/internal/gitadapters/bitbucket/controller.go b/internal/gitadapters/bitbucket/controller.go index 75b3a30..904efd0 100644 --- a/internal/gitadapters/bitbucket/controller.go +++ b/internal/gitadapters/bitbucket/controller.go @@ -31,7 +31,7 @@ func (b *BitbucketAdapter) GetDiff(ctx context.Context, projectKey, repositorySl if response.StatusCode != http.StatusOK { sb := &strings.Builder{} io.Copy(sb, response.Body) - err = fmt.Errorf("error while fetching bitbucket diff staus %d, body %s", response.Status, sb.String()) + err = fmt.Errorf("error while fetching bitbucket diff status %d, body %s", response.StatusCode, sb.String()) } diff = response.Body diff --git a/internal/pierre/judge.go b/internal/pierre/judge.go index 27c81fa..623d637 100644 --- a/internal/pierre/judge.go +++ b/internal/pierre/judge.go @@ -4,6 +4,7 @@ import ( "context" "fmt" "io" + "strings" "git.schreifuchs.ch/schreifuchs/pierre-bot/internal/chatter" ) @@ -19,21 +20,113 @@ func (s *Service) judgePR(ctx context.Context, diff io.Reader) (comments []Comme if err != nil { return nil, fmt.Errorf("failed to read diff: %w", err) } - err = s.chat.GenerateStructured(ctx, []chatter.Message{ - { - Role: chatter.RoleSystem, - Content: ` - You are a very strict senior software architect. - You review **only** newly added or modified lines in a unified diff, together with the immediate hunk context. - You do **not** report issues that appear **solely** in deleted lines (“-”) or that have already been fixed by the change. - No comments are made on pure formatting/whitespace changes or reordering that does not alter the program’s behavior. - `, - }, - { - Role: chatter.RoleUser, - Content: fmt.Sprintf("Hello please review my PR. Write comments where improvements are necessary in new lines.\n Here is the git diff of it: %s", string(diffBytes)), - }, - }, &comments) + // Determine chunk size (use default if not set) + maxSize := s.maxChunkSize + if maxSize <= 0 { + maxSize = 60000 // default 60KB ~ 15k tokens + } + + chunks := splitDiffIntoChunks(diffBytes, maxSize) + allComments := []Comment{} + + // Build optional guidelines text + guidelinesText := "" + if len(s.guidelines) > 0 { + guidelinesText = "\nProject guidelines:\n" + for _, g := range s.guidelines { + guidelinesText += "- " + g + "\n" + } + } + + baseSystem := strings.TrimSpace(` +You are a very strict senior software architect. +You review **only** newly added or modified lines in a unified diff, together with the immediate hunk context. +You do **not** report issues that appear **solely** in deleted lines (“-") or that have already been fixed by the change. +No comments are made on pure formatting/whitespace changes or reordering that does not alter the program’s behavior.`) + guidelinesText + + for i, chunk := range chunks { + // Add a small header so the model knows this is a fragment + header := fmt.Sprintf("\n--- Chunk %d of %d ---\n", i+1, len(chunks)) + userContent := fmt.Sprintf("Hello please review my PR. Write comments where improvements are necessary in new lines.%s\nHere is the git diff of it: %s", header, chunk) + + var chunkComments []Comment + err = s.chat.GenerateStructured(ctx, []chatter.Message{{ + Role: chatter.RoleSystem, + Content: baseSystem, + }, { + Role: chatter.RoleUser, + Content: userContent, + }}, &chunkComments) + if err != nil { + return nil, err + } + allComments = append(allComments, chunkComments...) + } + + // De‑duplicate comments (keyed by file:line) + unique := make(map[string]Comment) + for _, c := range allComments { + key := fmt.Sprintf("%s:%d", c.File, c.Line) + unique[key] = c + } + for _, v := range unique { + comments = append(comments, v) + } return } + +// splitDiffIntoChunks splits a diff into chunks that do not exceed maxSize bytes. +// It tries to split on file boundaries ("diff --git") first, then on hunk boundaries (@@), +// and finally on a hard byte limit. +func splitDiffIntoChunks(diff []byte, maxSize int) []string { + if len(diff) <= maxSize { + return []string{string(diff)} + } + content := string(diff) + // Split by file headers + parts := strings.Split(content, "\ndiff --git ") + chunks := []string{} + var current strings.Builder + for idx, part := range parts { + seg := part + if idx != 0 { + // Preserve the leading newline that was removed by Split + seg = "\n" + "diff --git " + part + } + if current.Len()+len(seg) > maxSize && current.Len() > 0 { + chunks = append(chunks, current.String()) + current.Reset() + } + if len(seg) > maxSize { + // Split further by hunks + hunks := strings.Split(seg, "\n@@ ") + for j, h := range hunks { + hseg := h + if j != 0 { + hseg = "@@ " + h + } + if current.Len()+len(hseg) > maxSize && current.Len() > 0 { + chunks = append(chunks, current.String()) + current.Reset() + } + if len(hseg) > maxSize { + for len(hseg) > maxSize { + chunks = append(chunks, hseg[:maxSize]) + hseg = hseg[maxSize:] + } + current.WriteString(hseg) + } else { + current.WriteString(hseg) + } + } + } else { + current.WriteString(seg) + } + } + if current.Len() > 0 { + chunks = append(chunks, current.String()) + } + return chunks +} + diff --git a/internal/pierre/judge_test.go b/internal/pierre/judge_test.go new file mode 100644 index 0000000..d83bc6c --- /dev/null +++ b/internal/pierre/judge_test.go @@ -0,0 +1,127 @@ +package pierre + +import ( + "bytes" + "context" + "io" + "strings" + "testing" + + "git.schreifuchs.ch/schreifuchs/pierre-bot/internal/chatter" + "github.com/google/go-cmp/cmp" +) + +// mockChat implements the ChatAdapter interface for testing. +type mockChat struct{} + +func (m *mockChat) GenerateStructured(ctx context.Context, msgs []chatter.Message, target interface{}) error { + if cSlice, ok := target.(*[]Comment); ok { + *cSlice = []Comment{{File: "file.go", Line: 1, Message: "test comment"}} + return nil + } + return nil +} + +func (m *mockChat) GetProviderName() string { return "mock" } + +// mockGit implements the GitAdapter interface for testing. +type mockGit struct{} + +func (g *mockGit) GetDiff(ctx context.Context, owner, repo string, prID int) (io.ReadCloser, error) { + diff := "diff --git a/file1.go b/file1.go\n+line1\n" + "diff --git a/file2.go b/file2.go\n+line2\n" + return io.NopCloser(bytes.NewReader([]byte(diff))), nil +} + +func (g *mockGit) AddComment(ctx context.Context, owner, repo string, prID int, comment Comment) error { + return nil +} + +func TestSplitDiffIntoChunks(t *testing.T) { + cases := []struct { + name string + diff string + maxSize int + wantChunks int // 0 means we don't assert exact count + wantPrefixes []string + checkRecombine bool + }{ + { + name: "small diff", + diff: "diff --git a/file1.txt b/file1.txt\n+added line\n", + maxSize: 1000, + wantChunks: 1, + wantPrefixes: []string{"diff --git a/file1.txt"}, + checkRecombine: true, + }, + { + name: "multiple files", + diff: "diff --git a/file1.txt b/file1.txt\n+added line 1\n" + + "diff --git a/file2.txt b/file2.txt\n+added line 2\n", + maxSize: 50, + wantChunks: 2, + wantPrefixes: []string{"diff --git a/file1.txt", "diff --git a/file2.txt"}, + checkRecombine: false, + }, + { + name: "large single file", + diff: func() string { + line := "+very long added line that will be repeated many times to exceed the chunk size\n" + return "diff --git a/large.txt b/large.txt\n" + strings.Repeat(line, 200) + }(), + maxSize: 500, + wantChunks: 0, + wantPrefixes: nil, + checkRecombine: true, + }, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + chunks := splitDiffIntoChunks([]byte(tc.diff), tc.maxSize) + if tc.wantChunks > 0 && len(chunks) != tc.wantChunks { + t.Fatalf("expected %d chunks, got %d", tc.wantChunks, len(chunks)) + } + for i, prefix := range tc.wantPrefixes { + if i >= len(chunks) { + t.Fatalf("missing chunk %d for prefix check", i) + } + trimmed := strings.TrimPrefix(chunks[i], "\n") + if !strings.HasPrefix(trimmed, prefix) { + t.Fatalf("chunk %d does not start with expected prefix %q: %s", i, prefix, chunks[i]) + } + } + for i, c := range chunks { + if tc.maxSize > 0 && len(c) > tc.maxSize { + t.Fatalf("chunk %d exceeds max size %d: %d", i, tc.maxSize, len(c)) + } + } + if tc.checkRecombine { + recombined := strings.Join(chunks, "") + if diff := cmp.Diff(tc.diff, recombined); diff != "" { + t.Fatalf("recombined diff differs:\n%s", diff) + } + } + }) + } +} + +func TestJudgePR_ChunkAggregationAndDeduplication(t *testing.T) { + svc := &Service{ + maxChunkSize: 50, + guidelines: nil, + git: &mockGit{}, + chat: &mockChat{}, + } + diffReader, err := svc.git.GetDiff(context.Background(), "", "", 0) + if err != nil { + t.Fatalf("failed to get diff: %v", err) + } + defer diffReader.Close() + comments, err := svc.judgePR(context.Background(), diffReader) + if err != nil { + t.Fatalf("judgePR error: %v", err) + } + if got, want := len(comments), 1; got != want { + t.Fatalf("expected %d comment after deduplication, got %d", want, got) + } +} diff --git a/internal/pierre/resource.go b/internal/pierre/resource.go index ecdc8b6..9cd952d 100644 --- a/internal/pierre/resource.go +++ b/internal/pierre/resource.go @@ -8,14 +8,20 @@ import ( ) type Service struct { - git GitAdapter - chat ChatAdapter + maxChunkSize int + guidelines []string + disableComments bool + git GitAdapter + chat ChatAdapter } -func New(chat ChatAdapter, git GitAdapter) *Service { +func New(chat ChatAdapter, git GitAdapter, maxChunkSize int, guidelines []string, disableComments bool) *Service { return &Service{ - git: git, - chat: chat, + git: git, + chat: chat, + maxChunkSize: maxChunkSize, + guidelines: guidelines, + disableComments: disableComments, } } diff --git a/internal/pierre/review.go b/internal/pierre/review.go index d3fb8c3..f287790 100644 --- a/internal/pierre/review.go +++ b/internal/pierre/review.go @@ -29,9 +29,12 @@ func (s *Service) MakeReview(ctx context.Context, organisation string, repo stri fmt.Printf("File: %s\nLine: %d\nMessage: %s\n%s\n", c.File, c.Line, c.Message, "---") - if err := s.git.AddComment(ctx, organisation, repo, prID, c); err != nil { + if s.disableComments { + log.Printf("dry-run: not posting comment for %s:%d", c.File, c.Line) + } else if err := s.git.AddComment(ctx, organisation, repo, prID, c); err != nil { log.Printf("Failed to add comment: %v", err) } + } return nil