From 985fac347c84b10c8f642f37ba206a1423d95205 Mon Sep 17 00:00:00 2001 From: u80864958 Date: Fri, 13 Feb 2026 16:18:49 +0100 Subject: [PATCH] feat(pierre): add diff chunking and configurable review settings --- GEMINI.md => AGENTS.md | 0 cmd/pierre/main.go | 8 +- go.mod | 3 +- internal/gitadapters/bitbucket/controller.go | 2 +- internal/pierre/judge.go | 121 ++++++++++++++++--- internal/pierre/judge_test.go | 57 +++++++++ internal/pierre/resource.go | 14 ++- 7 files changed, 182 insertions(+), 23 deletions(-) rename GEMINI.md => AGENTS.md (100%) create mode 100644 internal/pierre/judge_test.go diff --git a/GEMINI.md b/AGENTS.md similarity index 100% rename from GEMINI.md rename to AGENTS.md diff --git a/cmd/pierre/main.go b/cmd/pierre/main.go index 566f2e2..f158d12 100644 --- a/cmd/pierre/main.go +++ b/cmd/pierre/main.go @@ -37,7 +37,13 @@ type LLMConfig struct { Model string `help:"Model to use" env:"LLM_MODEL"` } +type ReviewConfig struct { + MaxChunkChars int `help:"Maximum diff chunk size in characters (default 60000)" default:"60000"` + Guidelines []string `help:"Project-specific review guidelines"` +} + type Config struct { + Review ReviewConfig `embed:"" prefix:"review-"` GitProvider string `help:"Git provider (bitbucket or gitea)" env:"GIT_PROVIDER"` Bitbucket BitbucketConfig `embed:"" prefix:"bitbucket-"` Gitea GiteaConfig `embed:"" prefix:"gitea-"` @@ -117,7 +123,7 @@ func main() { log.Fatalf("Error initializing AI: %v", err) } - pierreService := pierre.New(ai, git) + pierreService := pierre.New(ai, git, cfg.Review.MaxChunkChars, cfg.Review.Guidelines) if err := pierreService.MakeReview(context.Background(), cfg.Repo.Owner, cfg.Repo.Repo, cfg.Repo.PRID); err != nil { log.Fatalf("Error during review: %v", err) } diff --git a/go.mod b/go.mod index 9fee8d2..5bd5c91 100644 --- a/go.mod +++ b/go.mod @@ -7,7 +7,9 @@ require ( github.com/alecthomas/kong v1.14.0 github.com/alecthomas/kong-yaml v0.2.0 github.com/google/generative-ai-go v0.20.1 + github.com/google/go-cmp v0.7.0 github.com/ollama/ollama v0.16.0 + github.com/sashabaranov/go-openai v1.41.2 google.golang.org/api v0.186.0 ) @@ -34,7 +36,6 @@ require ( github.com/googleapis/gax-go/v2 v2.12.5 // indirect github.com/hashicorp/go-version v1.7.0 // indirect github.com/mailru/easyjson v0.7.7 // indirect - github.com/sashabaranov/go-openai v1.41.2 // indirect github.com/wk8/go-ordered-map/v2 v2.1.8 // indirect go.opencensus.io v0.24.0 // indirect go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.51.0 // indirect diff --git a/internal/gitadapters/bitbucket/controller.go b/internal/gitadapters/bitbucket/controller.go index 75b3a30..904efd0 100644 --- a/internal/gitadapters/bitbucket/controller.go +++ b/internal/gitadapters/bitbucket/controller.go @@ -31,7 +31,7 @@ func (b *BitbucketAdapter) GetDiff(ctx context.Context, projectKey, repositorySl if response.StatusCode != http.StatusOK { sb := &strings.Builder{} io.Copy(sb, response.Body) - err = fmt.Errorf("error while fetching bitbucket diff staus %d, body %s", response.Status, sb.String()) + err = fmt.Errorf("error while fetching bitbucket diff status %d, body %s", response.StatusCode, sb.String()) } diff = response.Body diff --git a/internal/pierre/judge.go b/internal/pierre/judge.go index 27c81fa..448b9f9 100644 --- a/internal/pierre/judge.go +++ b/internal/pierre/judge.go @@ -4,6 +4,7 @@ import ( "context" "fmt" "io" + "strings" "git.schreifuchs.ch/schreifuchs/pierre-bot/internal/chatter" ) @@ -19,21 +20,111 @@ func (s *Service) judgePR(ctx context.Context, diff io.Reader) (comments []Comme if err != nil { return nil, fmt.Errorf("failed to read diff: %w", err) } - err = s.chat.GenerateStructured(ctx, []chatter.Message{ - { - Role: chatter.RoleSystem, - Content: ` - You are a very strict senior software architect. - You review **only** newly added or modified lines in a unified diff, together with the immediate hunk context. - You do **not** report issues that appear **solely** in deleted lines (“-”) or that have already been fixed by the change. - No comments are made on pure formatting/whitespace changes or reordering that does not alter the program’s behavior. - `, - }, - { - Role: chatter.RoleUser, - Content: fmt.Sprintf("Hello please review my PR. Write comments where improvements are necessary in new lines.\n Here is the git diff of it: %s", string(diffBytes)), - }, - }, &comments) + // Determine chunk size (use default if not set) + maxSize := s.maxChunkSize + if maxSize <= 0 { + maxSize = 60000 // default 60KB ~ 15k tokens + } + + chunks := splitDiffIntoChunks(diffBytes, maxSize) + allComments := []Comment{} + + // Build optional guidelines text + guidelinesText := "" + if len(s.guidelines) > 0 { + guidelinesText = "\nProject guidelines:\n" + for _, g := range s.guidelines { + guidelinesText += "- " + g + "\n" + } + } + + baseSystem := ` + You are a very strict senior software architect. + You review **only** newly added or modified lines in a unified diff, together with the immediate hunk context. + You do **not** report issues that appear **solely** in deleted lines (“-”) or that have already been fixed by the change. + No comments are made on pure formatting/whitespace changes or reordering that does not alter the program’s behavior.` + guidelinesText + ` + ` + + for i, chunk := range chunks { + // Add a small header so the model knows this is a fragment + header := fmt.Sprintf("\n--- Chunk %d of %d ---\n", i+1, len(chunks)) + userContent := fmt.Sprintf("Hello please review my PR. Write comments where improvements are necessary in new lines.%s\nHere is the git diff of it: %s", header, chunk) + + err = s.chat.GenerateStructured(ctx, []chatter.Message{{ + Role: chatter.RoleSystem, + Content: baseSystem, + }, { + Role: chatter.RoleUser, + Content: userContent, + }}, &comments) + if err != nil { + return nil, err + } + allComments = append(allComments, comments...) + } + + // De‑duplicate comments (keyed by file:line) + unique := make(map[string]Comment) + for _, c := range allComments { + key := fmt.Sprintf("%s:%d", c.File, c.Line) + unique[key] = c + } + for _, v := range unique { + comments = append(comments, v) + } return } + +// splitDiffIntoChunks splits a diff into chunks that do not exceed maxSize bytes. +// It tries to split on file boundaries ("diff --git") first, then on hunk boundaries (@@), +// and finally on a hard byte limit. +func splitDiffIntoChunks(diff []byte, maxSize int) []string { + if len(diff) <= maxSize { + return []string{string(diff)} + } + content := string(diff) + // Split by file headers + parts := strings.Split(content, "\ndiff --git ") + chunks := []string{} + var current strings.Builder + for idx, part := range parts { + seg := part + if idx != 0 { + seg = "diff --git " + part + } + if current.Len()+len(seg) > maxSize && current.Len() > 0 { + chunks = append(chunks, current.String()) + current.Reset() + } + if len(seg) > maxSize { + // Split further by hunks + hunks := strings.Split(seg, "\n@@ ") + for j, h := range hunks { + hseg := h + if j != 0 { + hseg = "@@ " + h + } + if current.Len()+len(hseg) > maxSize && current.Len() > 0 { + chunks = append(chunks, current.String()) + current.Reset() + } + if len(hseg) > maxSize { + for len(hseg) > maxSize { + chunks = append(chunks, hseg[:maxSize]) + hseg = hseg[maxSize:] + } + current.WriteString(hseg) + } else { + current.WriteString(hseg) + } + } + } else { + current.WriteString(seg) + } + } + if current.Len() > 0 { + chunks = append(chunks, current.String()) + } + return chunks +} diff --git a/internal/pierre/judge_test.go b/internal/pierre/judge_test.go new file mode 100644 index 0000000..0aee11c --- /dev/null +++ b/internal/pierre/judge_test.go @@ -0,0 +1,57 @@ +package pierre + +import ( + "github.com/google/go-cmp/cmp" + "strings" + "testing" +) + +func TestSplitDiffIntoChunks_SmallDiff(t *testing.T) { + diff := "diff --git a/file1.txt b/file1.txt\n+added line\n" + max := 1000 + chunks := splitDiffIntoChunks([]byte(diff), max) + if got, want := len(chunks), 1; got != want { + t.Fatalf("expected %d chunk, got %d", want, got) + } + if diff != chunks[0] { + t.Fatalf("chunk content changed: %s", diff) + } +} + +func TestSplitDiffIntoChunks_MultipleFiles(t *testing.T) { + diff := "diff --git a/file1.txt b/file1.txt\n+added line 1\n" + + "diff --git a/file2.txt b/file2.txt\n+added line 2\n" + max := 50 // each file diff is less than this, but total exceeds + chunks := splitDiffIntoChunks([]byte(diff), max) + if got, want := len(chunks), 2; got != want { + t.Fatalf("expected %d chunks, got %d", want, got) + } + // Ensure each chunk starts with the proper file header + if !strings.HasPrefix(chunks[0], "diff --git a/file1.txt") { + t.Fatalf("first chunk does not contain file1 header: %s", chunks[0]) + } + if !strings.HasPrefix(chunks[1], "diff --git a/file2.txt") { + t.Fatalf("second chunk does not contain file2 header: %s", chunks[1]) + } +} + +func TestSplitDiffIntoChunks_LargeSingleFile(t *testing.T) { + // Create a diff that exceeds maxSize by repeating a line + line := "+very long added line that will be repeated many times to exceed the chunk size\n" + diff := "diff --git a/large.txt b/large.txt\n" + strings.Repeat(line, 200) + max := 500 // small limit + chunks := splitDiffIntoChunks([]byte(diff), max) + // Ensure no chunk exceeds max size + for i, c := range chunks { + if len(c) > max { + t.Fatalf("chunk %d exceeds max size: %d > %d", i, len(c), max) + } + } + // Recombine chunks and compare to original (ignoring possible split boundaries) + recombined := strings.Join(chunks, "") + if diff != recombined { + if d := cmp.Diff(diff, recombined); d != "" { + t.Fatalf("recombined diff differs:\n%s", d) + } + } +} diff --git a/internal/pierre/resource.go b/internal/pierre/resource.go index ecdc8b6..b562d53 100644 --- a/internal/pierre/resource.go +++ b/internal/pierre/resource.go @@ -8,14 +8,18 @@ import ( ) type Service struct { - git GitAdapter - chat ChatAdapter + maxChunkSize int + guidelines []string + git GitAdapter + chat ChatAdapter } -func New(chat ChatAdapter, git GitAdapter) *Service { +func New(chat ChatAdapter, git GitAdapter, maxChunkSize int, guidelines []string) *Service { return &Service{ - git: git, - chat: chat, + git: git, + chat: chat, + maxChunkSize: maxChunkSize, + guidelines: guidelines, } }