Compare commits

..

1 Commits

Author SHA1 Message Date
u80864958
61d538d4a5 feat(pierre): add diff chunking and configurable review settings 2026-02-13 17:00:18 +01:00
5 changed files with 142 additions and 64 deletions

View File

@@ -40,6 +40,7 @@ type LLMConfig struct {
type ReviewConfig struct { type ReviewConfig struct {
MaxChunkChars int `help:"Maximum diff chunk size in characters (default 60000)" default:"60000"` MaxChunkChars int `help:"Maximum diff chunk size in characters (default 60000)" default:"60000"`
Guidelines []string `help:"Project-specific review guidelines"` Guidelines []string `help:"Project-specific review guidelines"`
DisableComments bool `help:"Do not post comments to the Git provider (dryrun mode)"`
} }
type Config struct { type Config struct {
@@ -123,7 +124,7 @@ func main() {
log.Fatalf("Error initializing AI: %v", err) log.Fatalf("Error initializing AI: %v", err)
} }
pierreService := pierre.New(ai, git, cfg.Review.MaxChunkChars, cfg.Review.Guidelines) pierreService := pierre.New(ai, git, cfg.Review.MaxChunkChars, cfg.Review.Guidelines, cfg.Review.DisableComments)
if err := pierreService.MakeReview(context.Background(), cfg.Repo.Owner, cfg.Repo.Repo, cfg.Repo.PRID); err != nil { if err := pierreService.MakeReview(context.Background(), cfg.Repo.Owner, cfg.Repo.Repo, cfg.Repo.PRID); err != nil {
log.Fatalf("Error during review: %v", err) log.Fatalf("Error during review: %v", err)
} }

View File

@@ -39,29 +39,29 @@ func (s *Service) judgePR(ctx context.Context, diff io.Reader) (comments []Comme
} }
} }
baseSystem := ` baseSystem := strings.TrimSpace(`
You are a very strict senior software architect. You are a very strict senior software architect.
You review **only** newly added or modified lines in a unified diff, together with the immediate hunk context. You review **only** newly added or modified lines in a unified diff, together with the immediate hunk context.
You do **not** report issues that appear **solely** in deleted lines (“-) or that have already been fixed by the change. You do **not** report issues that appear **solely** in deleted lines (“-") or that have already been fixed by the change.
No comments are made on pure formatting/whitespace changes or reordering that does not alter the programs behavior.` + guidelinesText + ` No comments are made on pure formatting/whitespace changes or reordering that does not alter the programs behavior.`) + guidelinesText
`
for i, chunk := range chunks { for i, chunk := range chunks {
// Add a small header so the model knows this is a fragment // Add a small header so the model knows this is a fragment
header := fmt.Sprintf("\n--- Chunk %d of %d ---\n", i+1, len(chunks)) header := fmt.Sprintf("\n--- Chunk %d of %d ---\n", i+1, len(chunks))
userContent := fmt.Sprintf("Hello please review my PR. Write comments where improvements are necessary in new lines.%s\nHere is the git diff of it: %s", header, chunk) userContent := fmt.Sprintf("Hello please review my PR. Write comments where improvements are necessary in new lines.%s\nHere is the git diff of it: %s", header, chunk)
var chunkComments []Comment
err = s.chat.GenerateStructured(ctx, []chatter.Message{{ err = s.chat.GenerateStructured(ctx, []chatter.Message{{
Role: chatter.RoleSystem, Role: chatter.RoleSystem,
Content: baseSystem, Content: baseSystem,
}, { }, {
Role: chatter.RoleUser, Role: chatter.RoleUser,
Content: userContent, Content: userContent,
}}, &comments) }}, &chunkComments)
if err != nil { if err != nil {
return nil, err return nil, err
} }
allComments = append(allComments, comments...) allComments = append(allComments, chunkComments...)
} }
// Deduplicate comments (keyed by file:line) // Deduplicate comments (keyed by file:line)
@@ -91,7 +91,8 @@ func splitDiffIntoChunks(diff []byte, maxSize int) []string {
for idx, part := range parts { for idx, part := range parts {
seg := part seg := part
if idx != 0 { if idx != 0 {
seg = "diff --git " + part // Preserve the leading newline that was removed by Split
seg = "\n" + "diff --git " + part
} }
if current.Len()+len(seg) > maxSize && current.Len() > 0 { if current.Len()+len(seg) > maxSize && current.Len() > 0 {
chunks = append(chunks, current.String()) chunks = append(chunks, current.String())
@@ -128,3 +129,4 @@ func splitDiffIntoChunks(diff []byte, maxSize int) []string {
} }
return chunks return chunks
} }

View File

@@ -1,57 +1,127 @@
package pierre package pierre
import ( import (
"github.com/google/go-cmp/cmp" "bytes"
"context"
"io"
"strings" "strings"
"testing" "testing"
"git.schreifuchs.ch/schreifuchs/pierre-bot/internal/chatter"
"github.com/google/go-cmp/cmp"
) )
func TestSplitDiffIntoChunks_SmallDiff(t *testing.T) { // mockChat implements the ChatAdapter interface for testing.
diff := "diff --git a/file1.txt b/file1.txt\n+added line\n" type mockChat struct{}
max := 1000
chunks := splitDiffIntoChunks([]byte(diff), max) func (m *mockChat) GenerateStructured(ctx context.Context, msgs []chatter.Message, target interface{}) error {
if got, want := len(chunks), 1; got != want { if cSlice, ok := target.(*[]Comment); ok {
t.Fatalf("expected %d chunk, got %d", want, got) *cSlice = []Comment{{File: "file.go", Line: 1, Message: "test comment"}}
} return nil
if diff != chunks[0] {
t.Fatalf("chunk content changed: %s", diff)
} }
return nil
} }
func TestSplitDiffIntoChunks_MultipleFiles(t *testing.T) { func (m *mockChat) GetProviderName() string { return "mock" }
diff := "diff --git a/file1.txt b/file1.txt\n+added line 1\n" +
"diff --git a/file2.txt b/file2.txt\n+added line 2\n" // mockGit implements the GitAdapter interface for testing.
max := 50 // each file diff is less than this, but total exceeds type mockGit struct{}
chunks := splitDiffIntoChunks([]byte(diff), max)
if got, want := len(chunks), 2; got != want { func (g *mockGit) GetDiff(ctx context.Context, owner, repo string, prID int) (io.ReadCloser, error) {
t.Fatalf("expected %d chunks, got %d", want, got) diff := "diff --git a/file1.go b/file1.go\n+line1\n" + "diff --git a/file2.go b/file2.go\n+line2\n"
} return io.NopCloser(bytes.NewReader([]byte(diff))), nil
// Ensure each chunk starts with the proper file header
if !strings.HasPrefix(chunks[0], "diff --git a/file1.txt") {
t.Fatalf("first chunk does not contain file1 header: %s", chunks[0])
}
if !strings.HasPrefix(chunks[1], "diff --git a/file2.txt") {
t.Fatalf("second chunk does not contain file2 header: %s", chunks[1])
}
} }
func TestSplitDiffIntoChunks_LargeSingleFile(t *testing.T) { func (g *mockGit) AddComment(ctx context.Context, owner, repo string, prID int, comment Comment) error {
// Create a diff that exceeds maxSize by repeating a line return nil
}
func TestSplitDiffIntoChunks(t *testing.T) {
cases := []struct {
name string
diff string
maxSize int
wantChunks int // 0 means we don't assert exact count
wantPrefixes []string
checkRecombine bool
}{
{
name: "small diff",
diff: "diff --git a/file1.txt b/file1.txt\n+added line\n",
maxSize: 1000,
wantChunks: 1,
wantPrefixes: []string{"diff --git a/file1.txt"},
checkRecombine: true,
},
{
name: "multiple files",
diff: "diff --git a/file1.txt b/file1.txt\n+added line 1\n" +
"diff --git a/file2.txt b/file2.txt\n+added line 2\n",
maxSize: 50,
wantChunks: 2,
wantPrefixes: []string{"diff --git a/file1.txt", "diff --git a/file2.txt"},
checkRecombine: false,
},
{
name: "large single file",
diff: func() string {
line := "+very long added line that will be repeated many times to exceed the chunk size\n" line := "+very long added line that will be repeated many times to exceed the chunk size\n"
diff := "diff --git a/large.txt b/large.txt\n" + strings.Repeat(line, 200) return "diff --git a/large.txt b/large.txt\n" + strings.Repeat(line, 200)
max := 500 // small limit }(),
chunks := splitDiffIntoChunks([]byte(diff), max) maxSize: 500,
// Ensure no chunk exceeds max size wantChunks: 0,
wantPrefixes: nil,
checkRecombine: true,
},
}
for _, tc := range cases {
t.Run(tc.name, func(t *testing.T) {
chunks := splitDiffIntoChunks([]byte(tc.diff), tc.maxSize)
if tc.wantChunks > 0 && len(chunks) != tc.wantChunks {
t.Fatalf("expected %d chunks, got %d", tc.wantChunks, len(chunks))
}
for i, prefix := range tc.wantPrefixes {
if i >= len(chunks) {
t.Fatalf("missing chunk %d for prefix check", i)
}
trimmed := strings.TrimPrefix(chunks[i], "\n")
if !strings.HasPrefix(trimmed, prefix) {
t.Fatalf("chunk %d does not start with expected prefix %q: %s", i, prefix, chunks[i])
}
}
for i, c := range chunks { for i, c := range chunks {
if len(c) > max { if tc.maxSize > 0 && len(c) > tc.maxSize {
t.Fatalf("chunk %d exceeds max size: %d > %d", i, len(c), max) t.Fatalf("chunk %d exceeds max size %d: %d", i, tc.maxSize, len(c))
} }
} }
// Recombine chunks and compare to original (ignoring possible split boundaries) if tc.checkRecombine {
recombined := strings.Join(chunks, "") recombined := strings.Join(chunks, "")
if diff != recombined { if diff := cmp.Diff(tc.diff, recombined); diff != "" {
if d := cmp.Diff(diff, recombined); d != "" { t.Fatalf("recombined diff differs:\n%s", diff)
t.Fatalf("recombined diff differs:\n%s", d)
} }
} }
})
}
}
func TestJudgePR_ChunkAggregationAndDeduplication(t *testing.T) {
svc := &Service{
maxChunkSize: 50,
guidelines: nil,
git: &mockGit{},
chat: &mockChat{},
}
diffReader, err := svc.git.GetDiff(context.Background(), "", "", 0)
if err != nil {
t.Fatalf("failed to get diff: %v", err)
}
defer diffReader.Close()
comments, err := svc.judgePR(context.Background(), diffReader)
if err != nil {
t.Fatalf("judgePR error: %v", err)
}
if got, want := len(comments), 1; got != want {
t.Fatalf("expected %d comment after deduplication, got %d", want, got)
}
} }

View File

@@ -10,16 +10,18 @@ import (
type Service struct { type Service struct {
maxChunkSize int maxChunkSize int
guidelines []string guidelines []string
disableComments bool
git GitAdapter git GitAdapter
chat ChatAdapter chat ChatAdapter
} }
func New(chat ChatAdapter, git GitAdapter, maxChunkSize int, guidelines []string) *Service { func New(chat ChatAdapter, git GitAdapter, maxChunkSize int, guidelines []string, disableComments bool) *Service {
return &Service{ return &Service{
git: git, git: git,
chat: chat, chat: chat,
maxChunkSize: maxChunkSize, maxChunkSize: maxChunkSize,
guidelines: guidelines, guidelines: guidelines,
disableComments: disableComments,
} }
} }

View File

@@ -29,9 +29,12 @@ func (s *Service) MakeReview(ctx context.Context, organisation string, repo stri
fmt.Printf("File: %s\nLine: %d\nMessage: %s\n%s\n", fmt.Printf("File: %s\nLine: %d\nMessage: %s\n%s\n",
c.File, c.Line, c.Message, "---") c.File, c.Line, c.Message, "---")
if err := s.git.AddComment(ctx, organisation, repo, prID, c); err != nil { if s.disableComments {
log.Printf("dry-run: not posting comment for %s:%d", c.File, c.Line)
} else if err := s.git.AddComment(ctx, organisation, repo, prID, c); err != nil {
log.Printf("Failed to add comment: %v", err) log.Printf("Failed to add comment: %v", err)
} }
} }
return nil return nil