Compare commits

..

1 Commits

Author SHA1 Message Date
u80864958
985fac347c feat(pierre): add diff chunking and configurable review settings 2026-02-13 16:21:39 +01:00
5 changed files with 64 additions and 142 deletions

View File

@@ -38,9 +38,8 @@ type LLMConfig struct {
} }
type ReviewConfig struct { type ReviewConfig struct {
MaxChunkChars int `help:"Maximum diff chunk size in characters (default 60000)" default:"60000"` MaxChunkChars int `help:"Maximum diff chunk size in characters (default 60000)" default:"60000"`
Guidelines []string `help:"Project-specific review guidelines"` Guidelines []string `help:"Project-specific review guidelines"`
DisableComments bool `help:"Do not post comments to the Git provider (dryrun mode)"`
} }
type Config struct { type Config struct {
@@ -124,7 +123,7 @@ func main() {
log.Fatalf("Error initializing AI: %v", err) log.Fatalf("Error initializing AI: %v", err)
} }
pierreService := pierre.New(ai, git, cfg.Review.MaxChunkChars, cfg.Review.Guidelines, cfg.Review.DisableComments) pierreService := pierre.New(ai, git, cfg.Review.MaxChunkChars, cfg.Review.Guidelines)
if err := pierreService.MakeReview(context.Background(), cfg.Repo.Owner, cfg.Repo.Repo, cfg.Repo.PRID); err != nil { if err := pierreService.MakeReview(context.Background(), cfg.Repo.Owner, cfg.Repo.Repo, cfg.Repo.PRID); err != nil {
log.Fatalf("Error during review: %v", err) log.Fatalf("Error during review: %v", err)
} }

View File

@@ -39,29 +39,29 @@ func (s *Service) judgePR(ctx context.Context, diff io.Reader) (comments []Comme
} }
} }
baseSystem := strings.TrimSpace(` baseSystem := `
You are a very strict senior software architect. You are a very strict senior software architect.
You review **only** newly added or modified lines in a unified diff, together with the immediate hunk context. You review **only** newly added or modified lines in a unified diff, together with the immediate hunk context.
You do **not** report issues that appear **solely** in deleted lines (“-") or that have already been fixed by the change. You do **not** report issues that appear **solely** in deleted lines (“-) or that have already been fixed by the change.
No comments are made on pure formatting/whitespace changes or reordering that does not alter the programs behavior.`) + guidelinesText No comments are made on pure formatting/whitespace changes or reordering that does not alter the programs behavior.` + guidelinesText + `
`
for i, chunk := range chunks { for i, chunk := range chunks {
// Add a small header so the model knows this is a fragment // Add a small header so the model knows this is a fragment
header := fmt.Sprintf("\n--- Chunk %d of %d ---\n", i+1, len(chunks)) header := fmt.Sprintf("\n--- Chunk %d of %d ---\n", i+1, len(chunks))
userContent := fmt.Sprintf("Hello please review my PR. Write comments where improvements are necessary in new lines.%s\nHere is the git diff of it: %s", header, chunk) userContent := fmt.Sprintf("Hello please review my PR. Write comments where improvements are necessary in new lines.%s\nHere is the git diff of it: %s", header, chunk)
var chunkComments []Comment
err = s.chat.GenerateStructured(ctx, []chatter.Message{{ err = s.chat.GenerateStructured(ctx, []chatter.Message{{
Role: chatter.RoleSystem, Role: chatter.RoleSystem,
Content: baseSystem, Content: baseSystem,
}, { }, {
Role: chatter.RoleUser, Role: chatter.RoleUser,
Content: userContent, Content: userContent,
}}, &chunkComments) }}, &comments)
if err != nil { if err != nil {
return nil, err return nil, err
} }
allComments = append(allComments, chunkComments...) allComments = append(allComments, comments...)
} }
// Deduplicate comments (keyed by file:line) // Deduplicate comments (keyed by file:line)
@@ -91,8 +91,7 @@ func splitDiffIntoChunks(diff []byte, maxSize int) []string {
for idx, part := range parts { for idx, part := range parts {
seg := part seg := part
if idx != 0 { if idx != 0 {
// Preserve the leading newline that was removed by Split seg = "diff --git " + part
seg = "\n" + "diff --git " + part
} }
if current.Len()+len(seg) > maxSize && current.Len() > 0 { if current.Len()+len(seg) > maxSize && current.Len() > 0 {
chunks = append(chunks, current.String()) chunks = append(chunks, current.String())
@@ -129,4 +128,3 @@ func splitDiffIntoChunks(diff []byte, maxSize int) []string {
} }
return chunks return chunks
} }

View File

@@ -1,127 +1,57 @@
package pierre package pierre
import ( import (
"bytes" "github.com/google/go-cmp/cmp"
"context"
"io"
"strings" "strings"
"testing" "testing"
"git.schreifuchs.ch/schreifuchs/pierre-bot/internal/chatter"
"github.com/google/go-cmp/cmp"
) )
// mockChat implements the ChatAdapter interface for testing. func TestSplitDiffIntoChunks_SmallDiff(t *testing.T) {
type mockChat struct{} diff := "diff --git a/file1.txt b/file1.txt\n+added line\n"
max := 1000
func (m *mockChat) GenerateStructured(ctx context.Context, msgs []chatter.Message, target interface{}) error { chunks := splitDiffIntoChunks([]byte(diff), max)
if cSlice, ok := target.(*[]Comment); ok { if got, want := len(chunks), 1; got != want {
*cSlice = []Comment{{File: "file.go", Line: 1, Message: "test comment"}} t.Fatalf("expected %d chunk, got %d", want, got)
return nil
} }
return nil if diff != chunks[0] {
} t.Fatalf("chunk content changed: %s", diff)
func (m *mockChat) GetProviderName() string { return "mock" }
// mockGit implements the GitAdapter interface for testing.
type mockGit struct{}
func (g *mockGit) GetDiff(ctx context.Context, owner, repo string, prID int) (io.ReadCloser, error) {
diff := "diff --git a/file1.go b/file1.go\n+line1\n" + "diff --git a/file2.go b/file2.go\n+line2\n"
return io.NopCloser(bytes.NewReader([]byte(diff))), nil
}
func (g *mockGit) AddComment(ctx context.Context, owner, repo string, prID int, comment Comment) error {
return nil
}
func TestSplitDiffIntoChunks(t *testing.T) {
cases := []struct {
name string
diff string
maxSize int
wantChunks int // 0 means we don't assert exact count
wantPrefixes []string
checkRecombine bool
}{
{
name: "small diff",
diff: "diff --git a/file1.txt b/file1.txt\n+added line\n",
maxSize: 1000,
wantChunks: 1,
wantPrefixes: []string{"diff --git a/file1.txt"},
checkRecombine: true,
},
{
name: "multiple files",
diff: "diff --git a/file1.txt b/file1.txt\n+added line 1\n" +
"diff --git a/file2.txt b/file2.txt\n+added line 2\n",
maxSize: 50,
wantChunks: 2,
wantPrefixes: []string{"diff --git a/file1.txt", "diff --git a/file2.txt"},
checkRecombine: false,
},
{
name: "large single file",
diff: func() string {
line := "+very long added line that will be repeated many times to exceed the chunk size\n"
return "diff --git a/large.txt b/large.txt\n" + strings.Repeat(line, 200)
}(),
maxSize: 500,
wantChunks: 0,
wantPrefixes: nil,
checkRecombine: true,
},
}
for _, tc := range cases {
t.Run(tc.name, func(t *testing.T) {
chunks := splitDiffIntoChunks([]byte(tc.diff), tc.maxSize)
if tc.wantChunks > 0 && len(chunks) != tc.wantChunks {
t.Fatalf("expected %d chunks, got %d", tc.wantChunks, len(chunks))
}
for i, prefix := range tc.wantPrefixes {
if i >= len(chunks) {
t.Fatalf("missing chunk %d for prefix check", i)
}
trimmed := strings.TrimPrefix(chunks[i], "\n")
if !strings.HasPrefix(trimmed, prefix) {
t.Fatalf("chunk %d does not start with expected prefix %q: %s", i, prefix, chunks[i])
}
}
for i, c := range chunks {
if tc.maxSize > 0 && len(c) > tc.maxSize {
t.Fatalf("chunk %d exceeds max size %d: %d", i, tc.maxSize, len(c))
}
}
if tc.checkRecombine {
recombined := strings.Join(chunks, "")
if diff := cmp.Diff(tc.diff, recombined); diff != "" {
t.Fatalf("recombined diff differs:\n%s", diff)
}
}
})
} }
} }
func TestJudgePR_ChunkAggregationAndDeduplication(t *testing.T) { func TestSplitDiffIntoChunks_MultipleFiles(t *testing.T) {
svc := &Service{ diff := "diff --git a/file1.txt b/file1.txt\n+added line 1\n" +
maxChunkSize: 50, "diff --git a/file2.txt b/file2.txt\n+added line 2\n"
guidelines: nil, max := 50 // each file diff is less than this, but total exceeds
git: &mockGit{}, chunks := splitDiffIntoChunks([]byte(diff), max)
chat: &mockChat{}, if got, want := len(chunks), 2; got != want {
t.Fatalf("expected %d chunks, got %d", want, got)
} }
diffReader, err := svc.git.GetDiff(context.Background(), "", "", 0) // Ensure each chunk starts with the proper file header
if err != nil { if !strings.HasPrefix(chunks[0], "diff --git a/file1.txt") {
t.Fatalf("failed to get diff: %v", err) t.Fatalf("first chunk does not contain file1 header: %s", chunks[0])
} }
defer diffReader.Close() if !strings.HasPrefix(chunks[1], "diff --git a/file2.txt") {
comments, err := svc.judgePR(context.Background(), diffReader) t.Fatalf("second chunk does not contain file2 header: %s", chunks[1])
if err != nil { }
t.Fatalf("judgePR error: %v", err) }
}
if got, want := len(comments), 1; got != want { func TestSplitDiffIntoChunks_LargeSingleFile(t *testing.T) {
t.Fatalf("expected %d comment after deduplication, got %d", want, got) // Create a diff that exceeds maxSize by repeating a line
line := "+very long added line that will be repeated many times to exceed the chunk size\n"
diff := "diff --git a/large.txt b/large.txt\n" + strings.Repeat(line, 200)
max := 500 // small limit
chunks := splitDiffIntoChunks([]byte(diff), max)
// Ensure no chunk exceeds max size
for i, c := range chunks {
if len(c) > max {
t.Fatalf("chunk %d exceeds max size: %d > %d", i, len(c), max)
}
}
// Recombine chunks and compare to original (ignoring possible split boundaries)
recombined := strings.Join(chunks, "")
if diff != recombined {
if d := cmp.Diff(diff, recombined); d != "" {
t.Fatalf("recombined diff differs:\n%s", d)
}
} }
} }

View File

@@ -8,20 +8,18 @@ import (
) )
type Service struct { type Service struct {
maxChunkSize int maxChunkSize int
guidelines []string guidelines []string
disableComments bool git GitAdapter
git GitAdapter chat ChatAdapter
chat ChatAdapter
} }
func New(chat ChatAdapter, git GitAdapter, maxChunkSize int, guidelines []string, disableComments bool) *Service { func New(chat ChatAdapter, git GitAdapter, maxChunkSize int, guidelines []string) *Service {
return &Service{ return &Service{
git: git, git: git,
chat: chat, chat: chat,
maxChunkSize: maxChunkSize, maxChunkSize: maxChunkSize,
guidelines: guidelines, guidelines: guidelines,
disableComments: disableComments,
} }
} }

View File

@@ -29,12 +29,9 @@ func (s *Service) MakeReview(ctx context.Context, organisation string, repo stri
fmt.Printf("File: %s\nLine: %d\nMessage: %s\n%s\n", fmt.Printf("File: %s\nLine: %d\nMessage: %s\n%s\n",
c.File, c.Line, c.Message, "---") c.File, c.Line, c.Message, "---")
if s.disableComments { if err := s.git.AddComment(ctx, organisation, repo, prID, c); err != nil {
log.Printf("dry-run: not posting comment for %s:%d", c.File, c.Line)
} else if err := s.git.AddComment(ctx, organisation, repo, prID, c); err != nil {
log.Printf("Failed to add comment: %v", err) log.Printf("Failed to add comment: %v", err)
} }
} }
return nil return nil