feat(pierre): add diff chunking and configurable review settings #2
@@ -37,7 +37,19 @@ type LLMConfig struct {
|
|||||||
Model string `help:"Model to use" env:"LLM_MODEL"`
|
Model string `help:"Model to use" env:"LLM_MODEL"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ReviewConfig holds the review-specific CLI options.
//
// It is embedded into Config with the "review-" prefix, so its flags are
// exposed under --review-….
type ReviewConfig struct {
	// MaxChunkSize is the upper bound, in bytes, for a single diff chunk.
	// The default tag is the string "60000" (60,000 bytes); Kong parses it
	// into this int field.
	MaxChunkSize int `help:"Maximum diff chunk size in bytes" default:"60000"`
	// Guidelines lists the project guidelines handed to the review service.
	// Several guidelines can be passed in one value, separated by commas.
	Guidelines []string `help:"Project guidelines to prepend" sep:","`
	// DisableComments switches the run to a dry run.
	DisableComments bool `help:"Disable posting comments (dry run)"`
}
|
||||||
|
|
||||||
type Config struct {
|
type Config struct {
|
||||||
|
// Embedding ReviewConfig with a prefix changes flag names to `--review-…`.
|
||||||
|
// Existing configuration files using the old flag names will need to be updated.
|
||||||
|
// Consider keeping backwards compatibility if required.
|
||||||
|
Review ReviewConfig `embed:"" prefix:"review-"`
|
||||||
GitProvider string `help:"Git provider (bitbucket or gitea)" env:"GIT_PROVIDER"`
|
GitProvider string `help:"Git provider (bitbucket or gitea)" env:"GIT_PROVIDER"`
|
||||||
Bitbucket BitbucketConfig `embed:"" prefix:"bitbucket-"`
|
Bitbucket BitbucketConfig `embed:"" prefix:"bitbucket-"`
|
||||||
Gitea GiteaConfig `embed:"" prefix:"gitea-"`
|
Gitea GiteaConfig `embed:"" prefix:"gitea-"`
|
||||||
@@ -117,7 +129,7 @@ func main() {
|
|||||||
log.Fatalf("Error initializing AI: %v", err)
|
log.Fatalf("Error initializing AI: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
pierreService := pierre.New(ai, git)
|
pierreService := pierre.New(ai, git, cfg.Review.MaxChunkSize, cfg.Review.Guidelines, cfg.Review.DisableComments)
|
||||||
if err := pierreService.MakeReview(context.Background(), cfg.Repo.Owner, cfg.Repo.Repo, cfg.Repo.PRID); err != nil {
|
if err := pierreService.MakeReview(context.Background(), cfg.Repo.Owner, cfg.Repo.Repo, cfg.Repo.PRID); err != nil {
|
||||||
log.Fatalf("Error during review: %v", err)
|
log.Fatalf("Error during review: %v", err)
|
||||||
}
|
}
|
||||||
|
|||||||
3
go.mod
3
go.mod
@@ -7,7 +7,9 @@ require (
|
|||||||
github.com/alecthomas/kong v1.14.0
|
github.com/alecthomas/kong v1.14.0
|
||||||
github.com/alecthomas/kong-yaml v0.2.0
|
github.com/alecthomas/kong-yaml v0.2.0
|
||||||
github.com/google/generative-ai-go v0.20.1
|
github.com/google/generative-ai-go v0.20.1
|
||||||
|
schreifuchs marked this conversation as resolved
|
|||||||
|
github.com/google/go-cmp v0.7.0
|
||||||
github.com/ollama/ollama v0.16.0
|
github.com/ollama/ollama v0.16.0
|
||||||
|
github.com/sashabaranov/go-openai v1.41.2
|
||||||
google.golang.org/api v0.186.0
|
google.golang.org/api v0.186.0
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -34,7 +36,6 @@ require (
|
|||||||
github.com/googleapis/gax-go/v2 v2.12.5 // indirect
|
github.com/googleapis/gax-go/v2 v2.12.5 // indirect
|
||||||
github.com/hashicorp/go-version v1.7.0 // indirect
|
github.com/hashicorp/go-version v1.7.0 // indirect
|
||||||
github.com/mailru/easyjson v0.7.7 // indirect
|
github.com/mailru/easyjson v0.7.7 // indirect
|
||||||
github.com/sashabaranov/go-openai v1.41.2 // indirect
|
|
||||||
github.com/wk8/go-ordered-map/v2 v2.1.8 // indirect
|
github.com/wk8/go-ordered-map/v2 v2.1.8 // indirect
|
||||||
go.opencensus.io v0.24.0 // indirect
|
go.opencensus.io v0.24.0 // indirect
|
||||||
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.51.0 // indirect
|
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.51.0 // indirect
|
||||||
|
|||||||
@@ -31,7 +31,7 @@ func (b *BitbucketAdapter) GetDiff(ctx context.Context, projectKey, repositorySl
|
|||||||
if response.StatusCode != http.StatusOK {
|
if response.StatusCode != http.StatusOK {
|
||||||
sb := &strings.Builder{}
|
sb := &strings.Builder{}
|
||||||
io.Copy(sb, response.Body)
|
io.Copy(sb, response.Body)
|
||||||
err = fmt.Errorf("error while fetching bitbucket diff staus %d, body %s", response.Status, sb.String())
|
err = fmt.Errorf("error while fetching bitbucket diff status %d, body %s", response.StatusCode, sb.String())
|
||||||
}
|
}
|
||||||
|
|
||||||
diff = response.Body
|
diff = response.Body
|
||||||
|
|||||||
@@ -4,10 +4,14 @@ import (
|
|||||||
"context"
|
"context"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
|
"strings"
|
||||||
|
|
||||||
"git.schreifuchs.ch/schreifuchs/pierre-bot/internal/chatter"
|
"git.schreifuchs.ch/schreifuchs/pierre-bot/internal/chatter"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// DefaultChunkSize is the fallback maximum size (in bytes) for a diff chunk when no explicit value is configured.
// judgePR substitutes it whenever the service's maxChunkSize is zero or negative.
|
||||||
|
const DefaultChunkSize = 60000
|
||||||
|
|
||||||
type Comment struct {
|
type Comment struct {
|
||||||
File string `json:"file"`
|
File string `json:"file"`
|
||||||
Line int `json:"line"`
|
Line int `json:"line"`
|
||||||
@@ -19,21 +23,121 @@ func (s *Service) judgePR(ctx context.Context, diff io.Reader) (comments []Comme
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("failed to read diff: %w", err)
|
return nil, fmt.Errorf("failed to read diff: %w", err)
|
||||||
}
|
}
|
||||||
err = s.chat.GenerateStructured(ctx, []chatter.Message{
|
|
||||||
{
|
|
||||||
Role: chatter.RoleSystem,
|
|
||||||
Content: `
|
|
||||||
You are a very strict senior software architect.
|
|
||||||
You review **only** newly added or modified lines in a unified diff, together with the immediate hunk context.
|
|
||||||
You do **not** report issues that appear **solely** in deleted lines (“-”) or that have already been fixed by the change.
|
|
||||||
No comments are made on pure formatting/whitespace changes or reordering that does not alter the program’s behavior.
|
|
||||||
`,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
Role: chatter.RoleUser,
|
|
||||||
Content: fmt.Sprintf("Hello please review my PR. Write comments where improvements are necessary in new lines.\n Here is the git diff of it: %s", string(diffBytes)),
|
|
||||||
},
|
|
||||||
}, &comments)
|
|
||||||
|
|
||||||
|
// Determine chunk size (use default if not set)
|
||||||
|
maxSize := s.maxChunkSize
|
||||||
|
if maxSize <= 0 {
|
||||||
|
maxSize = DefaultChunkSize // default 60KB ~ 15k tokens
|
||||||
|
}
|
||||||
|
|
||||||
|
chunks := splitDiffIntoChunks(diffBytes, maxSize)
|
||||||
|
allComments := []Comment{}
|
||||||
|
|
||||||
|
// Build optional guidelines text (added as a separate section with a clear delimiter)
|
||||||
|
guidelinesText := ""
|
||||||
|
if len(s.guidelines) > 0 {
|
||||||
|
// Two newlines ensure the guidelines start on a fresh paragraph.
|
||||||
|
guidelinesText = "\n\nProject guidelines:\n"
|
||||||
|
for _, g := range s.guidelines {
|
||||||
|
guidelinesText += "- " + g + "\n"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// System prompt that instructs the LLM precisely.
|
||||||
|
baseSystem := strings.TrimSpace(`
|
||||||
|
You are a strict senior software architect.
|
||||||
|
Only comment on newly added or modified lines in the diff; ignore deletions, pure formatting, or re‑ordering that does not change behavior.
|
||||||
|
For each issue output a JSON object with fields "file", "line", and "message" (message should be concise, ≤2 sentences, and actionable).
|
||||||
|
If project guidelines are provided, treat them as hard rules that must be respected.`) + guidelinesText
|
||||||
|
|
||||||
|
for i, chunk := range chunks {
|
||||||
|
// Include the chunk identifier in the system message only if there are multiple chunks.
|
||||||
|
systemContent := baseSystem
|
||||||
|
if len(chunks) > 1 {
|
||||||
|
systemContent = fmt.Sprintf("%s\nChunk %d of %d.", baseSystem, i+1, len(chunks))
|
||||||
|
}
|
||||||
|
userContent := chunk
|
||||||
|
|
||||||
|
var chunkComments []Comment
|
||||||
|
err = s.chat.GenerateStructured(ctx, []chatter.Message{{
|
||||||
|
Role: chatter.RoleSystem,
|
||||||
|
Content: systemContent,
|
||||||
|
}, {
|
||||||
|
Role: chatter.RoleUser,
|
||||||
|
Content: userContent,
|
||||||
|
}}, &chunkComments)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
allComments = append(allComments, chunkComments...)
|
||||||
|
}
|
||||||
|
|
||||||
|
// De‑duplicate comments (keyed by file:line)
|
||||||
|
unique := make(map[string]Comment)
|
||||||
|
for _, c := range allComments {
|
||||||
|
key := fmt.Sprintf("%s:%d", c.File, c.Line)
|
||||||
|
unique[key] = c
|
||||||
|
}
|
||||||
|
for _, v := range unique {
|
||||||
|
comments = append(comments, v)
|
||||||
|
}
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// splitDiffIntoChunks splits a unified diff into consecutive chunks of at most
// maxSize bytes each. Concatenating the returned chunks reproduces diff exactly.
//
// Boundaries are chosen in this order of preference:
//  1. file headers ("\ndiff --git "),
//  2. hunk headers ("\n@@ ") inside a file that is larger than maxSize,
//  3. a hard cut inside a hunk that is still too large; the cut is placed
//     after the last newline that fits, otherwise on a UTF-8 rune boundary,
//     and only as a last resort at exactly maxSize bytes.
//
// Because the markers start with the newline that precedes them, every chunk
// that begins at a file or hunk header starts with "\n" and the chunk before
// it has no trailing newline.
//
// A maxSize <= 0 is treated as "no limit" and yields the whole diff as one
// chunk; a hard cut with such a limit could never make progress.
func splitDiffIntoChunks(diff []byte, maxSize int) []string {
	content := string(diff)
	if maxSize <= 0 || len(content) <= maxSize {
		return []string{content}
	}

	var (
		chunks  []string
		current strings.Builder
	)
	// flush closes the chunk under construction, if it holds anything.
	flush := func() {
		if current.Len() > 0 {
			chunks = append(chunks, current.String())
			current.Reset()
		}
	}
	// add appends seg (len(seg) <= maxSize) to the chunk under construction,
	// starting a new chunk first when seg would push it over the limit.
	add := func(seg string) {
		if current.Len()+len(seg) > maxSize {
			flush()
		}
		current.WriteString(seg)
	}

	for _, file := range splitBefore(content, "\ndiff --git ") {
		if len(file) <= maxSize {
			add(file)
			continue
		}
		// An oversized file always starts in a fresh chunk so that its header
		// stays together with its first hunks.
		flush()
		for _, hunk := range splitBefore(file, "\n@@ ") {
			for len(hunk) > maxSize {
				flush()
				cut := hardCutIndex(hunk, maxSize)
				chunks = append(chunks, hunk[:cut])
				hunk = hunk[cut:]
			}
			add(hunk)
		}
	}
	flush()
	return chunks
}

// splitBefore cuts s in front of every occurrence of marker: each piece after
// the first starts with marker, and the pieces concatenate back to s.
func splitBefore(s, marker string) []string {
	pieces := strings.Split(s, marker)
	for i := 1; i < len(pieces); i++ {
		pieces[i] = marker + pieces[i]
	}
	return pieces
}

// hardCutIndex returns where to cut s (len(s) > maxSize > 0) so that the
// prefix is non-empty and at most maxSize bytes long.
func hardCutIndex(s string, maxSize int) int {
	// Prefer ending the chunk right after a complete line. A newline at index 0
	// is skipped: cutting there would emit a chunk holding only "\n".
	if nl := strings.LastIndexByte(s[:maxSize], '\n'); nl > 0 {
		return nl + 1
	}
	// Otherwise back off until the byte after the cut starts a rune, so no
	// multi-byte UTF-8 sequence is torn apart (continuation bytes are 10xxxxxx).
	cut := maxSize
	for cut > 0 && s[cut]&0xC0 == 0x80 {
		cut--
	}
	if cut == 0 {
		// maxSize is smaller than the rune itself; cut by bytes to guarantee progress.
		return maxSize
	}
	return cut
}
|
||||||
|
|||||||
132
internal/pierre/judge_test.go
Normal file
132
internal/pierre/judge_test.go
Normal file
@@ -0,0 +1,132 @@
|
|||||||
|
package pierre
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"context"
|
||||||
|
"io"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"git.schreifuchs.ch/schreifuchs/pierre-bot/internal/chatter"
|
||||||
|
"github.com/google/go-cmp/cmp"
|
||||||
|
)
|
||||||
|
|
||||||
|
// mockChat implements the ChatAdapter interface for testing.
|
||||||
|
type mockChat struct{ callCount int }
|
||||||
|
|
||||||
|
func (m *mockChat) GenerateStructured(ctx context.Context, msgs []chatter.Message, target interface{}) error {
|
||||||
|
m.callCount++
|
||||||
|
if cSlice, ok := target.(*[]Comment); ok {
|
||||||
|
*cSlice = []Comment{{File: "file.go", Line: 1, Message: "test comment"}}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *mockChat) GetProviderName() string { return "mock" }
|
||||||
|
|
||||||
|
// mockGit implements the GitAdapter interface for testing.
|
||||||
|
type mockGit struct{}
|
||||||
|
|
||||||
|
func (g *mockGit) GetDiff(ctx context.Context, owner, repo string, prID int) (io.ReadCloser, error) {
|
||||||
|
diff := "diff --git a/file1.go b/file1.go\n+line1\n" + "diff --git a/file2.go b/file2.go\n+line2\n"
|
||||||
|
return io.NopCloser(bytes.NewReader([]byte(diff))), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (g *mockGit) AddComment(ctx context.Context, owner, repo string, prID int, comment Comment) error {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestSplitDiffIntoChunks(t *testing.T) {
|
||||||
|
cases := []struct {
|
||||||
|
name string
|
||||||
|
diff string
|
||||||
|
maxSize int
|
||||||
|
wantChunks int // 0 means we don't assert exact count
|
||||||
|
wantPrefixes []string
|
||||||
|
checkRecombine bool
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "small diff",
|
||||||
|
diff: "diff --git a/file1.txt b/file1.txt\n+added line\n",
|
||||||
|
maxSize: 1000,
|
||||||
|
wantChunks: 1,
|
||||||
|
wantPrefixes: []string{"diff --git a/file1.txt"},
|
||||||
|
checkRecombine: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "multiple files",
|
||||||
|
diff: "diff --git a/file1.txt b/file1.txt\n+added line 1\n" +
|
||||||
|
"diff --git a/file2.txt b/file2.txt\n+added line 2\n",
|
||||||
|
maxSize: 50,
|
||||||
|
wantChunks: 2,
|
||||||
|
wantPrefixes: []string{"diff --git a/file1.txt", "diff --git a/file2.txt"},
|
||||||
|
checkRecombine: false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "large single file",
|
||||||
|
diff: func() string {
|
||||||
|
line := "+very long added line that will be repeated many times to exceed the chunk size\n"
|
||||||
|
return "diff --git a/large.txt b/large.txt\n" + strings.Repeat(line, 200)
|
||||||
|
}(),
|
||||||
|
maxSize: 500,
|
||||||
|
wantChunks: 0,
|
||||||
|
wantPrefixes: nil,
|
||||||
|
checkRecombine: true,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tc := range cases {
|
||||||
|
t.Run(tc.name, func(t *testing.T) {
|
||||||
|
chunks := splitDiffIntoChunks([]byte(tc.diff), tc.maxSize)
|
||||||
|
if tc.wantChunks > 0 && len(chunks) != tc.wantChunks {
|
||||||
|
t.Fatalf("expected %d chunks, got %d", tc.wantChunks, len(chunks))
|
||||||
|
}
|
||||||
|
for i, prefix := range tc.wantPrefixes {
|
||||||
|
if i >= len(chunks) {
|
||||||
|
t.Fatalf("missing chunk %d for prefix check", i)
|
||||||
|
}
|
||||||
|
trimmed := strings.TrimPrefix(chunks[i], "\n")
|
||||||
|
if !strings.HasPrefix(trimmed, prefix) {
|
||||||
|
t.Fatalf("chunk %d does not start with expected prefix %q: %s", i, prefix, chunks[i])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for i, c := range chunks {
|
||||||
|
if tc.maxSize > 0 && len(c) > tc.maxSize {
|
||||||
|
t.Fatalf("chunk %d exceeds max size %d: %d", i, tc.maxSize, len(c))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if tc.checkRecombine {
|
||||||
|
recombined := strings.Join(chunks, "")
|
||||||
|
if diff := cmp.Diff(tc.diff, recombined); diff != "" {
|
||||||
|
t.Fatalf("recombined diff differs:\n%s", diff)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestJudgePR_ChunkAggregationAndDeduplication(t *testing.T) {
|
||||||
|
chatMock := &mockChat{}
|
||||||
|
svc := &Service{
|
||||||
|
maxChunkSize: 50,
|
||||||
|
guidelines: nil,
|
||||||
|
git: &mockGit{},
|
||||||
|
chat: chatMock,
|
||||||
|
}
|
||||||
|
diffReader, err := svc.git.GetDiff(context.Background(), "", "", 0)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("failed to get diff: %v", err)
|
||||||
|
}
|
||||||
|
defer diffReader.Close()
|
||||||
|
comments, err := svc.judgePR(context.Background(), diffReader)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("judgePR error: %v", err)
|
||||||
|
}
|
||||||
|
if got, want := len(comments), 1; got != want {
|
||||||
|
t.Fatalf("expected %d comment after deduplication, got %d", want, got)
|
||||||
|
}
|
||||||
|
if chatMock.callCount != 2 {
|
||||||
|
t.Fatalf("expected mockChat to be called for each chunk (2), got %d", chatMock.callCount)
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -7,15 +7,26 @@ import (
|
|||||||
"git.schreifuchs.ch/schreifuchs/pierre-bot/internal/chatter"
|
"git.schreifuchs.ch/schreifuchs/pierre-bot/internal/chatter"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// Service holds the core collaborators and configuration for Pierre.
|
||||||
|
// The order of the fields is intentional: configuration fields first (used
|
||||||
|
// during initialization) followed by the adapters. This prevents accidental
|
||||||
|
// changes to the serialized layout if encoding/gob or encoding/json is used
|
||||||
|
// elsewhere in the future.
|
||||||
type Service struct {
|
type Service struct {
|
||||||
|
maxChunkSize int
|
||||||
|
guidelines []string
|
||||||
|
disableComments bool
|
||||||
git GitAdapter
|
git GitAdapter
|
||||||
chat ChatAdapter
|
chat ChatAdapter
|
||||||
}
|
}
|
||||||
|
|
||||||
func New(chat ChatAdapter, git GitAdapter) *Service {
|
func New(chat ChatAdapter, git GitAdapter, maxChunkSize int, guidelines []string, disableComments bool) *Service {
|
||||||
return &Service{
|
return &Service{
|
||||||
git: git,
|
git: git,
|
||||||
chat: chat,
|
chat: chat,
|
||||||
|
maxChunkSize: maxChunkSize,
|
||||||
|
guidelines: guidelines,
|
||||||
|
disableComments: disableComments,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -26,13 +26,19 @@ func (s *Service) MakeReview(ctx context.Context, organisation string, repo stri
|
|||||||
|
|
||||||
for _, c := range comments {
|
for _, c := range comments {
|
||||||
c.Message = fmt.Sprintf("%s (Generated by: %s)", c.Message, model)
|
c.Message = fmt.Sprintf("%s (Generated by: %s)", c.Message, model)
|
||||||
|
|
||||||
|
if s.disableComments {
|
||||||
|
// Dry‑run: just log what would have been posted.
|
||||||
|
log.Printf("dry‑run: %s:%d => %s", c.File, c.Line, c.Message)
|
||||||
|
} else {
|
||||||
|
schreifuchs marked this conversation as resolved
schreifuchs
commented
Update the comment above the block to reflect that comments are posted only when not in dry‑run mode. (Reason: the existing comment is misleading — the code posts comments only when `disableComments` (dry‑run) is false, so the comment should state that posting occurs when not in dry‑run mode.) (Generated by: OpenAI (openai/gpt-oss-120b))
|
|||||||
|
// Normal mode: print to stdout and post the comment to the VCS.
|
||||||
fmt.Printf("File: %s\nLine: %d\nMessage: %s\n%s\n",
|
fmt.Printf("File: %s\nLine: %d\nMessage: %s\n%s\n",
|
||||||
c.File, c.Line, c.Message, "---")
|
c.File, c.Line, c.Message, "---")
|
||||||
|
|
||||||
if err := s.git.AddComment(ctx, organisation, repo, prID, c); err != nil {
|
if err := s.git.AddComment(ctx, organisation, repo, prID, c); err != nil {
|
||||||
log.Printf("Failed to add comment: %v", err)
|
log.Printf("Failed to add comment: %v", err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user
You added
`github.com/google/go-cmp v0.7.0` as a direct dependency – good for the new tests. Remember to run `go mod tidy` so that the `go.sum` is updated accordingly. (Generated by: OpenAI (openai/gpt-oss-120b))