feat(pierre): add diff chunking and configurable review settings

This commit is contained in:
u80864958
2026-02-13 16:18:49 +01:00
parent 2cb64194b9
commit ac5ff7aeeb
8 changed files with 281 additions and 24 deletions

View File

@@ -4,10 +4,13 @@ import (
"context"
"fmt"
"io"
"strings"
"git.schreifuchs.ch/schreifuchs/pierre-bot/internal/chatter"
)
const defaultChunkSize = 60000
type Comment struct {
File string `json:"file"`
Line int `json:"line"`
@@ -19,21 +22,116 @@ func (s *Service) judgePR(ctx context.Context, diff io.Reader) (comments []Comme
if err != nil {
return nil, fmt.Errorf("failed to read diff: %w", err)
}
err = s.chat.GenerateStructured(ctx, []chatter.Message{
{
Role: chatter.RoleSystem,
Content: `
You are a very strict senior software architect.
You review **only** newly added or modified lines in a unified diff, together with the immediate hunk context.
You do **not** report issues that appear **solely** in deleted lines (“-”) or that have already been fixed by the change.
No comments are made on pure formatting/whitespace changes or reordering that does not alter the programs behavior.
`,
},
{
Role: chatter.RoleUser,
Content: fmt.Sprintf("Hello please review my PR. Write comments where improvements are necessary in new lines.\n Here is the git diff of it: %s", string(diffBytes)),
},
}, &comments)
// Determine chunk size (use default if not set)
maxSize := s.maxChunkSize
if maxSize <= 0 {
maxSize = defaultChunkSize // default 60KB ~ 15k tokens
}
chunks := splitDiffIntoChunks(diffBytes, maxSize)
allComments := []Comment{}
// Build optional guidelines text
guidelinesText := ""
if len(s.guidelines) > 0 {
guidelinesText = "Project guidelines:\n"
for _, g := range s.guidelines {
guidelinesText += "- " + g + "\n"
}
}
baseSystem := strings.TrimSpace(`
You are a very strict senior software architect.
You review **only** newly added or modified lines in a unified diff, together with the immediate hunk context.
You do **not** report issues that appear **solely** in deleted lines (“-") or that have already been fixed by the change.
No comments are made on pure formatting/whitespace changes or reordering that does not alter the programs behavior.`) + guidelinesText
for i, chunk := range chunks {
// Add a small header so the model knows this is a fragment
header := fmt.Sprintf("\n--- Chunk %d of %d ---\n", i+1, len(chunks))
userContent := fmt.Sprintf("Hello please review my PR. Write comments where improvements are necessary in new lines.%s\nHere is the git diff of it: %s", header, chunk)
var chunkComments []Comment
err = s.chat.GenerateStructured(ctx, []chatter.Message{{
Role: chatter.RoleSystem,
Content: baseSystem,
}, {
Role: chatter.RoleUser,
Content: userContent,
}}, &chunkComments)
if err != nil {
return nil, err
}
allComments = append(allComments, chunkComments...)
}
// Deduplicate comments (keyed by file:line)
unique := make(map[string]Comment)
for _, c := range allComments {
key := fmt.Sprintf("%s:%d", c.File, c.Line)
unique[key] = c
}
for _, v := range unique {
comments = append(comments, v)
}
return
}
// splitDiffIntoChunks splits a diff into chunks that do not exceed maxSize bytes.
// It tries to split on file boundaries ("diff --git") first, then on hunk boundaries (@@),
// and finally on a hard byte limit.
func splitDiffIntoChunks(diff []byte, maxSize int) []string {
if len(diff) <= maxSize {
return []string{string(diff)}
}
content := string(diff)
// Split by file headers
parts := strings.Split(content, "\ndiff --git ")
chunks := []string{}
var current strings.Builder
for idx, part := range parts {
seg := part
if idx != 0 {
// Preserve the leading newline that was removed by Split
seg = "\n" + "diff --git " + part
}
if current.Len()+len(seg) > maxSize && current.Len() > 0 {
chunks = append(chunks, current.String())
current.Reset()
}
if len(seg) > maxSize {
// Split further by hunks
hunks := strings.Split(seg, "\n@@ ")
for j, h := range hunks {
var hseg string
if j == 0 {
// First hunk segment already contains the preceding content (including any needed newline)
hseg = h
} else {
// Subsequent hunks need the leading newline and "@@ " marker restored
hseg = "\n@@ " + h
}
if current.Len()+len(hseg) > maxSize && current.Len() > 0 {
chunks = append(chunks, current.String())
current.Reset()
}
if len(hseg) > maxSize {
for len(hseg) > maxSize {
chunks = append(chunks, hseg[:maxSize])
hseg = hseg[maxSize:]
}
current.WriteString(hseg)
} else {
current.WriteString(hseg)
}
}
} else {
current.WriteString(seg)
}
}
if current.Len() > 0 {
chunks = append(chunks, current.String())
}
return chunks
}