feat(pierre): add diff chunking and configurable review settings
This commit is contained in:
@@ -4,6 +4,7 @@ import (
|
||||
"context"
|
||||
"fmt"
|
||||
"io"
|
||||
"strings"
|
||||
|
||||
"git.schreifuchs.ch/schreifuchs/pierre-bot/internal/chatter"
|
||||
)
|
||||
@@ -19,21 +20,111 @@ func (s *Service) judgePR(ctx context.Context, diff io.Reader) (comments []Comme
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to read diff: %w", err)
|
||||
}
|
||||
err = s.chat.GenerateStructured(ctx, []chatter.Message{
|
||||
{
|
||||
Role: chatter.RoleSystem,
|
||||
Content: `
|
||||
You are a very strict senior software architect.
|
||||
You review **only** newly added or modified lines in a unified diff, together with the immediate hunk context.
|
||||
You do **not** report issues that appear **solely** in deleted lines (“-”) or that have already been fixed by the change.
|
||||
No comments are made on pure formatting/whitespace changes or reordering that does not alter the program’s behavior.
|
||||
`,
|
||||
},
|
||||
{
|
||||
Role: chatter.RoleUser,
|
||||
Content: fmt.Sprintf("Hello please review my PR. Write comments where improvements are necessary in new lines.\n Here is the git diff of it: %s", string(diffBytes)),
|
||||
},
|
||||
}, &comments)
|
||||
|
||||
// Determine chunk size (use default if not set)
|
||||
maxSize := s.maxChunkSize
|
||||
if maxSize <= 0 {
|
||||
maxSize = 60000 // default 60KB ~ 15k tokens
|
||||
}
|
||||
|
||||
chunks := splitDiffIntoChunks(diffBytes, maxSize)
|
||||
allComments := []Comment{}
|
||||
|
||||
// Build optional guidelines text
|
||||
guidelinesText := ""
|
||||
if len(s.guidelines) > 0 {
|
||||
guidelinesText = "\nProject guidelines:\n"
|
||||
for _, g := range s.guidelines {
|
||||
guidelinesText += "- " + g + "\n"
|
||||
}
|
||||
}
|
||||
|
||||
baseSystem := `
|
||||
You are a very strict senior software architect.
|
||||
You review **only** newly added or modified lines in a unified diff, together with the immediate hunk context.
|
||||
You do **not** report issues that appear **solely** in deleted lines (“-”) or that have already been fixed by the change.
|
||||
No comments are made on pure formatting/whitespace changes or reordering that does not alter the program’s behavior.` + guidelinesText + `
|
||||
`
|
||||
|
||||
for i, chunk := range chunks {
|
||||
// Add a small header so the model knows this is a fragment
|
||||
header := fmt.Sprintf("\n--- Chunk %d of %d ---\n", i+1, len(chunks))
|
||||
userContent := fmt.Sprintf("Hello please review my PR. Write comments where improvements are necessary in new lines.%s\nHere is the git diff of it: %s", header, chunk)
|
||||
|
||||
err = s.chat.GenerateStructured(ctx, []chatter.Message{{
|
||||
Role: chatter.RoleSystem,
|
||||
Content: baseSystem,
|
||||
}, {
|
||||
Role: chatter.RoleUser,
|
||||
Content: userContent,
|
||||
}}, &comments)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
allComments = append(allComments, comments...)
|
||||
}
|
||||
|
||||
// De‑duplicate comments (keyed by file:line)
|
||||
unique := make(map[string]Comment)
|
||||
for _, c := range allComments {
|
||||
key := fmt.Sprintf("%s:%d", c.File, c.Line)
|
||||
unique[key] = c
|
||||
}
|
||||
for _, v := range unique {
|
||||
comments = append(comments, v)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// splitDiffIntoChunks splits a unified diff into chunks of at most maxSize
// bytes each. Concatenating the returned chunks reproduces the input
// byte-for-byte; in particular the newline in front of every "diff --git" and
// "@@" header is kept, so headers never get fused onto the preceding line.
//
// Split points are chosen in order of preference:
//  1. file boundaries (lines starting with "diff --git "),
//  2. hunk boundaries (lines starting with "@@ ") inside files that are
//     larger than maxSize,
//  3. inside hunks that are larger than maxSize: the last line boundary that
//     fits, otherwise a byte limit that is moved back so it does not cut
//     through a multi-byte UTF-8 sequence.
//
// A non-positive maxSize disables chunking: the whole diff is returned as a
// single chunk instead of looping forever or panicking on a bad slice bound.
func splitDiffIntoChunks(diff []byte, maxSize int) []string {
	if maxSize <= 0 || len(diff) <= maxSize {
		return []string{string(diff)}
	}

	var (
		chunks  []string
		current strings.Builder
	)
	// flush closes the chunk under construction, if it holds any data.
	flush := func() {
		if current.Len() > 0 {
			chunks = append(chunks, current.String())
			current.Reset()
		}
	}
	// add appends seg (which must be <= maxSize) to the chunk under
	// construction, starting a new chunk first when seg would not fit.
	add := func(seg string) {
		if current.Len()+len(seg) > maxSize {
			flush()
		}
		current.WriteString(seg)
	}

	for _, file := range splitBeforeLinePrefix(string(diff), "diff --git ") {
		if len(file) <= maxSize {
			add(file)
			continue
		}

		// The file does not fit into a single chunk: start it on a fresh
		// chunk and fall back to hunk granularity.
		flush()
		for _, hunk := range splitBeforeLinePrefix(file, "@@ ") {
			for len(hunk) > maxSize {
				// Even a single hunk is too large; emit hard-cut pieces.
				flush()
				cut := hardCutIndex(hunk, maxSize)
				chunks = append(chunks, hunk[:cut])
				hunk = hunk[cut:]
			}
			add(hunk)
		}
	}
	flush()
	return chunks
}

// splitBeforeLinePrefix cuts s in front of every line that starts with
// prefix. The newline preceding such a line stays with the previous part, so
// joining the parts yields s unchanged.
func splitBeforeLinePrefix(s, prefix string) []string {
	sep := "\n" + prefix
	var parts []string
	for {
		i := strings.Index(s, sep)
		if i < 0 {
			break
		}
		parts = append(parts, s[:i+1])
		s = s[i+1:]
	}
	return append(parts, s)
}

// hardCutIndex returns a cut position in (0, maxSize] for s, which must be
// longer than maxSize (> 0). It prefers the end of the last complete line
// within the limit; without one it backs off from maxSize until the cut no
// longer lands on a UTF-8 continuation byte.
func hardCutIndex(s string, maxSize int) int {
	if i := strings.LastIndexByte(s[:maxSize], '\n'); i >= 0 {
		return i + 1
	}
	cut := maxSize
	for cut > 0 && s[cut]&0xC0 == 0x80 {
		cut--
	}
	if cut == 0 {
		// maxSize is smaller than the leading rune (or the data is not valid
		// UTF-8); cut at the raw byte limit to guarantee progress.
		return maxSize
	}
	return cut
}
|
||||
|
||||
Reference in New Issue
Block a user