Files
pierre-bot/internal/pierre/judge.go

138 lines
3.9 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
package pierre
import (
"context"
"fmt"
"io"
"strings"
"git.schreifuchs.ch/schreifuchs/pierre-bot/internal/chatter"
)
// defaultChunkSize is the chunk size in bytes that judgePR falls back to when
// the service has no positive maxChunkSize configured (60KB, roughly 15k
// tokens).
const defaultChunkSize = 60_000
// Comment is a single review remark attached to a line of a file.
// It is the element type judgePR asks the chat model to produce and is
// (de)serialised with the JSON keys "file", "line" and "message".
type Comment struct {
	// File is the file the remark refers to.
	File string `json:"file"`
	// Line is the line number within File.
	Line int `json:"line"`
	// Message is the text of the remark.
	Message string `json:"message"`
}
// judgePR asks the chat model to review a unified diff and returns the
// review comments it produced.
//
// The diff is read completely, split with splitDiffIntoChunks (using
// s.maxChunkSize, or defaultChunkSize when that is not positive) and every
// chunk is sent as its own request. Each request carries the same system
// prompt, extended with s.guidelines when any are configured.
//
// Comments are deduplicated by file and line: when several comments target
// the same location the last one wins. The result is ordered by the first
// appearance of each location, so repeated runs over identical model output
// return the comments in the same order.
//
// It returns an error if the diff cannot be read or if any chunk request
// fails; in that case no comments are returned.
func (s *Service) judgePR(ctx context.Context, diff io.Reader) (comments []Comment, err error) {
	diffBytes, err := io.ReadAll(diff)
	if err != nil {
		return nil, fmt.Errorf("failed to read diff: %w", err)
	}

	// Determine chunk size (use default if not set).
	maxSize := s.maxChunkSize
	if maxSize <= 0 {
		maxSize = defaultChunkSize // default 60KB ~ 15k tokens
	}
	chunks := splitDiffIntoChunks(diffBytes, maxSize)

	// Build the system prompt once; it is identical for every chunk.
	var system strings.Builder
	system.WriteString(strings.TrimSpace(`
You are a very strict senior software architect.
You review **only** newly added or modified lines in a unified diff, together with the immediate hunk context.
You do **not** report issues that appear **solely** in deleted lines ("-") or that have already been fixed by the change.
No comments are made on pure formatting/whitespace changes or reordering that does not alter the program's behavior.`))
	if len(s.guidelines) > 0 {
		// The blank line keeps the guidelines from running into the last
		// sentence of the base prompt.
		system.WriteString("\n\nProject guidelines:\n")
		for _, g := range s.guidelines {
			system.WriteString("- " + g + "\n")
		}
	}
	baseSystem := system.String()

	var allComments []Comment
	for i, chunk := range chunks {
		// Add a small header so the model knows this is a fragment.
		header := fmt.Sprintf("\n--- Chunk %d of %d ---\n", i+1, len(chunks))
		userContent := fmt.Sprintf("Hello please review my PR. Write comments where improvements are necessary in new lines.%s\nHere is the git diff of it: %s", header, chunk)

		var chunkComments []Comment
		err = s.chat.GenerateStructured(ctx, []chatter.Message{{
			Role:    chatter.RoleSystem,
			Content: baseSystem,
		}, {
			Role:    chatter.RoleUser,
			Content: userContent,
		}}, &chunkComments)
		if err != nil {
			return nil, fmt.Errorf("failed to review chunk %d of %d: %w", i+1, len(chunks), err)
		}
		allComments = append(allComments, chunkComments...)
	}

	// Deduplicate comments (keyed by file:line). The map only remembers where
	// a location already sits in the result slice; ranging over a map to
	// build the result would randomise the order of the returned comments.
	position := make(map[string]int, len(allComments))
	for _, c := range allComments {
		key := fmt.Sprintf("%s:%d", c.File, c.Line)
		if at, seen := position[key]; seen {
			comments[at] = c // last comment for a location wins
			continue
		}
		position[key] = len(comments)
		comments = append(comments, c)
	}
	return comments, nil
}
// splitDiffIntoChunks splits a diff into chunks that do not exceed maxSize
// bytes. Concatenating the returned chunks yields the original diff.
//
// Splitting is attempted at progressively finer boundaries:
//
//  1. file boundaries ("\ndiff --git "); consecutive files are packed into
//     one chunk while they fit, and a file larger than maxSize always starts
//     a fresh chunk;
//  2. hunk boundaries ("\n@@ ") inside a file that is larger than maxSize;
//  3. inside a hunk that is still larger than maxSize: the last line break
//     that fits, or otherwise the byte limit moved back so that a multi-byte
//     UTF-8 sequence is not cut in half.
//
// A diff that already fits is returned as a single chunk (an empty diff
// yields one empty chunk). A non-positive maxSize cannot be honoured and is
// treated as "no limit", so the whole diff is returned as one chunk.
func splitDiffIntoChunks(diff []byte, maxSize int) []string {
	// With maxSize <= 0 the hard split below could never make progress
	// (maxSize == 0) or would slice with a negative index (maxSize < 0).
	if maxSize <= 0 || len(diff) <= maxSize {
		return []string{string(diff)}
	}

	var (
		chunks  []string
		current strings.Builder
	)
	// flush emits the chunk being built, if any, and starts a new one.
	flush := func() {
		if current.Len() > 0 {
			chunks = append(chunks, current.String())
			current.Reset()
		}
	}
	// add appends piece (at most maxSize bytes) to the chunk being built,
	// starting a new chunk first when piece would not fit.
	add := func(piece string) {
		if current.Len()+len(piece) > maxSize {
			flush()
		}
		current.WriteString(piece)
	}

	for i, file := range strings.Split(string(diff), "\ndiff --git ") {
		if i > 0 {
			// Restore the separator that was removed by Split.
			file = "\ndiff --git " + file
		}
		if len(file) <= maxSize {
			add(file)
			continue
		}

		// The file does not fit into any chunk as a whole: start a fresh
		// chunk and split it further by hunks.
		flush()
		for j, hunk := range strings.Split(file, "\n@@ ") {
			if j > 0 {
				// The first segment holds the file header; later ones need
				// the leading newline and "@@ " marker restored.
				hunk = "\n@@ " + hunk
			}
			for len(hunk) > maxSize {
				flush()
				cut := chunkCutIndex(hunk, maxSize)
				chunks = append(chunks, hunk[:cut])
				hunk = hunk[cut:]
			}
			add(hunk)
		}
	}
	flush()
	return chunks
}

// chunkCutIndex returns the index in 1..limit at which s should be cut so
// that s[:index] is at most limit bytes long. The caller must ensure
// 0 < limit < len(s).
//
// It prefers the last line break inside the window (the newline stays at the
// start of the following piece, matching how file and hunk separators are
// kept). Without a usable line break it cuts at limit, moved back by up to
// three bytes when limit falls inside a multi-byte UTF-8 sequence.
func chunkCutIndex(s string, limit int) int {
	// A newline at index 0 is skipped: cutting there would emit an empty
	// piece and make no progress.
	if nl := strings.LastIndexByte(s[:limit], '\n'); nl > 0 {
		return nl
	}

	// UTF-8 continuation bytes have the bit pattern 10xxxxxx; a sequence has
	// at most three of them, so never step back further than that.
	isContinuation := func(b byte) bool { return b&0xC0 == 0x80 }
	cut := limit
	for back := 0; back < 3 && cut > 0 && isContinuation(s[cut]); back++ {
		cut--
	}
	if cut == 0 || isContinuation(s[cut]) {
		// Invalid UTF-8 or a limit smaller than one character: fall back to
		// the plain byte limit so the split always advances.
		return limit
	}
	return cut
}