Files
pierre-bot/internal/pierre/judge.go
2026-02-13 18:33:21 +01:00

160 lines
5.0 KiB
Go
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
package pierre
import (
"context"
"fmt"
"io"
"log/slog"
"strings"
"git.schreifuchs.ch/schreifuchs/pierre-bot/internal/chatter"
)
// DefaultChunkSize is the fallback maximum size (in bytes) for a diff chunk when no explicit value is configured.
const DefaultChunkSize = 60000
type Comment struct {
File string `json:"file"`
Line int `json:"line"`
Message string `json:"message"`
}
func (s *Service) judgePR(ctx context.Context, diff io.Reader) (comments []Comment, err error) {
slog.Info("judgePR started")
diffBytes, err := io.ReadAll(diff)
if err != nil {
return nil, fmt.Errorf("failed to read diff: %w", err)
}
// Determine chunk size (use default if not set)
maxSize := s.maxChunkSize
if maxSize <= 0 {
maxSize = DefaultChunkSize // default 60KB ~ 15k tokens
}
chunks := splitDiffIntoChunks(diffBytes, maxSize)
allComments := []Comment{}
// Build optional guidelines text (added as a separate section with a clear delimiter)
guidelinesText := ""
if len(s.guidelines) > 0 {
// Two newlines ensure the guidelines start on a fresh paragraph.
guidelinesText = "\n\nProject guidelines:\n"
for _, g := range s.guidelines {
guidelinesText += "- " + g + "\n"
}
}
// System prompt that instructs the LLM precisely.
baseSystem := strings.TrimSpace(`
You are a strict senior software architect.
Only comment on newly added or modified lines in the diff; ignore deletions, pure formatting, or reordering that does not change behavior.
For each issue output a JSON object with fields "file", "line", and "message" (message should be concise, ≤2 sentences, and actionable).
If project guidelines are provided, treat them as hard rules that must be respected.`) + guidelinesText
for i, chunk := range chunks {
// Include the chunk identifier in the system message only if there are multiple chunks.
systemContent := baseSystem
if len(chunks) > 1 {
systemContent = fmt.Sprintf("%s\nChunk %d of %d.", baseSystem, i+1, len(chunks))
}
userContent := chunk
var chunkComments []Comment
err = s.chat.GenerateStructured(ctx, []chatter.Message{{
Role: chatter.RoleSystem,
Content: systemContent,
}, {
Role: chatter.RoleUser,
Content: userContent,
}}, &chunkComments)
if err != nil {
return nil, err
}
allComments = append(allComments, chunkComments...)
}
// Deduplicate comments (keyed by file:line)
unique := make(map[string]Comment)
for _, c := range allComments {
key := fmt.Sprintf("%s:%d", c.File, c.Line)
unique[key] = c
}
for _, v := range unique {
comments = append(comments, v)
}
slog.Info("judgePR finished", "comments", len(comments))
return
}
// splitDiffIntoChunks splits a diff into chunks that do not exceed maxSize bytes.
// It tries to split on file boundaries ("diff --git") first, then on hunk boundaries (@@),
// and finally on a hard byte limit.
func splitDiffIntoChunks(diff []byte, maxSize int) []string {
// Preserve the file header for each chunk when a single file's diff is split across multiple chunks.
// The header is the portion before the first hunk marker "@@" (including the "diff --git" line).
// When we need to split by hunks, we prepend this header to every resulting subchunk.
if len(diff) <= maxSize {
return []string{string(diff)}
}
content := string(diff)
// Split by file headers
parts := strings.Split(content, "\ndiff --git ")
chunks := []string{}
var current strings.Builder
for idx, part := range parts {
seg := part
if idx != 0 {
// Preserve the leading newline that was removed by Split
seg = "\n" + "diff --git " + part
}
if current.Len()+len(seg) > maxSize && current.Len() > 0 {
chunks = append(chunks, current.String())
current.Reset()
}
if len(seg) > maxSize {
// Determine if there is a hunk marker. If not, fall back to simple sizebased chunking.
headerEnd := strings.Index(seg, "\n@@ ")
if headerEnd == -1 {
// No hunk marker split purely by size.
remaining := seg
for len(remaining) > maxSize {
chunks = append(chunks, remaining[:maxSize])
remaining = remaining[maxSize:]
}
current.WriteString(remaining)
continue
}
// Preserve the header up to the first hunk.
header := seg[:headerEnd+1] // include newline before "@@"
// Split the rest of the segment into hunks (excluding the header part).
hunks := strings.Split(strings.TrimPrefix(seg, header), "\n@@ ")
for _, h := range hunks {
// Reconstruct each hunk with its header and "@@ " prefix.
hseg := header + "@@ " + h
if current.Len()+len(hseg) > maxSize && current.Len() > 0 {
chunks = append(chunks, current.String())
current.Reset()
}
if len(hseg) > maxSize {
// If a single hunk exceeds maxSize, split it further.
for len(hseg) > maxSize {
chunks = append(chunks, hseg[:maxSize])
hseg = hseg[maxSize:]
}
current.WriteString(hseg)
} else {
current.WriteString(hseg)
}
}
} else {
current.WriteString(seg)
}
}
if current.Len() > 0 {
chunks = append(chunks, current.String())
}
return chunks
}