144 lines
4.1 KiB
Go
144 lines
4.1 KiB
Go
package pierre
|
||
|
||
import (
	"context"
	"fmt"
	"io"
	"strings"
	"unicode/utf8"

	"git.schreifuchs.ch/schreifuchs/pierre-bot/internal/chatter"
)
|
||
|
||
// DefaultChunkSize is the fallback maximum size (in bytes) for a diff chunk
// when no explicit value is configured. 60000 bytes is about 60 KB, which is
// estimated to correspond to roughly 15k LLM tokens.
const DefaultChunkSize = 60000
|
||
|
||
// Comment is a single review remark about a diff. Values of this type are
// filled in from the chat model's structured output, which is asked to emit
// JSON objects with the fields "file", "line" and "message".
type Comment struct {
	// File is the file the remark refers to, as reported by the model.
	File string `json:"file"`
	// Line is the line number the remark refers to, as reported by the model.
	// NOTE(review): presumably a line of the new file version - confirm.
	Line int `json:"line"`
	// Message is the text of the remark.
	Message string `json:"message"`
}
|
||
|
||
func (s *Service) judgePR(ctx context.Context, diff io.Reader) (comments []Comment, err error) {
|
||
diffBytes, err := io.ReadAll(diff)
|
||
if err != nil {
|
||
return nil, fmt.Errorf("failed to read diff: %w", err)
|
||
}
|
||
|
||
// Determine chunk size (use default if not set)
|
||
maxSize := s.maxChunkSize
|
||
if maxSize <= 0 {
|
||
maxSize = DefaultChunkSize // default 60KB ~ 15k tokens
|
||
}
|
||
|
||
chunks := splitDiffIntoChunks(diffBytes, maxSize)
|
||
allComments := []Comment{}
|
||
|
||
// Build optional guidelines text (added as a separate section with a clear delimiter)
|
||
guidelinesText := ""
|
||
if len(s.guidelines) > 0 {
|
||
// Two newlines ensure the guidelines start on a fresh paragraph.
|
||
guidelinesText = "\n\nProject guidelines:\n"
|
||
for _, g := range s.guidelines {
|
||
guidelinesText += "- " + g + "\n"
|
||
}
|
||
}
|
||
|
||
// System prompt that instructs the LLM precisely.
|
||
baseSystem := strings.TrimSpace(`
|
||
You are a strict senior software architect.
|
||
Only comment on newly added or modified lines in the diff; ignore deletions, pure formatting, or re‑ordering that does not change behavior.
|
||
For each issue output a JSON object with fields "file", "line", and "message" (message should be concise, ≤2 sentences, and actionable).
|
||
If project guidelines are provided, treat them as hard rules that must be respected.`) + guidelinesText
|
||
|
||
for i, chunk := range chunks {
|
||
// Include the chunk identifier in the system message only if there are multiple chunks.
|
||
systemContent := baseSystem
|
||
if len(chunks) > 1 {
|
||
systemContent = fmt.Sprintf("%s\nChunk %d of %d.", baseSystem, i+1, len(chunks))
|
||
}
|
||
userContent := chunk
|
||
|
||
var chunkComments []Comment
|
||
err = s.chat.GenerateStructured(ctx, []chatter.Message{{
|
||
Role: chatter.RoleSystem,
|
||
Content: systemContent,
|
||
}, {
|
||
Role: chatter.RoleUser,
|
||
Content: userContent,
|
||
}}, &chunkComments)
|
||
if err != nil {
|
||
return nil, err
|
||
}
|
||
allComments = append(allComments, chunkComments...)
|
||
}
|
||
|
||
// De‑duplicate comments (keyed by file:line)
|
||
unique := make(map[string]Comment)
|
||
for _, c := range allComments {
|
||
key := fmt.Sprintf("%s:%d", c.File, c.Line)
|
||
unique[key] = c
|
||
}
|
||
for _, v := range unique {
|
||
comments = append(comments, v)
|
||
}
|
||
return
|
||
}
|
||
|
||
// splitDiffIntoChunks splits a diff into chunks that do not exceed maxSize bytes.
//
// It splits on file boundaries ("\ndiff --git ") first. A file larger than
// maxSize always starts a fresh chunk and is split further on hunk boundaries
// ("\n@@ "). A hunk that is still too large is cut at the last line break that
// fits, or, when there is none, at a UTF-8 rune boundary close to the limit
// (a plain byte cut is the last resort for invalid UTF-8 or a tiny maxSize).
//
// Concatenating the returned chunks reproduces diff byte for byte; chunks that
// start at a marker or line break therefore begin with that newline.
//
// A diff that already fits, or a non-positive maxSize (a limit that could
// never be met), yields a single chunk holding the whole diff.
//
// NOTE(review): chunks that continue a file do not repeat its "diff --git"
// header, so a consumer of such a chunk cannot see which file it belongs to.
func splitDiffIntoChunks(diff []byte, maxSize int) []string {
	if maxSize <= 0 || len(diff) <= maxSize {
		return []string{string(diff)}
	}

	var (
		chunks  []string
		current strings.Builder
	)
	// flush closes the chunk being built, if it has any content.
	flush := func() {
		if current.Len() > 0 {
			chunks = append(chunks, current.String())
			current.Reset()
		}
	}
	// add appends seg (at most maxSize bytes) to the chunk being built,
	// closing that chunk first when seg would push it over the limit.
	add := func(seg string) {
		if current.Len()+len(seg) > maxSize {
			flush()
		}
		current.WriteString(seg)
	}

	for _, file := range splitBeforeMarker(string(diff), "\ndiff --git ") {
		if len(file) <= maxSize {
			add(file)
			continue
		}

		// An oversized file never shares a chunk with the preceding file.
		flush()
		for _, hunk := range splitBeforeMarker(file, "\n@@ ") {
			if len(hunk) > maxSize {
				flush()
				for len(hunk) > maxSize {
					cut := safeCutIndex(hunk, maxSize)
					chunks = append(chunks, hunk[:cut])
					hunk = hunk[cut:]
				}
			}
			// The remainder (or the whole hunk) may share a chunk with the
			// hunks that follow.
			add(hunk)
		}
	}
	flush()
	return chunks
}

// splitBeforeMarker splits s in front of every occurrence of marker, so every
// piece but the first starts with marker and the pieces concatenate back to s.
func splitBeforeMarker(s, marker string) []string {
	pieces := strings.Split(s, marker)
	for i := 1; i < len(pieces); i++ {
		pieces[i] = marker + pieces[i]
	}
	return pieces
}

// safeCutIndex returns an index in [1, maxSize] at which s can be cut without
// splitting a line or, failing that, a UTF-8 rune. It requires
// len(s) > maxSize > 0.
func safeCutIndex(s string, maxSize int) int {
	// Prefer cutting in front of the last line break that fits. Index 0 is
	// excluded because cutting there would produce an empty piece.
	if nl := strings.LastIndexByte(s[:maxSize], '\n'); nl > 0 {
		return nl
	}
	// Otherwise step back over at most one partial rune.
	for back := 0; back < utf8.UTFMax && back < maxSize; back++ {
		if utf8.RuneStart(s[maxSize-back]) {
			return maxSize - back
		}
	}
	return maxSize
}
|