feat(pierre): sanity check

This commit is contained in:
u80864958
2026-02-13 17:27:53 +01:00
parent cc321be658
commit 343f6ab165
11 changed files with 392 additions and 74 deletions

View File

@@ -88,6 +88,10 @@ If project guidelines are provided, treat them as hard rules that must be respec
// It tries to split on file boundaries ("diff --git") first, then on hunk boundaries (@@),
// and finally on a hard byte limit.
func splitDiffIntoChunks(diff []byte, maxSize int) []string {
// Preserve the file header for each chunk when a single file's diff is split across multiple chunks.
// The header is the portion before the first hunk marker "@@" (including the "diff --git" line).
// When we need to split by hunks, we prepend this header to every resulting subchunk.
if len(diff) <= maxSize {
return []string{string(diff)}
}
@@ -107,22 +111,31 @@ func splitDiffIntoChunks(diff []byte, maxSize int) []string {
current.Reset()
}
if len(seg) > maxSize {
// Split further by hunks
hunks := strings.Split(seg, "\n@@ ")
for j, h := range hunks {
var hseg string
if j == 0 {
// First hunk segment already contains the preceding content (including any needed newline)
hseg = h
} else {
// Subsequent hunks need the leading newline and "@@ " marker restored
hseg = "\n@@ " + h
// Determine if there is a hunk marker. If not, fall back to simple size-based chunking.
headerEnd := strings.Index(seg, "\n@@ ")
if headerEnd == -1 {
// No hunk marker: split purely by size.
remaining := seg
for len(remaining) > maxSize {
chunks = append(chunks, remaining[:maxSize])
remaining = remaining[maxSize:]
}
current.WriteString(remaining)
continue
}
// Preserve the header up to the first hunk.
header := seg[:headerEnd+1] // include newline before "@@"
// Split the rest of the segment into hunks (excluding the header part).
hunks := strings.Split(strings.TrimPrefix(seg, header), "\n@@ ")
for _, h := range hunks {
// Reconstruct each hunk with its header and "@@ " prefix.
hseg := header + "@@ " + h
if current.Len()+len(hseg) > maxSize && current.Len() > 0 {
chunks = append(chunks, current.String())
current.Reset()
}
if len(hseg) > maxSize {
// If a single hunk exceeds maxSize, split it further.
for len(hseg) > maxSize {
chunks = append(chunks, hseg[:maxSize])
hseg = hseg[maxSize:]

View File

@@ -37,6 +37,15 @@ func (g *mockGit) AddComment(ctx context.Context, owner, repo string, prID int,
return nil
}
// GetFileContent is the mockGit stand-in for fetching a file at a given ref.
// Every argument is ignored: the method always yields the same tiny Go source
// file and a nil error, so tests get deterministic content.
func (g *mockGit) GetFileContent(ctx context.Context, owner, repo, path, ref string) (string, error) {
	const placeholderSource = "package main\n\nfunc placeholder() {}"
	return placeholderSource, nil
}
// GetPRHeadSHA is the mockGit stand-in for resolving a pull request's head
// commit. It disregards its arguments and always reports the fixed value
// "dummysha" with a nil error.
func (g *mockGit) GetPRHeadSHA(ctx context.Context, owner, repo string, prID int) (string, error) {
	const fixedSHA = "dummysha"
	return fixedSHA, nil
}
func TestSplitDiffIntoChunks(t *testing.T) {
cases := []struct {
name string

View File

@@ -33,6 +33,8 @@ func New(chat ChatAdapter, git GitAdapter, maxChunkSize int, guidelines []string
// GitAdapter is the set of Git-hosting operations the review service calls.
// Every method takes a context and identifies the repository by owner and repo.
// NOTE(review): the exact semantics are defined by the concrete adapters,
// which are not visible here; the per-method notes below describe only what
// the signatures and the visible MakeReview caller establish.
type GitAdapter interface {
// GetDiff returns the diff of pull request prID as an io.ReadCloser.
// NOTE(review): presumably the caller is responsible for closing it; confirm.
GetDiff(ctx context.Context, owner, repo string, prID int) (io.ReadCloser, error)
// AddComment submits a single review Comment for pull request prID.
AddComment(ctx context.Context, owner, repo string, prID int, comment Comment) error
// GetFileContent returns the content of the file at path for the given ref.
// MakeReview passes the value obtained from GetPRHeadSHA as ref.
GetFileContent(ctx context.Context, owner, repo, path, ref string) (string, error)
// GetPRHeadSHA returns the head commit SHA of pull request prID.
GetPRHeadSHA(ctx context.Context, owner, repo string, prID int) (string, error)
}
type ChatAdapter interface {

View File

@@ -4,6 +4,8 @@ import (
"context"
"fmt"
"log"
"git.schreifuchs.ch/schreifuchs/pierre-bot/internal/chatter"
)
func (s *Service) MakeReview(ctx context.Context, organisation string, repo string, prID int) error {
@@ -20,6 +22,46 @@ func (s *Service) MakeReview(ctx context.Context, organisation string, repo stri
return fmt.Errorf("error judging PR: %w", err)
}
// ---------- Sanity-check step (always enabled) ----------
headSHA, err := s.git.GetPRHeadSHA(ctx, organisation, repo, prID)
if err != nil {
log.Printf("warning: could not fetch PR head SHA (%v); skipping sanity check", err)
} else {
filtered := []Comment{}
for _, c := range comments {
// Retrieve full file content at the PR head
fileContent, fErr := s.git.GetFileContent(ctx, organisation, repo, c.File, headSHA)
if fErr != nil {
log.Printf("failed to fetch file %s: %v keeping original comment", c.File, fErr)
filtered = append(filtered, c)
continue
}
// Build a simple sanity-check prompt
systemPrompt := `You are a senior software architect. Given the full source code of a file and a review comment that refers to it, decide whether the comment is useful. Return JSON with fields "useful" (bool) and "reason" (short explanation, ≤2 sentences).`
userPrompt := fmt.Sprintf("File content:\n%s\n\nComment:\n%s", fileContent, c.Message)
type sanityResult struct {
Useful bool `json:"useful"`
Reason string `json:"reason"`
}
var res sanityResult
if err := s.chat.GenerateStructured(ctx, []chatter.Message{{Role: chatter.RoleSystem, Content: systemPrompt}, {Role: chatter.RoleUser, Content: userPrompt}}, &res); err != nil {
log.Printf("sanity check error for %s:%d: %v keeping comment", c.File, c.Line, err)
filtered = append(filtered, c)
continue
}
if res.Useful {
// Optionally annotate the comment with the reason for debugging
c.Message = fmt.Sprintf("%s (Reason: %s)", c.Message, res.Reason)
filtered = append(filtered, c)
} else {
log.Printf("comment on %s:%d discarded: %s", c.File, c.Line, res.Reason)
}
}
comments = filtered
}
fmt.Printf("Analysis complete. Found %d issues.\n---\n", len(comments))
model := s.chat.GetProviderName()