feat(pierre): sanity check

This commit is contained in:
u80864958
2026-02-13 17:27:53 +01:00
parent cc321be658
commit 343f6ab165
11 changed files with 392 additions and 74 deletions

View File

@@ -0,0 +1,80 @@
package bitbucket
import (
"context"
"net/http"
"net/http/httptest"
"testing"
)
func TestBitbucketGetFileContentSuccess(t *testing.T) {
const expected = "file content"
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
// Verify path structure
if r.URL.Path != "/rest/api/1.0/projects/owner/repos/repo/raw/path/to/file.go" {
t.Fatalf("unexpected URL path: %s", r.URL.Path)
}
w.WriteHeader(http.StatusOK)
w.Write([]byte(expected))
}))
defer server.Close()
// Trim trailing slash handling done in NewBitbucket
adapter := NewBitbucket(server.URL, "")
content, err := adapter.GetFileContent(context.Background(), "owner", "repo", "path/to/file.go", "")
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if content != expected {
t.Fatalf("expected %q, got %q", expected, content)
}
}
func TestBitbucketGetFileContentError(t *testing.T) {
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusNotFound)
w.Write([]byte("not found"))
}))
defer server.Close()
adapter := NewBitbucket(server.URL, "")
_, err := adapter.GetFileContent(context.Background(), "owner", "repo", "missing.go", "")
if err == nil {
t.Fatalf("expected error for non200 response")
}
}
func TestBitbucketGetPRHeadSHASuccess(t *testing.T) {
const sha = "deadbeef"
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if r.URL.Path != "/rest/api/1.0/projects/owner/repos/repo/pull-requests/42" {
t.Fatalf("unexpected URL: %s", r.URL.Path)
}
w.WriteHeader(http.StatusOK)
w.Write([]byte(`{"toRef":{"latestCommit":"` + sha + `"}}`))
}))
defer server.Close()
adapter := NewBitbucket(server.URL, "")
got, err := adapter.GetPRHeadSHA(context.Background(), "owner", "repo", 42)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if got != sha {
t.Fatalf("expected sha %s, got %s", sha, got)
}
}
func TestBitbucketGetPRHeadSHAError(t *testing.T) {
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusInternalServerError)
w.Write([]byte("error"))
}))
defer server.Close()
adapter := NewBitbucket(server.URL, "")
_, err := adapter.GetPRHeadSHA(context.Background(), "owner", "repo", 1)
if err == nil {
t.Fatalf("expected error for non200 response")
}
}

View File

@@ -47,10 +47,10 @@ func (b *BitbucketAdapter) GetPR(ctx context.Context, projectKey, repositorySlug
)
response, err := http.DefaultClient.Do(r)
defer response.Body.Close() // Add this
if err != nil {
return
}
defer response.Body.Close() // Add this
err = json.NewDecoder(response.Body).Decode(&pr)
@@ -86,10 +86,10 @@ func (b *BitbucketAdapter) AddComment(ctx context.Context, owner, repo string, p
}
response, err := http.DefaultClient.Do(r)
defer response.Body.Close() // Add this
if err != nil {
return err
}
defer response.Body.Close() // Add this
if response.StatusCode >= 300 || response.StatusCode < 200 {
sb := &strings.Builder{}

View File

@@ -1,6 +1,10 @@
package bitbucket
import (
"context"
"fmt"
"io"
"net/http"
"strings"
"git.schreifuchs.ch/schreifuchs/pierre-bot/internal/gitadapters/baseadapter"
@@ -18,3 +22,43 @@ func NewBitbucket(baseURL string, bearerToken string) *BitbucketAdapter {
Rest: baseadapter.NewRest(baseURL, bearerToken),
}
}
// GetFileContent returns the raw file content at the given ref (commit SHA) or HEAD if ref is empty.
func (b *BitbucketAdapter) GetFileContent(ctx context.Context, projectKey, repositorySlug, path, ref string) (string, error) {
// Use the Rest helper to build the base URL, then add the "at" query param if needed.
r, err := b.CreateRequest(ctx, http.MethodGet, nil,
"/projects/", projectKey, "repos", repositorySlug, "raw", path)
if err != nil {
return "", err
}
if ref != "" {
q := r.URL.Query()
q.Set("at", ref)
r.URL.RawQuery = q.Encode()
}
resp, err := http.DefaultClient.Do(r)
if err != nil {
return "", err
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
sb := &strings.Builder{}
io.Copy(sb, resp.Body)
return "", fmt.Errorf("error fetching file %s status %d, body %s", path, resp.StatusCode, sb.String())
}
content, err := io.ReadAll(resp.Body)
if err != nil {
return "", err
}
return string(content), nil
}
// GetPRHeadSHA fetches the PR and returns the SHA of the source (to) branch.
func (b *BitbucketAdapter) GetPRHeadSHA(ctx context.Context, projectKey, repositorySlug string, pullRequestID int) (string, error) {
pr, err := b.GetPR(ctx, projectKey, repositorySlug, pullRequestID)
if err != nil {
return "", err
}
return pr.ToRef.LatestCommit, nil
}

View File

@@ -3,6 +3,7 @@ package gitea
import (
"bytes"
"context"
"fmt"
"io"
"code.gitea.io/sdk/gitea"
@@ -47,3 +48,28 @@ func (g *Adapter) AddComment(ctx context.Context, owner, repo string, prID int,
_, _, err := g.client.CreatePullReview(owner, repo, int64(prID), opts)
return err
}
// GetFileContent returns the file content at a given path and ref (commit SHA).
func (g *Adapter) GetFileContent(ctx context.Context, owner, repo, path, ref string) (string, error) {
g.client.SetContext(ctx)
// The SDK's GetFile returns the raw bytes of the file.
data, _, err := g.client.GetFile(owner, repo, ref, path)
if err != nil {
return "", err
}
return string(data), nil
}
// GetPRHeadSHA fetches the pull request and returns the head commit SHA.
func (g *Adapter) GetPRHeadSHA(ctx context.Context, owner, repo string, prID int) (string, error) {
g.client.SetContext(ctx)
// GetPullRequest returns the detailed PR information.
pr, _, err := g.client.GetPullRequest(owner, repo, int64(prID))
if err != nil {
return "", err
}
if pr == nil || pr.Head == nil {
return "", fmt.Errorf("pull request %d has no head information", prID)
}
return pr.Head.Sha, nil
}

View File

@@ -88,6 +88,10 @@ If project guidelines are provided, treat them as hard rules that must be respec
// It tries to split on file boundaries ("diff --git") first, then on hunk boundaries (@@),
// and finally on a hard byte limit.
func splitDiffIntoChunks(diff []byte, maxSize int) []string {
// Preserve the file header for each chunk when a single file's diff is split across multiple chunks.
// The header is the portion before the first hunk marker "@@" (including the "diff --git" line).
// When we need to split by hunks, we prepend this header to every resulting subchunk.
if len(diff) <= maxSize {
return []string{string(diff)}
}
@@ -107,22 +111,31 @@ func splitDiffIntoChunks(diff []byte, maxSize int) []string {
current.Reset()
}
if len(seg) > maxSize {
// Split further by hunks
hunks := strings.Split(seg, "\n@@ ")
for j, h := range hunks {
var hseg string
if j == 0 {
// First hunk segment already contains the preceding content (including any needed newline)
hseg = h
} else {
// Subsequent hunks need the leading newline and "@@ " marker restored
hseg = "\n@@ " + h
// Determine if there is a hunk marker. If not, fall back to simple sizebased chunking.
headerEnd := strings.Index(seg, "\n@@ ")
if headerEnd == -1 {
// No hunk marker split purely by size.
remaining := seg
for len(remaining) > maxSize {
chunks = append(chunks, remaining[:maxSize])
remaining = remaining[maxSize:]
}
current.WriteString(remaining)
continue
}
// Preserve the header up to the first hunk.
header := seg[:headerEnd+1] // include newline before "@@"
// Split the rest of the segment into hunks (excluding the header part).
hunks := strings.Split(strings.TrimPrefix(seg, header), "\n@@ ")
for _, h := range hunks {
// Reconstruct each hunk with its header and "@@ " prefix.
hseg := header + "@@ " + h
if current.Len()+len(hseg) > maxSize && current.Len() > 0 {
chunks = append(chunks, current.String())
current.Reset()
}
if len(hseg) > maxSize {
// If a single hunk exceeds maxSize, split it further.
for len(hseg) > maxSize {
chunks = append(chunks, hseg[:maxSize])
hseg = hseg[maxSize:]

View File

@@ -37,6 +37,15 @@ func (g *mockGit) AddComment(ctx context.Context, owner, repo string, prID int,
return nil
}
func (g *mockGit) GetFileContent(ctx context.Context, owner, repo, path, ref string) (string, error) {
// For tests, return a simple placeholder content.
return "package main\n\nfunc placeholder() {}", nil
}
func (g *mockGit) GetPRHeadSHA(ctx context.Context, owner, repo string, prID int) (string, error) {
return "dummysha", nil
}
func TestSplitDiffIntoChunks(t *testing.T) {
cases := []struct {
name string

View File

@@ -33,6 +33,8 @@ func New(chat ChatAdapter, git GitAdapter, maxChunkSize int, guidelines []string
type GitAdapter interface {
GetDiff(ctx context.Context, owner, repo string, prID int) (io.ReadCloser, error)
AddComment(ctx context.Context, owner, repo string, prID int, comment Comment) error
GetFileContent(ctx context.Context, owner, repo, path, ref string) (string, error)
GetPRHeadSHA(ctx context.Context, owner, repo string, prID int) (string, error)
}
type ChatAdapter interface {

View File

@@ -4,6 +4,8 @@ import (
"context"
"fmt"
"log"
"git.schreifuchs.ch/schreifuchs/pierre-bot/internal/chatter"
)
func (s *Service) MakeReview(ctx context.Context, organisation string, repo string, prID int) error {
@@ -20,6 +22,46 @@ func (s *Service) MakeReview(ctx context.Context, organisation string, repo stri
return fmt.Errorf("error judging PR: %w", err)
}
// ---------- Sanitycheck step (always enabled) ----------
headSHA, err := s.git.GetPRHeadSHA(ctx, organisation, repo, prID)
if err != nil {
log.Printf("warning: could not fetch PR head SHA (%v); skipping sanity check", err)
} else {
filtered := []Comment{}
for _, c := range comments {
// Retrieve full file content at the PR head
fileContent, fErr := s.git.GetFileContent(ctx, organisation, repo, c.File, headSHA)
if fErr != nil {
log.Printf("failed to fetch file %s: %v keeping original comment", c.File, fErr)
filtered = append(filtered, c)
continue
}
// Build a simple sanitycheck prompt
systemPrompt := `You are a senior software architect. Given the full source code of a file and a review comment that refers to it, decide whether the comment is useful. Return JSON with fields "useful" (bool) and "reason" (short explanation, ≤2 sentences).`
userPrompt := fmt.Sprintf("File content:\n%s\n\nComment:\n%s", fileContent, c.Message)
type sanityResult struct {
Useful bool `json:"useful"`
Reason string `json:"reason"`
}
var res sanityResult
if err := s.chat.GenerateStructured(ctx, []chatter.Message{{Role: chatter.RoleSystem, Content: systemPrompt}, {Role: chatter.RoleUser, Content: userPrompt}}, &res); err != nil {
log.Printf("sanity check error for %s:%d: %v keeping comment", c.File, c.Line, err)
filtered = append(filtered, c)
continue
}
if res.Useful {
// Optionally annotate the comment with the reason for debugging
c.Message = fmt.Sprintf("%s (Reason: %s)", c.Message, res.Reason)
filtered = append(filtered, c)
} else {
log.Printf("comment on %s:%d discarded: %s", c.File, c.Line, res.Reason)
}
}
comments = filtered
}
fmt.Printf("Analysis complete. Found %d issues.\n---\n", len(comments))
model := s.chat.GetProviderName()