Compare commits
1 Commits
985fac347c
...
61d538d4a5
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
61d538d4a5 |
@@ -37,7 +37,14 @@ type LLMConfig struct {
|
|||||||
Model string `help:"Model to use" env:"LLM_MODEL"`
|
Model string `help:"Model to use" env:"LLM_MODEL"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type ReviewConfig struct {
|
||||||
|
MaxChunkChars int `help:"Maximum diff chunk size in characters (default 60000)" default:"60000"`
|
||||||
|
Guidelines []string `help:"Project-specific review guidelines"`
|
||||||
|
DisableComments bool `help:"Do not post comments to the Git provider (dry‑run mode)"`
|
||||||
|
}
|
||||||
|
|
||||||
type Config struct {
|
type Config struct {
|
||||||
|
Review ReviewConfig `embed:"" prefix:"review-"`
|
||||||
GitProvider string `help:"Git provider (bitbucket or gitea)" env:"GIT_PROVIDER"`
|
GitProvider string `help:"Git provider (bitbucket or gitea)" env:"GIT_PROVIDER"`
|
||||||
Bitbucket BitbucketConfig `embed:"" prefix:"bitbucket-"`
|
Bitbucket BitbucketConfig `embed:"" prefix:"bitbucket-"`
|
||||||
Gitea GiteaConfig `embed:"" prefix:"gitea-"`
|
Gitea GiteaConfig `embed:"" prefix:"gitea-"`
|
||||||
@@ -117,7 +124,7 @@ func main() {
|
|||||||
log.Fatalf("Error initializing AI: %v", err)
|
log.Fatalf("Error initializing AI: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
pierreService := pierre.New(ai, git)
|
pierreService := pierre.New(ai, git, cfg.Review.MaxChunkChars, cfg.Review.Guidelines, cfg.Review.DisableComments)
|
||||||
if err := pierreService.MakeReview(context.Background(), cfg.Repo.Owner, cfg.Repo.Repo, cfg.Repo.PRID); err != nil {
|
if err := pierreService.MakeReview(context.Background(), cfg.Repo.Owner, cfg.Repo.Repo, cfg.Repo.PRID); err != nil {
|
||||||
log.Fatalf("Error during review: %v", err)
|
log.Fatalf("Error during review: %v", err)
|
||||||
}
|
}
|
||||||
|
|||||||
3
go.mod
3
go.mod
@@ -7,7 +7,9 @@ require (
|
|||||||
github.com/alecthomas/kong v1.14.0
|
github.com/alecthomas/kong v1.14.0
|
||||||
github.com/alecthomas/kong-yaml v0.2.0
|
github.com/alecthomas/kong-yaml v0.2.0
|
||||||
github.com/google/generative-ai-go v0.20.1
|
github.com/google/generative-ai-go v0.20.1
|
||||||
|
github.com/google/go-cmp v0.7.0
|
||||||
github.com/ollama/ollama v0.16.0
|
github.com/ollama/ollama v0.16.0
|
||||||
|
github.com/sashabaranov/go-openai v1.41.2
|
||||||
google.golang.org/api v0.186.0
|
google.golang.org/api v0.186.0
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -34,7 +36,6 @@ require (
|
|||||||
github.com/googleapis/gax-go/v2 v2.12.5 // indirect
|
github.com/googleapis/gax-go/v2 v2.12.5 // indirect
|
||||||
github.com/hashicorp/go-version v1.7.0 // indirect
|
github.com/hashicorp/go-version v1.7.0 // indirect
|
||||||
github.com/mailru/easyjson v0.7.7 // indirect
|
github.com/mailru/easyjson v0.7.7 // indirect
|
||||||
github.com/sashabaranov/go-openai v1.41.2 // indirect
|
|
||||||
github.com/wk8/go-ordered-map/v2 v2.1.8 // indirect
|
github.com/wk8/go-ordered-map/v2 v2.1.8 // indirect
|
||||||
go.opencensus.io v0.24.0 // indirect
|
go.opencensus.io v0.24.0 // indirect
|
||||||
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.51.0 // indirect
|
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.51.0 // indirect
|
||||||
|
|||||||
@@ -31,7 +31,7 @@ func (b *BitbucketAdapter) GetDiff(ctx context.Context, projectKey, repositorySl
|
|||||||
if response.StatusCode != http.StatusOK {
|
if response.StatusCode != http.StatusOK {
|
||||||
sb := &strings.Builder{}
|
sb := &strings.Builder{}
|
||||||
io.Copy(sb, response.Body)
|
io.Copy(sb, response.Body)
|
||||||
err = fmt.Errorf("error while fetching bitbucket diff staus %d, body %s", response.Status, sb.String())
|
err = fmt.Errorf("error while fetching bitbucket diff status %d, body %s", response.StatusCode, sb.String())
|
||||||
}
|
}
|
||||||
|
|
||||||
diff = response.Body
|
diff = response.Body
|
||||||
|
|||||||
@@ -4,6 +4,7 @@ import (
|
|||||||
"context"
|
"context"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
|
"strings"
|
||||||
|
|
||||||
"git.schreifuchs.ch/schreifuchs/pierre-bot/internal/chatter"
|
"git.schreifuchs.ch/schreifuchs/pierre-bot/internal/chatter"
|
||||||
)
|
)
|
||||||
@@ -19,21 +20,113 @@ func (s *Service) judgePR(ctx context.Context, diff io.Reader) (comments []Comme
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("failed to read diff: %w", err)
|
return nil, fmt.Errorf("failed to read diff: %w", err)
|
||||||
}
|
}
|
||||||
err = s.chat.GenerateStructured(ctx, []chatter.Message{
|
|
||||||
{
|
|
||||||
Role: chatter.RoleSystem,
|
|
||||||
Content: `
|
|
||||||
You are a very strict senior software architect.
|
|
||||||
You review **only** newly added or modified lines in a unified diff, together with the immediate hunk context.
|
|
||||||
You do **not** report issues that appear **solely** in deleted lines (“-”) or that have already been fixed by the change.
|
|
||||||
No comments are made on pure formatting/whitespace changes or reordering that does not alter the program’s behavior.
|
|
||||||
`,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
Role: chatter.RoleUser,
|
|
||||||
Content: fmt.Sprintf("Hello please review my PR. Write comments where improvements are necessary in new lines.\n Here is the git diff of it: %s", string(diffBytes)),
|
|
||||||
},
|
|
||||||
}, &comments)
|
|
||||||
|
|
||||||
|
// Determine chunk size (use default if not set)
|
||||||
|
maxSize := s.maxChunkSize
|
||||||
|
if maxSize <= 0 {
|
||||||
|
maxSize = 60000 // default 60KB ~ 15k tokens
|
||||||
|
}
|
||||||
|
|
||||||
|
chunks := splitDiffIntoChunks(diffBytes, maxSize)
|
||||||
|
allComments := []Comment{}
|
||||||
|
|
||||||
|
// Build optional guidelines text
|
||||||
|
guidelinesText := ""
|
||||||
|
if len(s.guidelines) > 0 {
|
||||||
|
guidelinesText = "\nProject guidelines:\n"
|
||||||
|
for _, g := range s.guidelines {
|
||||||
|
guidelinesText += "- " + g + "\n"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
baseSystem := strings.TrimSpace(`
|
||||||
|
You are a very strict senior software architect.
|
||||||
|
You review **only** newly added or modified lines in a unified diff, together with the immediate hunk context.
|
||||||
|
You do **not** report issues that appear **solely** in deleted lines (“-") or that have already been fixed by the change.
|
||||||
|
No comments are made on pure formatting/whitespace changes or reordering that does not alter the program’s behavior.`) + guidelinesText
|
||||||
|
|
||||||
|
for i, chunk := range chunks {
|
||||||
|
// Add a small header so the model knows this is a fragment
|
||||||
|
header := fmt.Sprintf("\n--- Chunk %d of %d ---\n", i+1, len(chunks))
|
||||||
|
userContent := fmt.Sprintf("Hello please review my PR. Write comments where improvements are necessary in new lines.%s\nHere is the git diff of it: %s", header, chunk)
|
||||||
|
|
||||||
|
var chunkComments []Comment
|
||||||
|
err = s.chat.GenerateStructured(ctx, []chatter.Message{{
|
||||||
|
Role: chatter.RoleSystem,
|
||||||
|
Content: baseSystem,
|
||||||
|
}, {
|
||||||
|
Role: chatter.RoleUser,
|
||||||
|
Content: userContent,
|
||||||
|
}}, &chunkComments)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
allComments = append(allComments, chunkComments...)
|
||||||
|
}
|
||||||
|
|
||||||
|
// De‑duplicate comments (keyed by file:line)
|
||||||
|
unique := make(map[string]Comment)
|
||||||
|
for _, c := range allComments {
|
||||||
|
key := fmt.Sprintf("%s:%d", c.File, c.Line)
|
||||||
|
unique[key] = c
|
||||||
|
}
|
||||||
|
for _, v := range unique {
|
||||||
|
comments = append(comments, v)
|
||||||
|
}
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// splitDiffIntoChunks splits a diff into chunks that do not exceed maxSize bytes.
|
||||||
|
// It tries to split on file boundaries ("diff --git") first, then on hunk boundaries (@@),
|
||||||
|
// and finally on a hard byte limit.
|
||||||
|
func splitDiffIntoChunks(diff []byte, maxSize int) []string {
|
||||||
|
if len(diff) <= maxSize {
|
||||||
|
return []string{string(diff)}
|
||||||
|
}
|
||||||
|
content := string(diff)
|
||||||
|
// Split by file headers
|
||||||
|
parts := strings.Split(content, "\ndiff --git ")
|
||||||
|
chunks := []string{}
|
||||||
|
var current strings.Builder
|
||||||
|
for idx, part := range parts {
|
||||||
|
seg := part
|
||||||
|
if idx != 0 {
|
||||||
|
// Preserve the leading newline that was removed by Split
|
||||||
|
seg = "\n" + "diff --git " + part
|
||||||
|
}
|
||||||
|
if current.Len()+len(seg) > maxSize && current.Len() > 0 {
|
||||||
|
chunks = append(chunks, current.String())
|
||||||
|
current.Reset()
|
||||||
|
}
|
||||||
|
if len(seg) > maxSize {
|
||||||
|
// Split further by hunks
|
||||||
|
hunks := strings.Split(seg, "\n@@ ")
|
||||||
|
for j, h := range hunks {
|
||||||
|
hseg := h
|
||||||
|
if j != 0 {
|
||||||
|
hseg = "@@ " + h
|
||||||
|
}
|
||||||
|
if current.Len()+len(hseg) > maxSize && current.Len() > 0 {
|
||||||
|
chunks = append(chunks, current.String())
|
||||||
|
current.Reset()
|
||||||
|
}
|
||||||
|
if len(hseg) > maxSize {
|
||||||
|
for len(hseg) > maxSize {
|
||||||
|
chunks = append(chunks, hseg[:maxSize])
|
||||||
|
hseg = hseg[maxSize:]
|
||||||
|
}
|
||||||
|
current.WriteString(hseg)
|
||||||
|
} else {
|
||||||
|
current.WriteString(hseg)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
current.WriteString(seg)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if current.Len() > 0 {
|
||||||
|
chunks = append(chunks, current.String())
|
||||||
|
}
|
||||||
|
return chunks
|
||||||
|
}
|
||||||
|
|
||||||
|
|||||||
127
internal/pierre/judge_test.go
Normal file
127
internal/pierre/judge_test.go
Normal file
@@ -0,0 +1,127 @@
|
|||||||
|
package pierre
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"context"
|
||||||
|
"io"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"git.schreifuchs.ch/schreifuchs/pierre-bot/internal/chatter"
|
||||||
|
"github.com/google/go-cmp/cmp"
|
||||||
|
)
|
||||||
|
|
||||||
|
// mockChat implements the ChatAdapter interface for testing.
|
||||||
|
type mockChat struct{}
|
||||||
|
|
||||||
|
func (m *mockChat) GenerateStructured(ctx context.Context, msgs []chatter.Message, target interface{}) error {
|
||||||
|
if cSlice, ok := target.(*[]Comment); ok {
|
||||||
|
*cSlice = []Comment{{File: "file.go", Line: 1, Message: "test comment"}}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *mockChat) GetProviderName() string { return "mock" }
|
||||||
|
|
||||||
|
// mockGit implements the GitAdapter interface for testing.
|
||||||
|
type mockGit struct{}
|
||||||
|
|
||||||
|
func (g *mockGit) GetDiff(ctx context.Context, owner, repo string, prID int) (io.ReadCloser, error) {
|
||||||
|
diff := "diff --git a/file1.go b/file1.go\n+line1\n" + "diff --git a/file2.go b/file2.go\n+line2\n"
|
||||||
|
return io.NopCloser(bytes.NewReader([]byte(diff))), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (g *mockGit) AddComment(ctx context.Context, owner, repo string, prID int, comment Comment) error {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestSplitDiffIntoChunks(t *testing.T) {
|
||||||
|
cases := []struct {
|
||||||
|
name string
|
||||||
|
diff string
|
||||||
|
maxSize int
|
||||||
|
wantChunks int // 0 means we don't assert exact count
|
||||||
|
wantPrefixes []string
|
||||||
|
checkRecombine bool
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "small diff",
|
||||||
|
diff: "diff --git a/file1.txt b/file1.txt\n+added line\n",
|
||||||
|
maxSize: 1000,
|
||||||
|
wantChunks: 1,
|
||||||
|
wantPrefixes: []string{"diff --git a/file1.txt"},
|
||||||
|
checkRecombine: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "multiple files",
|
||||||
|
diff: "diff --git a/file1.txt b/file1.txt\n+added line 1\n" +
|
||||||
|
"diff --git a/file2.txt b/file2.txt\n+added line 2\n",
|
||||||
|
maxSize: 50,
|
||||||
|
wantChunks: 2,
|
||||||
|
wantPrefixes: []string{"diff --git a/file1.txt", "diff --git a/file2.txt"},
|
||||||
|
checkRecombine: false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "large single file",
|
||||||
|
diff: func() string {
|
||||||
|
line := "+very long added line that will be repeated many times to exceed the chunk size\n"
|
||||||
|
return "diff --git a/large.txt b/large.txt\n" + strings.Repeat(line, 200)
|
||||||
|
}(),
|
||||||
|
maxSize: 500,
|
||||||
|
wantChunks: 0,
|
||||||
|
wantPrefixes: nil,
|
||||||
|
checkRecombine: true,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tc := range cases {
|
||||||
|
t.Run(tc.name, func(t *testing.T) {
|
||||||
|
chunks := splitDiffIntoChunks([]byte(tc.diff), tc.maxSize)
|
||||||
|
if tc.wantChunks > 0 && len(chunks) != tc.wantChunks {
|
||||||
|
t.Fatalf("expected %d chunks, got %d", tc.wantChunks, len(chunks))
|
||||||
|
}
|
||||||
|
for i, prefix := range tc.wantPrefixes {
|
||||||
|
if i >= len(chunks) {
|
||||||
|
t.Fatalf("missing chunk %d for prefix check", i)
|
||||||
|
}
|
||||||
|
trimmed := strings.TrimPrefix(chunks[i], "\n")
|
||||||
|
if !strings.HasPrefix(trimmed, prefix) {
|
||||||
|
t.Fatalf("chunk %d does not start with expected prefix %q: %s", i, prefix, chunks[i])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for i, c := range chunks {
|
||||||
|
if tc.maxSize > 0 && len(c) > tc.maxSize {
|
||||||
|
t.Fatalf("chunk %d exceeds max size %d: %d", i, tc.maxSize, len(c))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if tc.checkRecombine {
|
||||||
|
recombined := strings.Join(chunks, "")
|
||||||
|
if diff := cmp.Diff(tc.diff, recombined); diff != "" {
|
||||||
|
t.Fatalf("recombined diff differs:\n%s", diff)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestJudgePR_ChunkAggregationAndDeduplication(t *testing.T) {
|
||||||
|
svc := &Service{
|
||||||
|
maxChunkSize: 50,
|
||||||
|
guidelines: nil,
|
||||||
|
git: &mockGit{},
|
||||||
|
chat: &mockChat{},
|
||||||
|
}
|
||||||
|
diffReader, err := svc.git.GetDiff(context.Background(), "", "", 0)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("failed to get diff: %v", err)
|
||||||
|
}
|
||||||
|
defer diffReader.Close()
|
||||||
|
comments, err := svc.judgePR(context.Background(), diffReader)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("judgePR error: %v", err)
|
||||||
|
}
|
||||||
|
if got, want := len(comments), 1; got != want {
|
||||||
|
t.Fatalf("expected %d comment after deduplication, got %d", want, got)
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -8,14 +8,20 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
type Service struct {
|
type Service struct {
|
||||||
|
maxChunkSize int
|
||||||
|
guidelines []string
|
||||||
|
disableComments bool
|
||||||
git GitAdapter
|
git GitAdapter
|
||||||
chat ChatAdapter
|
chat ChatAdapter
|
||||||
}
|
}
|
||||||
|
|
||||||
func New(chat ChatAdapter, git GitAdapter) *Service {
|
func New(chat ChatAdapter, git GitAdapter, maxChunkSize int, guidelines []string, disableComments bool) *Service {
|
||||||
return &Service{
|
return &Service{
|
||||||
git: git,
|
git: git,
|
||||||
chat: chat,
|
chat: chat,
|
||||||
|
maxChunkSize: maxChunkSize,
|
||||||
|
guidelines: guidelines,
|
||||||
|
disableComments: disableComments,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -29,9 +29,12 @@ func (s *Service) MakeReview(ctx context.Context, organisation string, repo stri
|
|||||||
fmt.Printf("File: %s\nLine: %d\nMessage: %s\n%s\n",
|
fmt.Printf("File: %s\nLine: %d\nMessage: %s\n%s\n",
|
||||||
c.File, c.Line, c.Message, "---")
|
c.File, c.Line, c.Message, "---")
|
||||||
|
|
||||||
if err := s.git.AddComment(ctx, organisation, repo, prID, c); err != nil {
|
if s.disableComments {
|
||||||
|
log.Printf("dry-run: not posting comment for %s:%d", c.File, c.Line)
|
||||||
|
} else if err := s.git.AddComment(ctx, organisation, repo, prID, c); err != nil {
|
||||||
log.Printf("Failed to add comment: %v", err)
|
log.Printf("Failed to add comment: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
|
|||||||
Reference in New Issue
Block a user