feat: guidelines

feat: add logs
feat(pierre): sanity check
2026-02-13 18:33:21 +01:00 · 2026-02-13 18:04:09 +01:00 · 2026-02-13 17:35:39 +01:00
11 changed files with 403 additions and 87 deletions
@@ -6,20 +6,20 @@ layout: default

 > This document is consulted by autonomous coding agents (including the
 > Open‑Source `opencode` agent) when they read, modify, test, or lint the
-> code‑base.  It contains the canonical commands for building, testing, and
+> code‑base. It contains the canonical commands for building, testing, and
 > lint‑ing the project as well as a concise style guide that all Go code must
-> follow.  The file is deliberately kept around **150 lines** to be readable for
+> follow. The file is deliberately kept around **150 lines** to be readable for
 > both humans and LLMs.

 ---

 ## 1️⃣ Build / Run Commands

-| Command | Description |
-|--------|-------------|
-| `go build -o bin/pierre ./cmd/pierre/main.go` | Build the binary into `./bin/pierre`. |
-| `go run ./cmd/pierre/main.go --help` | Run the CLI with the help flag (fast sanity check). |
-| `./bin/pierre --version` | Verify the built binary reports its version. |
+| Command                                       | Description                                         |
+| --------------------------------------------- | --------------------------------------------------- |
+| `go build -o bin/pierre ./cmd/pierre/main.go` | Build the binary into `./bin/pierre`.               |
+| `go run ./cmd/pierre/main.go --help`          | Run the CLI with the help flag (fast sanity check). |
+| `./bin/pierre --version`                      | Verify the built binary reports its version.        |

 > **Tip for agents** – always prefer the `./bin/pierre` binary over invoking `go run` in CI or tests; it guarantees the same compilation flags.

@@ -29,13 +29,13 @@ layout: default

 The project uses the standard Go testing framework.

-| Command | Use case |
-|---------|----------|
-| `go test ./...` | Run **all** unit and integration tests. |
-| `go test ./... -run ^TestJudgePR$` | Run a **single** test (replace `TestJudgePR` with the desired name). |
-| `go test -cover ./...` | Run tests and emit a coverage report. |
-| `go test -run ^TestName$ -count=1 -v` | Verbose, non‑cached run for debugging a single test. |
-| `go test ./... -bench .` | Execute benchmarks (if any). |
+| Command                               | Use case                                                             |
+| ------------------------------------- | -------------------------------------------------------------------- |
+| `go test ./...`                       | Run **all** unit and integration tests.                              |
+| `go test ./... -run ^TestJudgePR$`    | Run a **single** test (replace `TestJudgePR` with the desired name). |
+| `go test -cover ./...`                | Run tests and emit a coverage report.                                |
+| `go test -run ^TestName$ -count=1 -v` | Verbose, non‑cached run for debugging a single test.                 |
+| `go test ./... -bench .`              | Execute benchmarks (if any).                                         |

 > **Agents should never** invoke `go test` with `-parallel` flags; the default parallelism is sufficient and ensures deterministic ordering for our table‑driven tests.

@@ -46,10 +46,10 @@ The project uses the standard Go testing framework.
 We rely on two lightweight built‑in tools plus optional `golangci-lint` when
 available.

-| Tool | Command | Description |
-|------|---------|-------------|
-| `go vet ./...` | `go vet ./...` | Detect obvious bugs, misuse of `fmt.Printf`, etc. |
-| `staticcheck ./...` | `staticcheck ./...` | Deeper static analysis (must be installed via `go install honnef.co/go/tools/cmd/staticcheck@latest`). |
+| Tool                       | Command             | Description                                                                                                                                                 |
+| -------------------------- | ------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `go vet ./...`             | `go vet ./...`      | Detect obvious bugs, misuse of `fmt.Printf`, etc.                                                                                                           |
+| `staticcheck ./...`        | `staticcheck ./...` | Deeper static analysis (must be installed via `go install honnef.co/go/tools/cmd/staticcheck@latest`).                                                      |
 | `golangci-lint` (optional) | `golangci-lint run` | Runs a configurable suite of linters. Install with `brew install golangci-lint` or `go install github.com/golangci/golangci-lint/cmd/golangci-lint@latest`. |

 **Agent tip** – when `golangci-lint` is present, run it after `go vet` to catch style
@@ -60,66 +60,74 @@ use the same commands.

 ## 4️⃣ Code‑Style Guidelines (Go)

-All changes must satisfy the following conventions.  They are enforced by
-`go fmt`, `go vet`, and the optional `golangci-lint` config.
+All changes must satisfy the following conventions. They are enforced by
+`go fmt`, `go vet`, and the optional `golangci-lint` config. The code does not have
+to be backwards compatible.

 ### 4.1 Formatting & Imports
-* Run `go fmt ./...` before committing.  The repository uses **tab‑indented** Go code.
-* Imports are grouped in three blocks, separated by a blank line:
+
+- Run `go fmt ./...` before committing. The repository uses **tab‑indented** Go code.
+- Imports are grouped in three blocks, separated by a blank line:
  1. **Standard library**
  2. **External dependencies** (e.g., `github.com/alecthomas/kong`)
  3. **Internal packages** (e.g., `github.com/yourorg/pierre/internal/...`)
-* Within each block, imports are sorted alphabetically.
-* Use `goimports` (or `golangci‑lint --fast`) to automatically fix import ordering.
+- Within each block, imports are sorted alphabetically.
+- Use `goimports` (or `golangci‑lint --fast`) to automatically fix import ordering.

 ### 4.2 Naming Conventions
-| Entity | Rule |
-|--------|------|
-| Packages | lower‑case, no underscores or hyphens. |
-| Files | snake_case (`*_test.go` for tests). |
-| Types / Structs | PascalCase, descriptive (e.g., `ReviewConfig`). |
-| Interfaces | End with `er` when appropriate (e.g., `ChatAdapter`). |
-| Variables / Constants | camelCase for locals, PascalCase for exported. |
-| Functions | PascalCase if exported, camelCase if private. |
-| Constants | Use `CamelCase` for groups, `CONST_NAME` only for truly global values. |
-| Test Functions | `TestXxx` and optionally `BenchmarkXxx`. |
+
+| Entity                | Rule                                                                   |
+| --------------------- | ---------------------------------------------------------------------- |
+| Packages              | lower‑case, no underscores or hyphens.                                 |
+| Files                 | snake_case (`*_test.go` for tests).                                    |
+| Types / Structs       | PascalCase, descriptive (e.g., `ReviewConfig`).                        |
+| Interfaces            | End with `er` when appropriate (e.g., `ChatAdapter`).                  |
+| Variables / Constants | camelCase for locals, PascalCase for exported.                         |
+| Functions             | PascalCase if exported, camelCase if private.                          |
+| Constants             | Use `CamelCase` for groups, `CONST_NAME` only for truly global values. |
+| Test Functions        | `TestXxx` and optionally `BenchmarkXxx`.                               |

 ### 4.3 Error Handling
-* Errors are **never** ignored.  Use the blank identifier only when the value is truly irrelevant.
-* Wrap contextual information using `fmt.Errorf("context: %w", err)` or `errors.Wrap` if the project imports `github.com/pkg/errors` (currently not used).
-* Return errors **as soon as** they are detected – "guard clause" style.
-* In public APIs, prefer error values over panics.  Panics are limited to unrecoverable
+
+- Errors are **never** ignored. Use the blank identifier only when the value is truly irrelevant.
+- Wrap contextual information using `fmt.Errorf("context: %w", err)` or `errors.Wrap` if the project imports `github.com/pkg/errors` (currently not used).
+- Return errors **as soon as** they are detected – "guard clause" style.
+- In public APIs, prefer error values over panics. Panics are limited to unrecoverable
  programming errors (e.g., nil‑pointer dereference in `init`).

 ### 4.4 Documentation & Comments
-* Exported identifiers **must** have a preceding doc comment beginning with the name (e.g., `// JudgePR reviews a PR and returns comments.`).
-* Inline comments should be sentence‑case and end with a period.
-* Use `//go:generate` directives sparingly; they must be accompanied by a test that ensures the generated file is up‑to‑date.
+
+- Exported identifiers **must** have a preceding doc comment beginning with the name (e.g., `// JudgePR reviews a PR and returns comments.`).
+- Inline comments should be sentence‑case and end with a period.
+- Use `//go:generate` directives sparingly; they must be accompanied by a test that ensures the generated file is up‑to‑date.

 ### 4.5 Testing Practices
-* Keep tests **table‑driven** where possible; this yields concise, readable test suites.
-* Use the `testing` package only; avoid third‑party test frameworks.
-* Prefer `t.Fatalf` for fatal errors and `t.Errorf` for non‑fatal assertions.
-* When comparing complex structs, use `github.com/google/go-cmp/cmp` (already in `go.mod`).
-* Test coverage should be **≥ 80 %** for new code.
-* All tests must be **deterministic** – no reliance on external services; use mocks or fakes.
+
+- Keep tests **table‑driven** where possible; this yields concise, readable test suites.
+- Use the `testing` package only; avoid third‑party test frameworks.
+- Prefer `t.Fatalf` for fatal errors and `t.Errorf` for non‑fatal assertions.
+- When comparing complex structs, use `github.com/google/go-cmp/cmp` (already in `go.mod`).
+- Test coverage should be **≥ 80 %** for new code.
+- All tests must be **deterministic** – no reliance on external services; use mocks or fakes.

 ### 4.6 Dependency Management
-* All dependencies are managed via Go modules (`go.mod`).
-* Run `go mod tidy` after adding/removing imports.
-* Do not commit `vendor/` unless a vendoring strategy is explicitly adopted.
+
+- All dependencies are managed via Go modules (`go.mod`).
+- Run `go mod tidy` after adding/removing imports.
+- Do not commit `vendor/` unless a vendoring strategy is explicitly adopted.

 ### 4.7 Logging & Output
-* Use the standard library `log` package for user‑visible output.
-* For structured logs (debug mode), wrap with a small helper that respects the `--debug` flag.
-* CLI output must be **machine‑parsable** when `--json-output` is set (future feature).
+
+- Use the standard library `log` package for user‑visible output.
+- For structured logs (debug mode), wrap with a small helper that respects the `--debug` flag.
+- CLI output must be **machine‑parsable** when `--json-output` is set (future feature).

 ---

 ## 5️⃣ Git & Pull‑Request Workflow (for agents)

 1. **Branch naming** – `feature/<short‑desc>` or `bugfix/<short‑desc>`.
-2. **Commits** – One logical change per commit, with a concise subject line (<50 chars) and an optional body explaining *why* the change was needed.
+2. **Commits** – One logical change per commit, with a concise subject line (<50 chars) and an optional body explaining _why_ the change was needed.
 3. **CI** – Every PR must pass `go test ./...`, `go vet ./...`, and `golangci‑lint run` (if configured).
 4. **Review** – Agents should add comments only via the LLM; do not edit code generated by the LLM unless a test fails.
 5. **Rebasing** – Keep a linear history; use `git rebase -i` locally before merging.
@@ -129,7 +137,7 @@ All changes must satisfy the following conventions.  They are enforced by
 ## 6️⃣ Cursor / Copilot Rules (None Present)

 The repository does not contain a `.cursor` directory or a `.github/copilot‑instructions.md`
-file.  If such files are added in the future, agents should read them and incorporate
+file. If such files are added in the future, agents should read them and incorporate
 any additional constraints into this document.

 ---
@@ -159,4 +167,5 @@ go mod tidy

 ---

-*End of AGENTS.md*
+_End of AGENTS.md_
+
@@ -3,8 +3,10 @@ package main
 import (
 	"context"
 	"log"
+	"log/slog"
 	"os"
 	"path/filepath"
+	"strings"

 	"git.schreifuchs.ch/schreifuchs/pierre-bot/internal/chatter"
 	"git.schreifuchs.ch/schreifuchs/pierre-bot/internal/gitadapters/bitbucket"
@@ -43,9 +45,12 @@ type ReviewConfig struct {
 	MaxChunkSize    int      `help:"Maximum diff chunk size in bytes" default:"60000"`
 	Guidelines      []string `help:"Project guidelines to prepend" sep:","`
 	DisableComments bool     `help:"Disable posting comments (dry run)"`
+	SanityCheck     bool     `help:"Run sanity‑check LLM prompts per comment" default:"true"`
 }

 type Config struct {
+	LogLevel string `help:"Log verbosity: debug, info, warn, error" default:"info"`
+
 	// Embedding ReviewConfig with a prefix changes flag names to `--review-…`.
 	// Existing configuration files using the old flag names will need to be updated.
 	// Consider keeping backwards compatibility if required.
@@ -75,6 +80,24 @@ func main() {
 		kong.Configuration(kongyaml.Loader, "config.yaml", defaultConfig),
 	)

+	// Configure global slog logger based on the --log-level flag
+	lvl := slog.LevelInfo
+	switch strings.ToLower(cfg.LogLevel) {
+	case "debug":
+		lvl = slog.LevelDebug
+	case "info":
+		lvl = slog.LevelInfo
+	case "warn":
+		lvl = slog.LevelWarn
+	case "error":
+		lvl = slog.LevelError
+	}
+
+	// Logs are sent to stderr so that stdout can be safely piped.
+	// The review output (fmt.Printf) stays on stdout unchanged.
+	logger := slog.New(slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{Level: lvl}))
+	slog.SetDefault(logger)
+
 	// Auto-detect provider
 	provider := cfg.GitProvider
 	if provider == "" {
@@ -130,6 +153,7 @@ func main() {
 	}

 	pierreService := pierre.New(ai, git, cfg.Review.MaxChunkSize, cfg.Review.Guidelines, cfg.Review.DisableComments)
+	pierreService.SetSanityCheck(cfg.Review.SanityCheck)
 	if err := pierreService.MakeReview(context.Background(), cfg.Repo.Owner, cfg.Repo.Repo, cfg.Repo.PRID); err != nil {
 		log.Fatalf("Error during review: %v", err)
 	}
@@ -0,0 +1,80 @@
+package bitbucket
+
+import (
+	"context"
+	"net/http"
+	"net/http/httptest"
+	"testing"
+)
+
+func TestBitbucketGetFileContentSuccess(t *testing.T) {
+	const expected = "file content"
+	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		// Verify path structure
+		if r.URL.Path != "/rest/api/1.0/projects/owner/repos/repo/raw/path/to/file.go" {
+			t.Fatalf("unexpected URL path: %s", r.URL.Path)
+		}
+		w.WriteHeader(http.StatusOK)
+		w.Write([]byte(expected))
+	}))
+	defer server.Close()
+
+	// Trim trailing slash handling done in NewBitbucket
+	adapter := NewBitbucket(server.URL, "")
+	content, err := adapter.GetFileContent(context.Background(), "owner", "repo", "path/to/file.go", "")
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if content != expected {
+		t.Fatalf("expected %q, got %q", expected, content)
+	}
+}
+
+func TestBitbucketGetFileContentError(t *testing.T) {
+	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.WriteHeader(http.StatusNotFound)
+		w.Write([]byte("not found"))
+	}))
+	defer server.Close()
+
+	adapter := NewBitbucket(server.URL, "")
+	_, err := adapter.GetFileContent(context.Background(), "owner", "repo", "missing.go", "")
+	if err == nil {
+		t.Fatalf("expected error for non‑200 response")
+	}
+}
+
+func TestBitbucketGetPRHeadSHASuccess(t *testing.T) {
+	const sha = "deadbeef"
+	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		if r.URL.Path != "/rest/api/1.0/projects/owner/repos/repo/pull-requests/42" {
+			t.Fatalf("unexpected URL: %s", r.URL.Path)
+		}
+		w.WriteHeader(http.StatusOK)
+		w.Write([]byte(`{"toRef":{"latestCommit":"` + sha + `"}}`))
+	}))
+	defer server.Close()
+
+	adapter := NewBitbucket(server.URL, "")
+	got, err := adapter.GetPRHeadSHA(context.Background(), "owner", "repo", 42)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if got != sha {
+		t.Fatalf("expected sha %s, got %s", sha, got)
+	}
+}
+
+func TestBitbucketGetPRHeadSHAError(t *testing.T) {
+	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.WriteHeader(http.StatusInternalServerError)
+		w.Write([]byte("error"))
+	}))
+	defer server.Close()
+
+	adapter := NewBitbucket(server.URL, "")
+	_, err := adapter.GetPRHeadSHA(context.Background(), "owner", "repo", 1)
+	if err == nil {
+		t.Fatalf("expected error for non‑200 response")
+	}
+}
@@ -5,6 +5,7 @@ import (
 	"encoding/json"
 	"fmt"
 	"io"
+	"log/slog"
 	"net/http"
 	"strconv"
 	"strings"
@@ -13,6 +14,7 @@ import (
 )

 func (b *BitbucketAdapter) GetDiff(ctx context.Context, projectKey, repositorySlug string, pullRequestID int) (diff io.ReadCloser, err error) {
+	slog.Debug("Bitbucket GetDiff start", "project", projectKey, "repo", repositorySlug, "pr", pullRequestID)
 	r, err := b.CreateRequest(
 		ctx,
 		http.MethodGet,
@@ -39,6 +41,7 @@ func (b *BitbucketAdapter) GetDiff(ctx context.Context, projectKey, repositorySl
 }

 func (b *BitbucketAdapter) GetPR(ctx context.Context, projectKey, repositorySlug string, pullRequestID int) (pr PullRequest, err error) {
+	slog.Debug("Bitbucket GetPR start", "project", projectKey, "repo", repositorySlug, "pr", pullRequestID)
 	r, err := b.CreateRequest(
 		ctx,
 		http.MethodGet,
@@ -47,17 +50,23 @@ func (b *BitbucketAdapter) GetPR(ctx context.Context, projectKey, repositorySlug
 	)

 	response, err := http.DefaultClient.Do(r)
-	defer response.Body.Close() // Add this
 	if err != nil {
 		return
 	}
+	defer response.Body.Close() // Add this

 	err = json.NewDecoder(response.Body).Decode(&pr)
+	if err != nil {
+		slog.Error("Bitbucket GetPR decode error", "err", err)
+		return
+	}
+	slog.Info("Bitbucket GetPR success", "id", pullRequestID)

 	return
 }

 func (b *BitbucketAdapter) AddComment(ctx context.Context, owner, repo string, prID int, comment pierre.Comment) (err error) {
+	slog.Debug("Bitbucket AddComment start", "owner", owner, "repo", repo, "pr", prID, "file", comment.File, "line", comment.Line)
 	// pr, err := b.GetPR(ctx, owner, repo, prID)
 	// if err != nil {
 	// 	return
@@ -86,15 +95,18 @@ func (b *BitbucketAdapter) AddComment(ctx context.Context, owner, repo string, p
 	}

 	response, err := http.DefaultClient.Do(r)
-	defer response.Body.Close() // Add this
 	if err != nil {
 		return err
 	}
+	defer response.Body.Close() // Add this

 	if response.StatusCode >= 300 || response.StatusCode < 200 {
 		sb := &strings.Builder{}
 		io.Copy(sb, response.Body)
 		err = fmt.Errorf("error while creating comment staus %d, body %s", response.StatusCode, sb.String())
+		slog.Error("Bitbucket AddComment failed", "status", response.StatusCode, "err", err)
+	} else {
+		slog.Info("Bitbucket AddComment succeeded", "pr", prID)
 	}

 	return err
@@ -31,6 +31,8 @@ func (b *BitbucketAdapter) GetFileContent(ctx context.Context, projectKey, repos
 	if err != nil {
 		return "", err
 	}
+	// Ensure raw file retrieval
+	r.Header.Set("Accept", "application/octet-stream")
 	if ref != "" {
 		q := r.URL.Query()
 		q.Set("at", ref)
@@ -5,6 +5,8 @@ import (
 	"context"
 	"fmt"
 	"io"
+	"log/slog"
+	"net/http"

 	"code.gitea.io/sdk/gitea"
 	"git.schreifuchs.ch/schreifuchs/pierre-bot/internal/pierre"
@@ -25,15 +27,19 @@ func New(baseURL, token string) (*Adapter, error) {
 }

 func (g *Adapter) GetDiff(ctx context.Context, owner, repo string, prID int) (io.ReadCloser, error) {
+	slog.Debug("Gitea GetDiff start", "owner", owner, "repo", repo, "pr", prID)
 	g.client.SetContext(ctx)
 	diff, _, err := g.client.GetPullRequestDiff(owner, repo, int64(prID), gitea.PullRequestDiffOptions{})
 	if err != nil {
 		return nil, err
 	}
-	return io.NopCloser(bytes.NewReader(diff)), nil
+	rc := io.NopCloser(bytes.NewReader(diff))
+	slog.Info("Gitea GetDiff success", "owner", owner, "repo", repo, "pr", prID, "bytes", len(diff))
+	return rc, nil
 }

 func (g *Adapter) AddComment(ctx context.Context, owner, repo string, prID int, comment pierre.Comment) error {
+	slog.Debug("Gitea AddComment start", "owner", owner, "repo", repo, "pr", prID, "file", comment.File, "line", comment.Line)
 	g.client.SetContext(ctx)
 	opts := gitea.CreatePullReviewOptions{
 		State: gitea.ReviewStateComment,
@@ -45,23 +51,34 @@ func (g *Adapter) AddComment(ctx context.Context, owner, repo string, prID int,
 			},
 		},
 	}
-	_, _, err := g.client.CreatePullReview(owner, repo, int64(prID), opts)
-	return err
+	_, resp, err := g.client.CreatePullReview(owner, repo, int64(prID), opts)
+	if err != nil {
+		slog.Error("Gitea AddComment failed", "err", err)
+		return err
+	}
+	if resp != nil && resp.StatusCode != http.StatusCreated {
+		return fmt.Errorf("unexpected status %d creating comment", resp.StatusCode)
+	}
+	slog.Info("Gitea AddComment succeeded", "pr", prID)
+	return nil
 }

 // GetFileContent returns the file content at a given path and ref (commit SHA).
 func (g *Adapter) GetFileContent(ctx context.Context, owner, repo, path, ref string) (string, error) {
+	slog.Debug("Gitea GetFileContent start", "owner", owner, "repo", repo, "path", path, "ref", ref)
 	g.client.SetContext(ctx)
 	// The SDK's GetFile returns the raw bytes of the file.
 	data, _, err := g.client.GetFile(owner, repo, ref, path)
 	if err != nil {
 		return "", err
 	}
+	slog.Info("Gitea GetFileContent success", "bytes", len(data))
 	return string(data), nil
 }

 // GetPRHeadSHA fetches the pull request and returns the head commit SHA.
 func (g *Adapter) GetPRHeadSHA(ctx context.Context, owner, repo string, prID int) (string, error) {
+	slog.Debug("Gitea GetPRHeadSHA start", "owner", owner, "repo", repo, "pr", prID)
 	g.client.SetContext(ctx)
 	// GetPullRequest returns the detailed PR information.
 	pr, _, err := g.client.GetPullRequest(owner, repo, int64(prID))
@@ -71,5 +88,6 @@ func (g *Adapter) GetPRHeadSHA(ctx context.Context, owner, repo string, prID int
 	if pr == nil || pr.Head == nil {
 		return "", fmt.Errorf("pull request %d has no head information", prID)
 	}
+	slog.Info("Gitea GetPRHeadSHA success", "sha", pr.Head.Sha)
 	return pr.Head.Sha, nil
 }
@@ -0,0 +1,70 @@
+package pierre
+
+import (
+	"fmt"
+	"strings"
+	"testing"
+)
+
+func TestParseGuidelinesFromStringValid(t *testing.T) {
+	md := "# Rule One\n\n  - Item A  \n\n# Rule Two\n"
+	lines, err := parseGuidelinesFromString(md)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	expected := []string{"# Rule One", "- Item A", "# Rule Two"}
+	if got, want := fmt.Sprint(lines), fmt.Sprint(expected); got != want {
+		t.Fatalf("expected %v, got %v", expected, lines)
+	}
+}
+
+func TestParseGuidelinesFromStringEmpty(t *testing.T) {
+	md := "\n   \n"
+	lines, err := parseGuidelinesFromString(md)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if len(lines) != 0 {
+		t.Fatalf("expected empty slice, got %d elements", len(lines))
+	}
+}
+
+func TestParseGuidelinesFromStringTooManyLines(t *testing.T) {
+	// generate 1001 non-empty lines
+	var sb strings.Builder
+	for i := 0; i < 1001; i++ {
+		sb.WriteString(fmt.Sprintf("Line %d\n", i))
+	}
+	_, err := parseGuidelinesFromString(sb.String())
+	if err == nil {
+		t.Fatalf("expected error for exceeding line limit, got nil")
+	}
+}
+
+func TestWithGuidelinesSuccess(t *testing.T) {
+	svc := &Service{}
+	md := "First line\nSecond line\n"
+	if err := svc.WithGuidelines(md); err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	expected := []string{"First line", "Second line"}
+	if got, want := fmt.Sprint(svc.guidelines), fmt.Sprint(expected); got != want {
+		t.Fatalf("expected guidelines %v, got %v", expected, svc.guidelines)
+	}
+}
+
+func TestWithGuidelinesError(t *testing.T) {
+	svc := &Service{guidelines: []string{"old"}}
+	var sb strings.Builder
+	for i := 0; i < 1001; i++ {
+		sb.WriteString("x\n")
+	}
+	err := svc.WithGuidelines(sb.String())
+	if err == nil {
+		t.Fatalf("expected error, got nil")
+	}
+	// ensure old guidelines unchanged
+	if len(svc.guidelines) != 1 || svc.guidelines[0] != "old" {
+		t.Fatalf("guidelines should remain unchanged on error")
+	}
+}
@@ -4,6 +4,7 @@ import (
 	"context"
 	"fmt"
 	"io"
+	"log/slog"
 	"strings"

 	"git.schreifuchs.ch/schreifuchs/pierre-bot/internal/chatter"
@@ -19,6 +20,7 @@ type Comment struct {
 }

 func (s *Service) judgePR(ctx context.Context, diff io.Reader) (comments []Comment, err error) {
+	slog.Info("judgePR started")
 	diffBytes, err := io.ReadAll(diff)
 	if err != nil {
 		return nil, fmt.Errorf("failed to read diff: %w", err)
@@ -81,6 +83,7 @@ If project guidelines are provided, treat them as hard rules that must be respec
 	for _, v := range unique {
 		comments = append(comments, v)
 	}
+	slog.Info("judgePR finished", "comments", len(comments))
 	return
 }

@@ -88,6 +91,10 @@ If project guidelines are provided, treat them as hard rules that must be respec
 // It tries to split on file boundaries ("diff --git") first, then on hunk boundaries (@@),
 // and finally on a hard byte limit.
 func splitDiffIntoChunks(diff []byte, maxSize int) []string {
+	// Preserve the file header for each chunk when a single file's diff is split across multiple chunks.
+	// The header is the portion before the first hunk marker "@@" (including the "diff --git" line).
+	// When we need to split by hunks, we prepend this header to every resulting sub‑chunk.
+
 	if len(diff) <= maxSize {
 		return []string{string(diff)}
 	}
@@ -107,22 +114,31 @@ func splitDiffIntoChunks(diff []byte, maxSize int) []string {
 			current.Reset()
 		}
 		if len(seg) > maxSize {
-			// Split further by hunks
-			hunks := strings.Split(seg, "\n@@ ")
-			for j, h := range hunks {
-				var hseg string
-				if j == 0 {
-					// First hunk segment already contains the preceding content (including any needed newline)
-					hseg = h
-				} else {
-					// Subsequent hunks need the leading newline and "@@ " marker restored
-					hseg = "\n@@ " + h
+			// Determine if there is a hunk marker. If not, fall back to simple size‑based chunking.
+			headerEnd := strings.Index(seg, "\n@@ ")
+			if headerEnd == -1 {
+				// No hunk marker – split purely by size.
+				remaining := seg
+				for len(remaining) > maxSize {
+					chunks = append(chunks, remaining[:maxSize])
+					remaining = remaining[maxSize:]
 				}
+				current.WriteString(remaining)
+				continue
+			}
+			// Preserve the header up to the first hunk.
+			header := seg[:headerEnd+1] // include newline before "@@"
+			// Split the rest of the segment into hunks (excluding the header part).
+			hunks := strings.Split(strings.TrimPrefix(seg, header), "\n@@ ")
+			for _, h := range hunks {
+				// Reconstruct each hunk with its header and "@@ " prefix.
+				hseg := header + "@@ " + h
 				if current.Len()+len(hseg) > maxSize && current.Len() > 0 {
 					chunks = append(chunks, current.String())
 					current.Reset()
 				}
 				if len(hseg) > maxSize {
+					// If a single hunk exceeds maxSize, split it further.
 					for len(hseg) > maxSize {
 						chunks = append(chunks, hseg[:maxSize])
 						hseg = hseg[maxSize:]
@@ -0,0 +1,44 @@
+package pierre
+
+import (
+	"context"
+	"testing"
+
+	"git.schreifuchs.ch/schreifuchs/pierre-bot/internal/chatter"
+)
+
+type overlapChat struct{ callCount int }
+
+func (m *overlapChat) GenerateStructured(ctx context.Context, msgs []chatter.Message, target interface{}) error {
+	m.callCount++
+	if cSlice, ok := target.(*[]Comment); ok {
+		// Return two comments with same file and line to test deduplication
+		*cSlice = []Comment{{File: "dup.go", Line: 10, Message: "first"}, {File: "dup.go", Line: 10, Message: "second"}}
+		return nil
+	}
+	return nil
+}
+
+func (m *overlapChat) GetProviderName() string { return "mock" }
+
+func TestJudgePR_DeduplicationOverlap(t *testing.T) {
+	chat := &overlapChat{}
+	svc := &Service{
+		maxChunkSize: 1000,
+		guidelines:   nil,
+		git:          &mockGit{},
+		chat:         chat,
+	}
+	diffReader, err := svc.git.GetDiff(context.Background(), "", "", 0)
+	if err != nil {
+		t.Fatalf("failed to get diff: %v", err)
+	}
+	defer diffReader.Close()
+	comments, err := svc.judgePR(context.Background(), diffReader)
+	if err != nil {
+		t.Fatalf("judgePR error: %v", err)
+	}
+	if len(comments) != 1 {
+		t.Fatalf("expected 1 deduplicated comment, got %d", len(comments))
+	}
+}
@@ -2,7 +2,9 @@ package pierre

 import (
 	"context"
+	"fmt"
 	"io"
+	"strings"

 	"git.schreifuchs.ch/schreifuchs/pierre-bot/internal/chatter"
 )
@@ -14,22 +16,63 @@ import (
 // elsewhere in the future.
 type Service struct {
 	maxChunkSize    int
-	guidelines      []string
+	guidelines      []string // stored as slice of lines; legacy, see WithGuidelines
+	skipSanityCheck bool     // if true, skip LLM sanity‑check prompts per comment
 	disableComments bool
 	git             GitAdapter
 	chat            ChatAdapter
 }

 func New(chat ChatAdapter, git GitAdapter, maxChunkSize int, guidelines []string, disableComments bool) *Service {
+	// Existing constructor retains slice based guidelines for backward compatibility.
 	return &Service{
 		git:             git,
 		chat:            chat,
 		maxChunkSize:    maxChunkSize,
 		guidelines:      guidelines,
+		skipSanityCheck: false,
 		disableComments: disableComments,
 	}
 }

+// WithGuidelines parses a raw Markdown string (or any multiline string) into
+// individual guideline lines, validates the line‑count (max 1000 non‑empty lines),
+// and stores the result in the Service. It returns an error if validation fails.
+// This is a convenience mutator for callers that have the guidelines as a
+// single string.
+func (s *Service) WithGuidelines(md string) error {
+	lines, err := parseGuidelinesFromString(md)
+	if err != nil {
+		return err
+	}
+	s.guidelines = lines
+	return nil
+}
+
+// parseGuidelinesFromString splits a markdown string into trimmed, non‑empty
+// lines and ensures the total number of lines does not exceed 1000.
+func (s *Service) SetSanityCheck(enabled bool) {
+	s.skipSanityCheck = !enabled
+}
+
+// parseGuidelinesFromString splits a markdown string into trimmed, non‑empty
+func parseGuidelinesFromString(md string) ([]string, error) {
+	var result []string
+	// Split on newline. Handles both \n and \r\n because TrimSpace removes \r.
+	rawLines := strings.Split(md, "\n")
+	for _, l := range rawLines {
+		trimmed := strings.TrimSpace(l)
+		if trimmed == "" {
+			continue
+		}
+		result = append(result, trimmed)
+	}
+	if len(result) > 1000 {
+		return nil, fmt.Errorf("guidelines exceed 1000 lines (found %d)", len(result))
+	}
+	return result, nil
+}
+
 type GitAdapter interface {
 	GetDiff(ctx context.Context, owner, repo string, prID int) (io.ReadCloser, error)
 	AddComment(ctx context.Context, owner, repo string, prID int, comment Comment) error
@@ -3,7 +3,7 @@ package pierre
 import (
 	"context"
 	"fmt"
-	"log"
+	"log/slog"

 	"git.schreifuchs.ch/schreifuchs/pierre-bot/internal/chatter"
 )
@@ -22,17 +22,17 @@ func (s *Service) MakeReview(ctx context.Context, organisation string, repo stri
 		return fmt.Errorf("error judging PR: %w", err)
 	}

-	// ---------- Sanity‑check step (always enabled) ----------
+	// ---------- Sanity‑check step ----------
 	headSHA, err := s.git.GetPRHeadSHA(ctx, organisation, repo, prID)
 	if err != nil {
-		log.Printf("warning: could not fetch PR head SHA (%v); skipping sanity check", err)
-	} else {
+		slog.Warn("could not fetch PR head SHA", "error", err)
+	} else if !s.skipSanityCheck {
 		filtered := []Comment{}
 		for _, c := range comments {
 			// Retrieve full file content at the PR head
 			fileContent, fErr := s.git.GetFileContent(ctx, organisation, repo, c.File, headSHA)
 			if fErr != nil {
-				log.Printf("failed to fetch file %s: %v – keeping original comment", c.File, fErr)
+				slog.Warn("failed to fetch file", "path", c.File, "error", fErr)
 				filtered = append(filtered, c)
 				continue
 			}
@@ -47,7 +47,7 @@ func (s *Service) MakeReview(ctx context.Context, organisation string, repo stri
 			}
 			var res sanityResult
 			if err := s.chat.GenerateStructured(ctx, []chatter.Message{{Role: chatter.RoleSystem, Content: systemPrompt}, {Role: chatter.RoleUser, Content: userPrompt}}, &res); err != nil {
-				log.Printf("sanity check error for %s:%d: %v – keeping comment", c.File, c.Line, err)
+				slog.Error("sanity check error", "file", c.File, "line", c.Line, "error", err)
 				filtered = append(filtered, c)
 				continue
 			}
@@ -56,7 +56,7 @@ func (s *Service) MakeReview(ctx context.Context, organisation string, repo stri
 				c.Message = fmt.Sprintf("%s (Reason: %s)", c.Message, res.Reason)
 				filtered = append(filtered, c)
 			} else {
-				log.Printf("comment on %s:%d discarded: %s", c.File, c.Line, res.Reason)
+				slog.Info("comment discarded", "file", c.File, "line", c.Line, "reason", res.Reason)
 			}
 		}
 		comments = filtered
@@ -69,15 +69,13 @@ func (s *Service) MakeReview(ctx context.Context, organisation string, repo stri
 	for _, c := range comments {
 		c.Message = fmt.Sprintf("%s (Generated by: %s)", c.Message, model)

-		if s.disableComments {
-			// Dry‑run: just log what would have been posted.
-			log.Printf("dry‑run: %s:%d => %s", c.File, c.Line, c.Message)
-		} else {
-			// Normal mode: print to stdout and post the comment to the VCS.
-			fmt.Printf("File: %s\nLine: %d\nMessage: %s\n%s\n",
-				c.File, c.Line, c.Message, "---")
+		// Normal mode: print to stdout and post the comment to the VCS.
+		fmt.Printf("File: %s\nLine: %d\nMessage: %s\n%s\n",
+			c.File, c.Line, c.Message, "---")
+
+		if !s.disableComments {
 			if err := s.git.AddComment(ctx, organisation, repo, prID, c); err != nil {
-				log.Printf("Failed to add comment: %v", err)
+				slog.Error("failed to add comment", "error", err)
 			}
 		}
 	}
Author	SHA1	Message	Date
u80864958	75ef8da1a4	feat: guidelines	2026-02-13 18:33:21 +01:00
u80864958	2e12c39786	feat: add logs	2026-02-13 18:04:09 +01:00
u80864958	343f6ab165	feat(pierre): sanity check	2026-02-13 17:35:39 +01:00