package pierre

import (
	"context"
	"fmt"
	"io"
	"log/slog"
	"strings"
	"unicode/utf8"

	"git.schreifuchs.ch/schreifuchs/pierre-bot/internal/chatter"
)

// DefaultChunkSize is the fallback maximum size (in bytes) for a diff chunk
// when no explicit value is configured.
const DefaultChunkSize = 60000

// Comment is a single review finding: the file and line it refers to and the
// message text. It is decoded from the structured (JSON) answer of the chat
// backend.
type Comment struct {
	File    string `json:"file"`
	Line    int    `json:"line"`
	Message string `json:"message"`
}

// judgePR reads the whole diff, splits it into chunks of at most
// s.maxChunkSize bytes (DefaultChunkSize when that value is not positive),
// asks the chat backend to review every chunk and returns the collected
// comments.
//
// Comments are de-duplicated per file and line. When several comments target
// the same location the last one wins; the result keeps the order in which
// locations were first reported, so the output is deterministic for a given
// sequence of answers.
//
// An error is returned when the diff cannot be read or when any chat request
// fails; no partial result is returned in that case.
func (s *Service) judgePR(ctx context.Context, diff io.Reader) (comments []Comment, err error) {
	slog.Info("judgePR started")

	diffBytes, err := io.ReadAll(diff)
	if err != nil {
		return nil, fmt.Errorf("failed to read diff: %w", err)
	}

	// splitDiffIntoChunks substitutes DefaultChunkSize (60KB ~ 15k tokens)
	// for a non-positive size, so the configured value is passed through as is.
	chunks := splitDiffIntoChunks(diffBytes, s.maxChunkSize)

	// System prompt that instructs the LLM precisely. The optional project
	// guidelines are appended as a separate section; the two leading newlines
	// make them start on a fresh paragraph.
	var prompt strings.Builder
	prompt.WriteString(strings.TrimSpace(`
You are a strict senior software architect. Only comment on newly added or modified lines in the diff; ignore deletions, pure formatting, or re‑ordering that does not change behavior. For each issue output a JSON object with fields "file", "line", and "message" (message should be concise, ≤2 sentences, and actionable). If project guidelines are provided, treat them as hard rules that must be respected.
`))
	if len(s.guidelines) > 0 {
		prompt.WriteString("\n\nProject guidelines:\n")
		for _, g := range s.guidelines {
			prompt.WriteString("- " + g + "\n")
		}
	}
	baseSystem := prompt.String()

	var allComments []Comment
	for i, chunk := range chunks {
		// Mention the chunk position only when the diff was actually split.
		systemContent := baseSystem
		if len(chunks) > 1 {
			systemContent = fmt.Sprintf("%s\nChunk %d of %d.", baseSystem, i+1, len(chunks))
		}

		var chunkComments []Comment
		err = s.chat.GenerateStructured(ctx, []chatter.Message{
			{Role: chatter.RoleSystem, Content: systemContent},
			{Role: chatter.RoleUser, Content: chunk},
		}, &chunkComments)
		if err != nil {
			return nil, fmt.Errorf("reviewing diff chunk %d of %d: %w", i+1, len(chunks), err)
		}
		allComments = append(allComments, chunkComments...)
	}

	// De-duplicate by file and line. A slice plus an index map is used instead
	// of ranging over a map, because map iteration order is random and would
	// shuffle the comments on every run.
	type location struct {
		file string
		line int
	}
	seen := make(map[location]int, len(allComments))
	for _, c := range allComments {
		loc := location{file: c.File, line: c.Line}
		if i, ok := seen[loc]; ok {
			comments[i] = c // a later comment replaces an earlier one
			continue
		}
		seen[loc] = len(comments)
		comments = append(comments, c)
	}

	slog.Info("judgePR finished", "comments", len(comments))
	return comments, nil
}

// splitDiffIntoChunks splits a diff into chunks that do not exceed maxSize
// bytes. A non-positive maxSize is replaced by DefaultChunkSize.
//
// Splitting is attempted in this order:
//
//  1. on file boundaries (lines starting with "diff --git "),
//  2. on hunk boundaries (lines starting with "@@ ") for a file that does not
//     fit into one chunk,
//  3. on a hard size limit for a hunk (or a file without hunks) that is still
//     too large, preferring line boundaries and never cutting inside a UTF-8
//     sequence when that can be avoided.
//
// When a file is split across several chunks, every chunk that starts inside
// that file begins with the file header (everything before the first hunk),
// unless the header alone would take more than half of a chunk. Apart from
// these repeated headers, concatenating the chunks yields the input unchanged.
func splitDiffIntoChunks(diff []byte, maxSize int) []string {
	if maxSize <= 0 {
		// A zero limit would never make progress and a negative one would
		// panic when slicing, so fall back to the default.
		maxSize = DefaultChunkSize
	}
	if len(diff) <= maxSize {
		return []string{string(diff)}
	}

	var (
		chunks  []string
		current strings.Builder
	)
	flush := func() {
		if current.Len() > 0 {
			chunks = append(chunks, current.String())
			current.Reset()
		}
	}

	for _, file := range splitBeforeMarker(string(diff), "diff --git ") {
		if len(file) <= maxSize {
			if current.Len()+len(file) > maxSize {
				flush()
			}
			current.WriteString(file)
			continue
		}

		// The file cannot fit into any chunk as a whole; it always starts a
		// fresh chunk so that its header leads that chunk.
		flush()

		parts := splitBeforeMarker(file, "@@ ")
		header, hunks := "", parts
		if !strings.HasPrefix(file, "@@ ") {
			header, hunks = parts[0], parts[1:]
		}

		if len(hunks) == 0 {
			// No hunk marker - split purely by size. The last piece stays
			// open so that following files may share its chunk.
			pieces := hardSplit(file, maxSize)
			chunks = append(chunks, pieces[:len(pieces)-1]...)
			current.WriteString(pieces[len(pieces)-1])
			continue
		}

		for _, hunk := range hunks {
			// The open chunk already carries this file's header.
			if current.Len() > 0 && current.Len()+len(hunk) <= maxSize {
				current.WriteString(hunk)
				continue
			}
			flush()

			if len(header)+len(hunk) <= maxSize {
				current.WriteString(header)
				current.WriteString(hunk)
				continue
			}

			// A single hunk exceeds the limit. Repeat the header on every
			// piece when it is small enough; otherwise treat header and hunk
			// as plain text so nothing is lost.
			prefix, body := header, hunk
			if len(header) > maxSize/2 {
				prefix, body = "", header+hunk
			}
			pieces := hardSplit(body, maxSize-len(prefix))
			for _, p := range pieces[:len(pieces)-1] {
				chunks = append(chunks, prefix+p)
			}
			current.WriteString(prefix)
			current.WriteString(pieces[len(pieces)-1])
		}
	}

	flush()
	return chunks
}

// splitBeforeMarker cuts s in front of every line that starts with marker.
// Each part keeps its own trailing newline, so joining the parts reproduces s
// exactly. The result always has at least one element.
func splitBeforeMarker(s, marker string) []string {
	sep := "\n" + marker
	var parts []string
	for {
		i := strings.Index(s, sep)
		if i < 0 {
			return append(parts, s)
		}
		parts = append(parts, s[:i+1])
		s = s[i+1:]
	}
}

// hardSplit cuts s into consecutive pieces of at most limit bytes; limit must
// be positive. A cut is placed after the last newline inside the window when
// that newline lies in the second half of the window; otherwise it is moved
// back by up to three bytes so that it does not fall inside a UTF-8 sequence.
// Joining the pieces reproduces s, and the result always has at least one
// element.
func hardSplit(s string, limit int) []string {
	var pieces []string
	for len(s) > limit {
		cut := limit
		if nl := strings.LastIndexByte(s[:limit], '\n'); nl >= limit/2 {
			cut = nl + 1
		} else {
			for c := limit; c > 0 && c > limit-utf8.UTFMax; c-- {
				if utf8.RuneStart(s[c]) {
					cut = c
					break
				}
			}
		}
		pieces = append(pieces, s[:cut])
		s = s[cut:]
	}
	return append(pieces, s)
}