package pierre

import (
	"context"
	"fmt"
	"io"
	"strings"
	"unicode/utf8"

	"git.schreifuchs.ch/schreifuchs/pierre-bot/internal/chatter"
)

// DefaultChunkSize is the fallback maximum size (in bytes) for a diff chunk
// when no explicit value is configured.
const DefaultChunkSize = 60000

// Comment is one review finding decoded from the structured model output:
// the file it refers to, the line number, and the message text.
type Comment struct {
	File    string `json:"file"`
	Line    int    `json:"line"`
	Message string `json:"message"`
}

// judgePR reads the whole diff, splits it into chunks of at most
// s.maxChunkSize bytes (DefaultChunkSize when that is not positive), asks the
// chat backend to review every chunk, and returns the collected comments.
//
// Comments are de-duplicated by file and line. When several comments target
// the same location the last one wins, but it keeps the position of the first
// occurrence, so the result order is deterministic and follows the order in
// which the chunks were reviewed. The result is nil when no comments were
// produced.
//
// Reading the diff or any single chat request failing aborts the review and
// returns that error with a nil slice.
func (s *Service) judgePR(ctx context.Context, diff io.Reader) ([]Comment, error) {
	diffBytes, err := io.ReadAll(diff)
	if err != nil {
		return nil, fmt.Errorf("failed to read diff: %w", err)
	}

	maxSize := s.maxChunkSize
	if maxSize <= 0 {
		maxSize = DefaultChunkSize // default 60KB ~ 15k tokens
	}
	chunks := splitDiffIntoChunks(diffBytes, maxSize)

	// Optional guidelines go into their own section of the system prompt; the
	// two leading newlines start it on a fresh paragraph.
	var guidelines strings.Builder
	if len(s.guidelines) > 0 {
		guidelines.WriteString("\n\nProject guidelines:\n")
		for _, g := range s.guidelines {
			guidelines.WriteString("- " + g + "\n")
		}
	}

	// The prompt text is runtime data sent to the model; edit it with care.
	baseSystem := strings.TrimSpace(` You are a strict senior software architect. Only comment on newly added or modified lines in the diff; ignore deletions, pure formatting, or re‑ordering that does not change behavior. For each issue output a JSON object with fields "file", "line", and "message" (message should be concise, ≤2 sentences, and actionable). If project guidelines are provided, treat them as hard rules that must be respected.`) + guidelines.String()

	var allComments []Comment
	for i, chunk := range chunks {
		// Only mention the chunk position when the diff was actually split.
		systemContent := baseSystem
		if len(chunks) > 1 {
			systemContent = fmt.Sprintf("%s\nChunk %d of %d.", baseSystem, i+1, len(chunks))
		}

		var chunkComments []Comment
		err := s.chat.GenerateStructured(ctx, []chatter.Message{
			{Role: chatter.RoleSystem, Content: systemContent},
			{Role: chatter.RoleUser, Content: chunk},
		}, &chunkComments)
		if err != nil {
			return nil, err
		}
		allComments = append(allComments, chunkComments...)
	}

	return dedupeComments(allComments), nil
}

// dedupeComments collapses comments that share the same file and line into a
// single entry. The last comment for a location wins, but it is stored at the
// position where that location was first seen, which keeps the output order
// stable (ranging over a map would randomize it). It returns nil for empty
// input.
func dedupeComments(all []Comment) []Comment {
	type location struct {
		file string
		line int
	}

	var unique []Comment
	index := make(map[location]int, len(all))
	for _, c := range all {
		loc := location{file: c.File, line: c.Line}
		if i, seen := index[loc]; seen {
			unique[i] = c
			continue
		}
		index[loc] = len(unique)
		unique = append(unique, c)
	}
	return unique
}

// splitDiffIntoChunks splits a diff into chunks that do not exceed maxSize
// bytes. It tries to split on file boundaries ("diff --git") first, then on
// hunk boundaries ("@@"), and finally on a hard byte limit that is moved back
// to the nearest UTF-8 rune boundary so no character is cut in half.
//
// A non-positive maxSize falls back to DefaultChunkSize. A diff that already
// fits is returned as a single chunk (this includes the empty diff). The
// separators removed while splitting are restored, so concatenating the
// returned chunks yields the original diff.
func splitDiffIntoChunks(diff []byte, maxSize int) []string {
	if maxSize <= 0 {
		// Guard against a hard-split loop that could never make progress.
		maxSize = DefaultChunkSize
	}
	if len(diff) <= maxSize {
		return []string{string(diff)}
	}

	const (
		fileSep = "\ndiff --git "
		hunkSep = "\n@@ "
	)

	var (
		chunks  []string
		current strings.Builder
	)
	// flush closes the chunk under construction, if it has any content.
	flush := func() {
		if current.Len() > 0 {
			chunks = append(chunks, current.String())
			current.Reset()
		}
	}
	// add appends a piece that fits into maxSize on its own, starting a new
	// chunk first when the current one would overflow.
	add := func(piece string) {
		if current.Len()+len(piece) > maxSize {
			flush()
		}
		current.WriteString(piece)
	}

	for i, part := range strings.Split(string(diff), fileSep) {
		file := part
		if i > 0 {
			// Restore the separator consumed by Split.
			file = fileSep + part
		}
		if len(file) <= maxSize {
			add(file)
			continue
		}

		// The file is too large for one chunk: start fresh and go hunk by hunk.
		flush()
		for j, h := range strings.Split(file, hunkSep) {
			hunk := h
			if j > 0 {
				hunk = hunkSep + h
			}
			if len(hunk) <= maxSize {
				add(hunk)
				continue
			}

			// Even a single hunk is too large: cut it by size. Full pieces
			// become chunks immediately; the remainder stays open so that
			// following hunks can still be packed behind it.
			flush()
			for len(hunk) > maxSize {
				cut := runeSafeCut(hunk, maxSize)
				chunks = append(chunks, hunk[:cut])
				hunk = hunk[cut:]
			}
			current.WriteString(hunk)
		}
	}
	flush()

	return chunks
}

// runeSafeCut returns an index in (0, limit] at which s can be cut without
// splitting a UTF-8 encoded rune. The caller must ensure 0 < limit < len(s).
// When no rune start is found within the last few bytes (invalid UTF-8, or a
// limit smaller than one rune) it returns limit so the caller always makes
// progress.
func runeSafeCut(s string, limit int) int {
	cut := limit
	// A rune is at most utf8.UTFMax bytes long, so its first byte is at most
	// utf8.UTFMax-1 positions before any of its continuation bytes.
	for back := 0; back < utf8.UTFMax-1 && cut > 0 && !utf8.RuneStart(s[cut]); back++ {
		cut--
	}
	if cut == 0 || !utf8.RuneStart(s[cut]) {
		return limit
	}
	return cut
}