package pierre import ( "context" "fmt" "io" "strings" "git.schreifuchs.ch/schreifuchs/pierre-bot/internal/chatter" ) const defaultChunkSize = 60000 type Comment struct { File string `json:"file"` Line int `json:"line"` Message string `json:"message"` } func (s *Service) judgePR(ctx context.Context, diff io.Reader) (comments []Comment, err error) { diffBytes, err := io.ReadAll(diff) if err != nil { return nil, fmt.Errorf("failed to read diff: %w", err) } // Determine chunk size (use default if not set) maxSize := s.maxChunkSize if maxSize <= 0 { maxSize = defaultChunkSize // default 60KB ~ 15k tokens } chunks := splitDiffIntoChunks(diffBytes, maxSize) allComments := []Comment{} // Build optional guidelines text guidelinesText := "" if len(s.guidelines) > 0 { guidelinesText = "Project guidelines:\n" for _, g := range s.guidelines { guidelinesText += "- " + g + "\n" } } baseSystem := strings.TrimSpace(` You are a very strict senior software architect. You review **only** newly added or modified lines in a unified diff, together with the immediate hunk context. You do **not** report issues that appear **solely** in deleted lines (“-") or that have already been fixed by the change. No comments are made on pure formatting/whitespace changes or reordering that does not alter the program’s behavior.`) + guidelinesText for i, chunk := range chunks { // Add a small header so the model knows this is a fragment header := fmt.Sprintf("\n--- Chunk %d of %d ---\n", i+1, len(chunks)) userContent := fmt.Sprintf("Hello please review my PR. Write comments where improvements are necessary in new lines.%s\nHere is the git diff of it: %s", header, chunk) var chunkComments []Comment err = s.chat.GenerateStructured(ctx, []chatter.Message{{ Role: chatter.RoleSystem, Content: baseSystem, }, { Role: chatter.RoleUser, Content: userContent, }}, &chunkComments) if err != nil { return nil, err } allComments = append(allComments, chunkComments...) } // De‑duplicate comments (keyed by file:line) unique := make(map[string]Comment) for _, c := range allComments { key := fmt.Sprintf("%s:%d", c.File, c.Line) unique[key] = c } for _, v := range unique { comments = append(comments, v) } return } // splitDiffIntoChunks splits a diff into chunks that do not exceed maxSize bytes. // It tries to split on file boundaries ("diff --git") first, then on hunk boundaries (@@), // and finally on a hard byte limit. func splitDiffIntoChunks(diff []byte, maxSize int) []string { if len(diff) <= maxSize { return []string{string(diff)} } content := string(diff) // Split by file headers parts := strings.Split(content, "\ndiff --git ") chunks := []string{} var current strings.Builder for idx, part := range parts { seg := part if idx != 0 { // Preserve the leading newline that was removed by Split seg = "\n" + "diff --git " + part } if current.Len()+len(seg) > maxSize && current.Len() > 0 { chunks = append(chunks, current.String()) current.Reset() } if len(seg) > maxSize { // Split further by hunks hunks := strings.Split(seg, "\n@@ ") for j, h := range hunks { var hseg string if j == 0 { // First hunk segment already contains the preceding content (including any needed newline) hseg = h } else { // Subsequent hunks need the leading newline and "@@ " marker restored hseg = "\n@@ " + h } if current.Len()+len(hseg) > maxSize && current.Len() > 0 { chunks = append(chunks, current.String()) current.Reset() } if len(hseg) > maxSize { for len(hseg) > maxSize { chunks = append(chunks, hseg[:maxSize]) hseg = hseg[maxSize:] } current.WriteString(hseg) } else { current.WriteString(hseg) } } } else { current.WriteString(seg) } } if current.Len() > 0 { chunks = append(chunks, current.String()) } return chunks }