diff --git a/api/types.go b/api/types.go
index 0ea0b9bf077..d53742ee5a4 100644
--- a/api/types.go
+++ b/api/types.go
@@ -103,10 +103,18 @@ type ChatRequest struct {
 	// Tools is an optional list of tools the model has access to.
 	Tools `json:"tools,omitempty"`
 
+	Debug *Debug `json:"debug,omitempty"`
+
+	Dry bool `json:"dry,omitempty"`
+
 	// Options lists model-specific options.
 	Options map[string]interface{} `json:"options"`
 }
 
+type Debug struct {
+	Include []string `json:"include,omitempty"`
+}
+
 type Tools []Tool
 
 func (t Tools) String() string {
@@ -190,6 +198,8 @@ type ChatResponse struct {
 	Message    Message `json:"message"`
 	DoneReason string  `json:"done_reason,omitempty"`
 
+	Debug map[string]any `json:"debug,omitempty"`
+
 	Done bool `json:"done"`
 
 	Metrics
diff --git a/server/prompt.go b/server/prompt.go
index cc69fe8cf57..062bbeb9e55 100644
--- a/server/prompt.go
+++ b/server/prompt.go
@@ -82,6 +82,10 @@ func chatPrompt(ctx context.Context, m *Model, tokenize tokenizeFunc, opts *api.
 	}
 	currMsgIdx := n
 
+	// Warn user if messages are truncated from the input
+	if numTruncatedMessages := len(msgs[0:currMsgIdx]); numTruncatedMessages > 0 {
+		slog.Warn("truncated first messages from input", "num_truncated", numTruncatedMessages)
+	}
 	for cnt, msg := range msgs[currMsgIdx:] {
 		prefix := ""
 
diff --git a/server/routes.go b/server/routes.go
index 0154dde700c..98be80d7363 100644
--- a/server/routes.go
+++ b/server/routes.go
@@ -1539,6 +1539,34 @@ func (s *Server) ChatHandler(c *gin.Context) {
 		return
 	}
 
+	if req.Dry {
+		var debug map[string]any
+		if req.Debug != nil && req.Debug.Include != nil && slices.Contains(req.Debug.Include, "prompt") {
+			debug = map[string]any{"prompt": prompt}
+		}
+		tokens, err := r.Tokenize(c.Request.Context(), prompt)
+		if err != nil {
+			slog.Error("tokenize error", "error", err)
+			c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
+			return
+		}
+		c.JSON(http.StatusOK, api.ChatResponse{
+			Model:      req.Model,
+			CreatedAt:  time.Now().UTC(),
+			Message:    api.Message{Role: "assistant", Content: ""},
+			Done:       true,
+			DoneReason: "dry_run",
+			Debug:      debug,
+			Metrics: api.Metrics{
+				PromptEvalCount:    len(tokens),
+				PromptEvalDuration: 0,
+				EvalCount:          0,
+				EvalDuration:       0,
+			},
+		})
+		return
+	}
+
 	slog.Debug("chat request", "images", len(images), "prompt", prompt)
 
 	ch := make(chan any)
@@ -1571,6 +1599,16 @@ func (s *Server) ChatHandler(c *gin.Context) {
 				res.LoadDuration = checkpointLoaded.Sub(checkpointStart)
 			}
 
+			if req.Debug != nil && req.Debug.Include != nil && slices.Contains(req.Debug.Include, "prompt") {
+				res.Debug = map[string]any{"prompt": prompt}
+				if req.Stream != nil && !*req.Stream {
+					tempMsg := res.Message
+					res.Message = api.Message{Role: "assistant", Content: ""}
+					ch <- res
+					res.Message = tempMsg
+				}
+			}
+
 			// TODO: tool call checking and filtering should be moved outside of this callback once streaming
 			// however this was a simple change for now without reworking streaming logic of this (and other)
 			// handlers