openapi: 3.0.0
info:
  version: 1.0.0
  title: Charity Engine Internal LLM Processing API
  # Folded block scalar (>-): renders as a single line, no trailing newline.
  description: >-
    Internal API for submitting text-inference (/generate) and vector-embedding
    (/embeddings) requests to locally hosted LLMs.
tags:
  - name: inference
    description: Operations related to text inference requests.
paths:
  /generate:
    post:
      tags:
        - inference
      summary: Perform text inference for a given prompt
      requestBody:
        content:
          application/json:
            schema:
              type: object
              properties:
                model:
                  type: string
                  description: LLM identifier as a string.
                  # Quoted: the value contains a colon.
                  example: 'llama3.2:3b'
                  maxLength: 128
                  minLength: 1
                prompt:
                  type: string
                  description: Text to use as the basis for inference.
                  example: Respond with a single, random word.
                  maxLength: 131072
                  minLength: 1
                stream:
                  type: boolean
                  description: Whether to return a partial response as it is generated or wait until the process is complete and return the final response.
                  default: false
                images:
                  type: array
                  description: A list of base64-encoded images; for multi-modal models only
                  items:
                    type: string
                  example:
                    - R0lGODdhAQABAPAAAP8AAAAAACwAAAAAAQABAAACAkQBADs=
                context:
                  type: string
                  description: The context parameter returned from a previous request.
                  example: Based
                  maxLength: 131072
                  minLength: 1
                system:
                  type: string
                  description: Specifies the system message that will be set in the template.
                  example: You are a cool high school teacher who uses a lot of Gen Z slang.
                  maxLength: 4096
                  minLength: 1
                template:
                  type: string
                  description: The full prompt template to be sent to the model.
                  maxLength: 16384
                  minLength: 1
                options:
                  type: object
                  description: Additional parameters to pass to the model, such as "temperature", "seed", or "stop" sequence
                  example:
                    temperature: 0.9
                    seed: 42
                    stop: vibe
        required: true
      responses:
        # OpenAPI 3.0 Responses Object keys are strings; unquoted they parse
        # as YAML integers, which strict validators reject.
        '200':
          description: Successfully processed request.
          content:
            application/json:
              schema:
                type: object
                properties:
                  model:
                    type: string
                    example: 'llama3.2:3b'
                  created_at:
                    type: string
                    format: date-time
                    # Quoted so YAML parsers keep it a string rather than
                    # implicitly typing it as a timestamp.
                    example: '2024-10-28T22:38:15.168253042Z'
                  response:
                    type: string
                    example: Fire
                  done:
                    type: boolean
                    example: true
                  done_reason:
                    type: string
                    example: stop
                  context:
                    type: array
                    description: Vector to pass to subsequent request to maintain context.
                    items:
                      type: number
                    example: [128006, 9125, 128007, 271, 38766, 78191, 128007, 271, 34, 47704]
                  total_duration:
                    type: integer
                    example: 1761685899
                  load_duration:
                    type: integer
                    example: 35293232
                  prompt_eval_count:
                    type: integer
                    example: 32
                  prompt_eval_duration:
                    type: integer
                    example: 1462830000
                  eval_count:
                    type: integer
                    example: 3
                  eval_duration:
                    type: integer
                    example: 219636000
        '400':
          description: Request is invalid; JSON could not be parsed or the model does not provide this function.
        '404':
          description: Model with the given name was not found.
  /embeddings:
    post:
      tags:
        - inference
      summary: Generate vector embeddings for a given prompt
      requestBody:
        content:
          application/json:
            schema:
              type: object
              properties:
                model:
                  type: string
                  description: LLM identifier as a string.
                  example: nomic-embed-text
                  maxLength: 128
                  minLength: 1
                prompt:
                  type: string
                  description: Text to use as the basis for inference.
                  example: Respond with a single, random word.
                  maxLength: 131072
                  minLength: 1
        # Marked required for consistency with the /generate operation;
        # the request cannot be serviced without a body.
        required: true
      responses:
        # OpenAPI 3.0 Responses Object keys are strings; unquoted they parse
        # as YAML integers, which strict validators reject.
        '200':
          description: Successfully processed request.
          content:
            application/json:
              schema:
                type: object
                properties:
                  embedding:
                    type: array
                    description: The list of vector embeddings
                    items:
                      type: number
                    example: [0.007006374653428793, -1.99142324924469, -1.6458194255828857, -0.8742634057998657, -1.5847256183624268, 0.7248231172561646, 0.14410534501075745, -0.015371601097285748, 0.43502897024154663, 1.7395483255386353, -0.3814678490161896, -2.694427490234375, 0.2900508642196655]
        '400':
          description: Request is invalid; JSON could not be parsed or the model does not provide this function.
        '404':
          description: Model with the given name was not found.
# Base URL prepended to all paths above.
servers:
  # NOTE(review): localhost:11434 is the Ollama default port — presumably this
  # API proxies or mirrors a local Ollama instance; confirm before deploying.
  - url: http://localhost:11434/api