Commit e09e659
Merge pull request #173 from GitPaulo/main
Support passing max_tokens and max_completion_tokens
2 parents a380166 + e608d2b
File tree: 11 files changed (+175 −39 lines)

README.md
Lines changed: 45 additions & 17 deletions

````diff
@@ -123,6 +123,33 @@ supplied via the `input` parameter in YAML format. Additionally, you can
 provide file-based variables via `file_input`, where each key maps to a file
 path.
 
+### Prompt.yml with model parameters
+
+You can specify model parameters directly in your `.prompt.yml` files using the
+`modelParameters` key:
+
+```yaml
+messages:
+  - role: system
+    content: Be as concise as possible
+  - role: user
+    content: 'Compare {{a}} and {{b}}, please'
+model: openai/gpt-4o
+modelParameters:
+  maxCompletionTokens: 500
+  temperature: 0.7
+```
+
+| Key                   | Type   | Description                                           |
+| --------------------- | ------ | ----------------------------------------------------- |
+| `maxCompletionTokens` | number | The maximum number of tokens to generate              |
+| `maxTokens`           | number | The maximum number of tokens to generate (deprecated) |
+| `temperature`         | number | The sampling temperature to use (0-1)                 |
+| `topP`                | number | The nucleus sampling parameter to use (0-1)           |
+
+> [!NOTE]
+> Parameters set in `modelParameters` take precedence over the corresponding action inputs.
+
 ### Using a system prompt file
 
 In addition to the regular prompt, you can provide a system prompt file instead
@@ -276,23 +303,24 @@ perform actions like searching issues and PRs.
 Various inputs are defined in [`action.yml`](action.yml) to let you configure
 the action:
 
-| Name | Description | Default |
-| --- | --- | --- |
-| `token` | Token to use for inference. Typically the GITHUB_TOKEN secret | `github.token` |
-| `prompt` | The prompt to send to the model | N/A |
-| `prompt-file` | Path to a file containing the prompt (supports .txt and .prompt.yml formats). If both `prompt` and `prompt-file` are provided, `prompt-file` takes precedence | `""` |
-| `input` | Template variables in YAML format for .prompt.yml files (e.g., `var1: value1` on separate lines) | `""` |
-| `file_input` | Template variables in YAML where values are file paths. The file contents are read and used for templating | `""` |
-| `system-prompt` | The system prompt to send to the model | `"You are a helpful assistant"` |
-| `system-prompt-file` | Path to a file containing the system prompt. If both `system-prompt` and `system-prompt-file` are provided, `system-prompt-file` takes precedence | `""` |
-| `model` | The model to use for inference. Must be available in the [GitHub Models](http://github.com/marketplace?type=models) catalog | `openai/gpt-4o` |
-| `endpoint` | The endpoint to use for inference. If you're running this as part of an org, you should probably use the org-specific Models endpoint | `https://models.github.ai/inference` |
-| `max-tokens` | The max number of tokens to generate | 200 |
-| `temperature` | The sampling temperature to use (0-1) | `""` |
-| `top-p` | The nucleus sampling parameter to use (0-1) | `""` |
-| `enable-github-mcp` | Enable Model Context Protocol integration with GitHub tools | `false` |
-| `github-mcp-token` | Token to use for GitHub MCP server (defaults to the main token if not specified). | `""` |
-| `custom-headers` | Custom HTTP headers to include in API requests. Supports both YAML format (`header1: value1`) and JSON format (`{"header1": "value1"}`). Useful for API Management platforms, rate limiting, and request tracking. | `""` |
+| Name | Description | Default |
+| --- | --- | --- |
+| `token` | Token to use for inference. Typically the GITHUB_TOKEN secret | `github.token` |
+| `prompt` | The prompt to send to the model | N/A |
+| `prompt-file` | Path to a file containing the prompt (supports .txt and .prompt.yml formats). If both `prompt` and `prompt-file` are provided, `prompt-file` takes precedence | `""` |
+| `input` | Template variables in YAML format for .prompt.yml files (e.g., `var1: value1` on separate lines) | `""` |
+| `file_input` | Template variables in YAML where values are file paths. The file contents are read and used for templating | `""` |
+| `system-prompt` | The system prompt to send to the model | `"You are a helpful assistant"` |
+| `system-prompt-file` | Path to a file containing the system prompt. If both `system-prompt` and `system-prompt-file` are provided, `system-prompt-file` takes precedence | `""` |
+| `model` | The model to use for inference. Must be available in the [GitHub Models](http://github.com/marketplace?type=models) catalog | `openai/gpt-4o` |
+| `endpoint` | The endpoint to use for inference. If you're running this as part of an org, you should probably use the org-specific Models endpoint | `https://models.github.ai/inference` |
+| `max-tokens` | The maximum number of tokens to generate (deprecated, use `max-completion-tokens` instead) | 200 |
+| `max-completion-tokens` | The maximum number of tokens to generate | `""` |
+| `temperature` | The sampling temperature to use (0-1) | `""` |
+| `top-p` | The nucleus sampling parameter to use (0-1) | `""` |
+| `enable-github-mcp` | Enable Model Context Protocol integration with GitHub tools | `false` |
+| `github-mcp-token` | Token to use for GitHub MCP server (defaults to the main token if not specified) | `""` |
+| `custom-headers` | Custom HTTP headers to include in API requests. Supports both YAML format (`header1: value1`) and JSON format (`{"header1": "value1"}`). Useful for API Management platforms, rate limiting, and request tracking. | `""` |
 
 ## Outputs
````
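The new `max-completion-tokens` input can be exercised from a workflow step. A minimal sketch, assuming the action is published as `actions/ai-inference@v1` (the action reference is an assumption, not stated in this diff):

```yaml
# Hypothetical workflow step; the `uses:` reference is assumed for illustration.
- name: Run inference
  id: inference
  uses: actions/ai-inference@v1
  with:
    prompt: 'Summarize the latest release notes'
    model: openai/gpt-4o
    max-completion-tokens: '500'  # preferred over the deprecated max-tokens
    temperature: '0.7'
```

Since `max-tokens` still defaults to `'200'`, setting `max-completion-tokens` explicitly is how a workflow opts into the non-deprecated parameter.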

__tests__/helpers-inference.test.ts
Lines changed: 4 additions & 0 deletions

```diff
@@ -109,6 +109,7 @@ describe('helpers.ts - inference request building', () => {
       undefined,
       undefined,
       100,
+      undefined,
       'https://api.test.com',
       'test-token',
     )
@@ -122,6 +123,7 @@ describe('helpers.ts - inference request building', () => {
       temperature: undefined,
       topP: undefined,
       maxTokens: 100,
+      maxCompletionTokens: undefined,
       endpoint: 'https://api.test.com',
       token: 'test-token',
       responseFormat: {
@@ -143,6 +145,7 @@ describe('helpers.ts - inference request building', () => {
       undefined,
       undefined,
       100,
+      undefined,
       'https://api.test.com',
       'test-token',
     )
@@ -156,6 +159,7 @@ describe('helpers.ts - inference request building', () => {
       temperature: undefined,
       topP: undefined,
       maxTokens: 100,
+      maxCompletionTokens: undefined,
       endpoint: 'https://api.test.com',
       token: 'test-token',
       responseFormat: undefined,
```
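The helpers tests above pass the new value positionally and expect it to surface as a named `maxCompletionTokens` field on the request object. A minimal sketch of that mapping, where the function name `buildRequest` and its parameter order are assumptions for illustration, not the repo's actual helper:

```typescript
// Hypothetical sketch: a positional builder that carries both token limits
// through to a named request object, mirroring the test expectations above.
interface InferenceRequest {
  maxTokens?: number
  maxCompletionTokens?: number
  endpoint: string
  token: string
}

function buildRequest(
  maxTokens: number | undefined,
  maxCompletionTokens: number | undefined,
  endpoint: string,
  token: string,
): InferenceRequest {
  return {maxTokens, maxCompletionTokens, endpoint, token}
}

// Matches the test fixtures: positional 100 + undefined become named fields.
const req = buildRequest(100, undefined, 'https://api.test.com', 'test-token')
```

This is why the tests only needed to insert a single `undefined` argument: the new parameter slot threads straight through to the request shape.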

__tests__/inference.test.ts
Lines changed: 61 additions & 1 deletion

```diff
@@ -31,7 +31,7 @@ describe('inference.ts', () => {
       {role: 'user' as const, content: 'Hello, AI!'},
     ],
     modelName: 'gpt-4',
-    maxTokens: 100,
+    maxCompletionTokens: 100,
     endpoint: 'https://api.test.com',
     token: 'test-token',
   }
@@ -633,4 +633,64 @@ describe('inference.ts', () => {
       expect(result).toBe('{"immediate": "result"}')
     })
   })
+
+  describe('token param routing', () => {
+    it('sends max_tokens when only maxTokens is set', async () => {
+      const requestWithMaxTokens = {
+        ...mockRequest,
+        maxCompletionTokens: undefined,
+        maxTokens: 100,
+      }
+
+      const mockResponse = {
+        choices: [
+          {
+            message: {
+              content: 'Direct max_tokens response',
+            },
+          },
+        ],
+      }
+
+      mockCreate.mockResolvedValueOnce(mockResponse)
+
+      const result = await simpleInference(requestWithMaxTokens)
+
+      expect(result).toBe('Direct max_tokens response')
+      expect(mockCreate).toHaveBeenCalledTimes(1)
+
+      // Should have sent max_tokens directly
+      expect(mockCreate.mock.calls[0][0]).toHaveProperty('max_tokens', 100)
+      expect(mockCreate.mock.calls[0][0]).not.toHaveProperty('max_completion_tokens')
+    })
+
+    it('sends neither token param when both are undefined', async () => {
+      const requestWithNoTokens = {
+        ...mockRequest,
+        maxCompletionTokens: undefined,
+        maxTokens: undefined,
+      }
+
+      const mockResponse = {
+        choices: [
+          {
+            message: {
+              content: 'No token limit response',
+            },
+          },
+        ],
+      }
+
+      mockCreate.mockResolvedValueOnce(mockResponse)
+
+      const result = await simpleInference(requestWithNoTokens)
+
+      expect(result).toBe('No token limit response')
+      expect(mockCreate).toHaveBeenCalledTimes(1)
+
+      const params = mockCreate.mock.calls[0][0]
+      expect(params).not.toHaveProperty('max_tokens')
+      expect(params).not.toHaveProperty('max_completion_tokens')
+    })
+  })
 })
```
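The routing behavior these tests pin down can be sketched independently of the action's actual implementation. The helper name `tokenParams` below is invented for illustration: prefer `max_completion_tokens`, fall back to the deprecated `max_tokens`, and send neither when both are unset.

```typescript
// Hypothetical sketch of the token-limit routing described by the tests.
// Not the action's real code; the helper name is illustrative only.
interface TokenOptions {
  maxTokens?: number
  maxCompletionTokens?: number
}

function tokenParams(opts: TokenOptions): Record<string, number> {
  if (opts.maxCompletionTokens !== undefined) {
    // New, preferred parameter wins when both are set.
    return {max_completion_tokens: opts.maxCompletionTokens}
  }
  if (opts.maxTokens !== undefined) {
    // Deprecated fallback path exercised by the first test above.
    return {max_tokens: opts.maxTokens}
  }
  // Second test above: neither property appears on the API payload.
  return {}
}
```

Spreading the result into the chat-completions payload keeps the absent keys truly absent, which is what the `not.toHaveProperty` assertions check.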

__tests__/main.test.ts
Lines changed: 2 additions & 0 deletions

```diff
@@ -168,6 +168,7 @@ describe('main.ts', () => {
       ],
       modelName: 'gpt-4',
       maxTokens: 100,
+      maxCompletionTokens: undefined,
       endpoint: 'https://api.test.com',
       token: 'fake-token',
       responseFormat: undefined,
@@ -259,6 +260,7 @@ describe('main.ts', () => {
       ],
       modelName: 'gpt-4',
       maxTokens: 100,
+      maxCompletionTokens: undefined,
       endpoint: 'https://api.test.com',
       token: 'fake-token',
       responseFormat: undefined,
```

action.yml
Lines changed: 5 additions & 1 deletion

```diff
@@ -43,9 +43,13 @@ inputs:
     required: false
     default: ''
   max-tokens:
-    description: The maximum number of tokens to generate
+    description: The maximum number of tokens to generate (deprecated)
     required: false
     default: '200'
+  max-completion-tokens:
+    description: The maximum number of tokens to generate
+    required: false
+    default: ''
   temperature:
     description: The sampling temperature to use (0-1)
     required: false
```

dist/index.js
Lines changed: 25 additions & 9 deletions (generated file; diff not rendered)

dist/index.js.map
Lines changed: 1 addition & 1 deletion (generated file; diff not rendered)

0 commit comments