Skip to content

Commit c8123f9

Browse files
committed
feat: expose methods, add grok, add deepseek
1 parent 8858790 commit c8123f9

13 files changed

+365
-21
lines changed

README.md

Lines changed: 87 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2,16 +2,28 @@
22

33
Generate AI test summaries using leading AI models from OpenAI, Azure OpenAI, Anthropic Claude, Grok, and DeepSeek. Integrate with developer tooling to provide AI summaries where you need them.
44

5-
## **⭐⭐ If you find this project useful, consider giving it a GitHub star ⭐⭐**
6-
7-
## You can help us grow
8-
9-
Support our mission to enhance test reporting with AI summaries by:
10-
11-
- **⭐ Starring this repository to show your support. ⭐**
12-
- **🙌 Following our [GitHub page here](https://github.com/ctrf-io) 🙌**
13-
14-
Thank you! Your support is invaluable to us! 💙
5+
<div align="center">
6+
<div style="padding: 1.5rem; border-radius: 8px; margin: 1rem 0; border: 1px solid #30363d;">
7+
<span style="font-size: 23px;">💚</span>
8+
<h3 style="margin: 1rem 0;">CTRF tooling is open source and free to use</h3>
9+
<p style="font-size: 16px;">You can support the project with a follow and a star</p>
10+
11+
<div style="margin-top: 1.5rem;">
12+
<a href="https://github.com/ctrf-io/ai-test-reporter">
13+
<img src="https://img.shields.io/github/stars/ctrf-io/ai-test-reporter?style=for-the-badge&color=2ea043" alt="GitHub stars">
14+
</a>
15+
<a href="https://github.com/ctrf-io">
16+
<img src="https://img.shields.io/github/followers/ctrf-io?style=for-the-badge&color=2ea043" alt="GitHub followers">
17+
</a>
18+
</div>
19+
</div>
20+
21+
<p style="font-size: 14px; margin: 1rem 0;">
22+
Maintained by <a href="https://github.com/ma11hewthomas">Matthew Thomas</a><br/>
23+
Contributions are very welcome! <br/>
24+
Explore more <a href="https://www.ctrf.io/integrations">integrations</a>
25+
</p>
26+
</div>
1527

1628
## Key Features
1729

@@ -140,6 +152,71 @@ A message is sent to Claude for each failed test.
140152

141153
`--consolidate`: Consolidate and summarize multiple AI summaries into a higher-level overview (default: true)
142154

155+
## Grok
156+
157+
Run the following command:
158+
159+
```bash
160+
npx ai-ctrf grok <path-to-ctrf-report>
161+
```
162+
163+
164+
An AI summary for each failed test will be added to your test report.
165+
166+
The package interacts with the Grok API. You must set the `GROK_API_KEY` environment variable, and you can optionally set `GROK_API_BASE_URL`.
167+
168+
You will be responsible for any charges incurred from using Grok. Make sure you are aware of the associated costs.
169+
170+
A message is sent to Grok for each failed test.
171+
172+
### Grok Options
173+
174+
`--model`: Grok model to use (default: grok-2-latest).
175+
176+
`--systemPrompt`: Custom system prompt to guide the AI response.
177+
178+
`--maxTokens`: Maximum number of tokens for the response.
179+
180+
`--temperature`: Sampling temperature.
181+
182+
`--log`: Whether to log the AI responses to the console (default: true).
183+
184+
`--maxMessages`: Limit the number of failing tests to send for summarization in the LLM request. This helps avoid overwhelming the model when dealing with reports that have many failing tests. (default: 10)
185+
186+
`--consolidate`: Consolidate and summarize multiple AI summaries into a higher-level overview (default: true)
187+
188+
## DeepSeek
189+
190+
Run the following command:
191+
192+
```bash
193+
npx ai-ctrf deepseek <path-to-ctrf-report>
194+
```
195+
196+
An AI summary for each failed test will be added to your test report.
197+
198+
The package interacts with the DeepSeek API. You must set the `DEEPSEEK_API_KEY` environment variable, and you can optionally set `DEEPSEEK_API_BASE_URL`.
199+
200+
You will be responsible for any charges incurred from using DeepSeek. Make sure you are aware of the associated costs.
201+
202+
A message is sent to DeepSeek for each failed test.
203+
204+
### DeepSeek Options
205+
206+
`--model`: DeepSeek model to use (default: deepseek-reasoner).
207+
208+
`--systemPrompt`: Custom system prompt to guide the AI response.
209+
210+
`--maxTokens`: Maximum number of tokens for the response.
211+
212+
`--temperature`: Sampling temperature.
213+
214+
`--log`: Whether to log the AI responses to the console (default: true).
215+
216+
`--maxMessages`: Limit the number of failing tests to send for summarization in the LLM request. This helps avoid overwhelming the model when dealing with reports that have many failing tests. (default: 10)
217+
218+
`--consolidate`: Consolidate and summarize multiple AI summaries into a higher-level overview (default: true)
219+
143220
## CTRF Report Example
144221

145222
``` json

package-lock.json

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
{
22
"name": "ai-ctrf",
3-
"version": "0.0.5",
3+
"version": "0.0.6",
44
"description": "AI Test Reporter - Create human-readable summaries of test results with LLMs like OpenAI GPT",
5-
"main": "index.js",
5+
"main": "dist/index.js",
6+
"types": "dist/index.d.ts",
67
"scripts": {
78
"test": "echo \"Error: no test specified\" && exit 1",
89
"lint": "eslint . --ext .ts --fix",
@@ -39,5 +40,12 @@
3940
"@anthropic-ai/sdk": "^0.27.2",
4041
"openai": "^4.57.0",
4142
"yargs": "^17.7.2"
43+
},
44+
"exports": {
45+
".": {
46+
"require": "./dist/index.js",
47+
"import": "./dist/index.js",
48+
"types": "./dist/index.d.ts"
49+
}
4250
}
4351
}

src/common.ts

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,3 +42,19 @@ export function stripAnsi(message: string) {
4242

4343
return message.replace(ansiRegex(), '');
4444
}
45+
46+
export function generateFailedTestPrompt(test: any, report: CtrfReport): string {
47+
return `Analyze this test failure:
48+
49+
Test Name: ${test.name}
50+
Test Tool: ${report.results.tool.name}
51+
${report.results.environment ? `Environment: ${report.results.environment}` : ""}
52+
53+
Failure Details:
54+
${JSON.stringify(test, null, 2)}
55+
56+
What I need:
57+
1. What specifically failed in this test
58+
2. The likely root cause based on the error messages and context
59+
3. The potential impact of this failure on the system`;
60+
}

src/consolidated-summary.ts

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
11
import { CtrfReport } from "../types/ctrf";
22
import { Arguments } from "./index";
33
import { openAI } from "./models/openai";
4-
import { saveUpdatedReport } from "./common";
54
import { claudeAI } from "./models/claude";
65
import { azureOpenAI } from "./models/azure-openai";
6+
import { grokAI } from "./models/grok";
7+
import { deepseekAI } from "./models/deepseek";
8+
// import { CONSOLIDATED_SUMMARY_SYSTEM_PROMPT } from "./constants";
79

810
export async function generateConsolidatedSummary(report: CtrfReport, file: string, model: string, args: Arguments) {
911
const failedTests = report.results.tests.filter(test => test.status === 'failed');
@@ -21,6 +23,25 @@ export async function generateConsolidatedSummary(report: CtrfReport, file: stri
2123
- Adding generic conclusions or advice such as "By following these steps..."
2224
- headings, bullet points, or special formatting.`
2325
const consolidatedPrompt = `The following tests failed in the suite:\n\n${aiSummaries.join("\n")}\n\nA total of ${failedTests.length} tests failed in this test suite. Please provide a high-level summary of what went wrong across the suite and suggest what might be the root causes or patterns.`;
26+
27+
// const systemPrompt = CONSOLIDATED_SUMMARY_SYSTEM_PROMPT;
28+
// const consolidatedPrompt = `Analyze these ${failedTests.length} test failures from our test suite:
29+
30+
// Test Environment: ${report.results.environment || 'Not specified'}
31+
// Test Tool: ${report.results.tool.name}
32+
// Total Tests Run: ${report.results.tests.length}
33+
// Failed Tests: ${failedTests.length}
34+
35+
// Failed Test Details:
36+
// ${aiSummaries.join("\n")}
37+
38+
// Key Questions to Address:
39+
// 1. What patterns or common themes exist across these failures?
40+
// 2. Is there evidence of a broader system change causing these failures?
41+
// 3. How should I prioritize the issues to address based on the failure patterns?
42+
43+
// Please provide a high-level analysis of the test suite failures, focusing on systemic issues and patterns.`;
44+
2445
let consolidatedSummary = ""
2546

2647
if (model === 'openai') {
@@ -29,6 +50,10 @@ export async function generateConsolidatedSummary(report: CtrfReport, file: stri
2950
consolidatedSummary = await claudeAI(systemPrompt, consolidatedPrompt, args) || ""
3051
} else if (model === 'azure') {
3152
consolidatedSummary = await azureOpenAI(systemPrompt, consolidatedPrompt, args) || ""
53+
} else if (model === 'grok') {
54+
consolidatedSummary = await grokAI(systemPrompt, consolidatedPrompt, args) || ""
55+
} else if (model === 'deepseek') {
56+
consolidatedSummary = await deepseekAI(systemPrompt, consolidatedPrompt, args) || ""
3257
}
3358

3459
if (consolidatedSummary) {

src/constants.ts

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
export const CONSOLIDATED_SUMMARY_SYSTEM_PROMPT = `You are tasked with analyzing multiple test failures across a test run. Your goal is to provide a concise, high-level synthesis that identifies common patterns, potential root causes, and system-wide issues. Focus on correlations between failures and broader system implications.
2+
3+
Avoid:
4+
- Including code snippets or technical implementation details
5+
- Generic testing advice or best practices
6+
- Bullet points, headings, or special formatting
7+
- Repeating individual test failure details`
8+
9+
export const FAILED_TEST_SUMMARY_SYSTEM_PROMPT = `You are tasked with analyzing a specific test failure from a CTRF report. Your goal is to generate a clear, actionable summary that helps developers understand and fix the issue quickly.
10+
11+
When analyzing the failure:
12+
- Start your response with "The test failed because"
13+
- Keep your explanation conversational and natural
14+
- Focus on the exact error message and stack trace provided without reinterpreting them
15+
- Identify the specific root cause based on the provided information
16+
- Suggest concrete steps for resolution that directly relate to the failure
17+
18+
Avoid:
19+
- Including code snippets or stack traces in your response
20+
- Adding generic conclusions or advice
21+
- Using bullet points, headings, or special formatting
22+
- Making assumptions beyond the provided information
23+
- Including implementation details or debugging steps`

src/index.ts

Lines changed: 54 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,9 @@ import { openAIFailedTestSummary } from './models/openai';
55
import { azureFailedTestSummary } from './models/azure-openai';
66
import { validateCtrfFile } from './common';
77
import { claudeFailedTestSummary } from './models/claude';
8-
import { generateConsolidatedSummary } from './consolidated-summary';
8+
import { grokFailedTestSummary } from './models/grok';
9+
import { deepseekFailedTestSummary } from './models/deepseek';
10+
import { FAILED_TEST_SUMMARY_SYSTEM_PROMPT } from './constants';
911

1012
export interface Arguments {
1113
_: Array<string | number>;
@@ -73,6 +75,36 @@ const argv: Arguments = yargs(hideBin(process.argv))
7375
});
7476
}
7577
)
78+
.command(
79+
'grok <file>',
80+
'Generate test summary from a CTRF report using Grok',
81+
(yargs) => {
82+
return yargs.positional('file', {
83+
describe: 'Path to the CTRF file',
84+
type: 'string',
85+
})
86+
.option('model', {
87+
describe: 'Grok model to use',
88+
type: 'string',
89+
default: 'grok-2-latest',
90+
});
91+
}
92+
)
93+
.command(
94+
'deepseek <file>',
95+
'Generate test summary from a CTRF report using DeepSeek',
96+
(yargs) => {
97+
return yargs.positional('file', {
98+
describe: 'Path to the CTRF file',
99+
type: 'string',
100+
})
101+
.option('model', {
102+
describe: 'DeepSeek model to use',
103+
type: 'string',
104+
default: 'deepseek-reasoner',
105+
});
106+
}
107+
)
76108
.option('systemPrompt', {
77109
describe: 'System prompt to guide the AI',
78110
type: 'string',
@@ -159,4 +191,24 @@ if (argv._.includes('openai') && argv.file) {
159191
} catch (error) {
160192
console.error('Failed to read file:', error);
161193
}
162-
}
194+
} else if (argv._.includes('grok') && argv.file) {
195+
try {
196+
const report = validateCtrfFile(argv.file);
197+
if (report !== null) {
198+
grokFailedTestSummary(report, file, argv);
199+
}
200+
} catch (error) {
201+
console.error('Failed to read file:', error);
202+
}
203+
} else if (argv._.includes('deepseek') && argv.file) {
204+
try {
205+
const report = validateCtrfFile(argv.file);
206+
if (report !== null) {
207+
deepseekFailedTestSummary(report, file, argv);
208+
}
209+
} catch (error) {
210+
console.error('Failed to read file:', error);
211+
}
212+
}
213+
214+
export { openAIFailedTestSummary, claudeFailedTestSummary, azureFailedTestSummary, grokFailedTestSummary, deepseekFailedTestSummary };

src/models/azure-openai.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ export async function azureOpenAI(systemPrompt: string, prompt: string, args: Ar
3737
}
3838
}
3939

40-
export async function azureFailedTestSummary(report: CtrfReport, file: string, args: Arguments) {
40+
export async function azureFailedTestSummary(report: CtrfReport, file: string, args: Arguments): Promise<CtrfReport> {
4141
const failedTests = report.results.tests.filter(test => test.status === 'failed');
4242

4343
let logged = false;
@@ -71,4 +71,5 @@ export async function azureFailedTestSummary(report: CtrfReport, file: string, a
7171
await generateConsolidatedSummary(report, file, "azure", args)
7272
}
7373
saveUpdatedReport(file, report);
74+
return report;
7475
}

src/models/claude.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ export async function claudeAI(systemPrompt: string, prompt: string, args: Argum
3333
}
3434
}
3535

36-
export async function claudeFailedTestSummary(report: CtrfReport, file: string, args: Arguments) {
36+
export async function claudeFailedTestSummary(report: CtrfReport, file: string, args: Arguments): Promise<CtrfReport> {
3737
const failedTests = report.results.tests.filter(test => test.status === 'failed');
3838

3939
let logged = false;
@@ -67,4 +67,5 @@ export async function claudeFailedTestSummary(report: CtrfReport, file: string,
6767
await generateConsolidatedSummary(report, file, "claude", args)
6868
}
6969
saveUpdatedReport(file, report);
70+
return report;
7071
}

0 commit comments

Comments
 (0)