KWJ tools are standard MCP servers and plain CLI executables. If your agent supports MCP or can run shell commands, KWJ works — no SDK lock-in, no vendor dependency.
MCP-native setup
Add this JSON to your agent's MCP config file. It works with Claude Code (~/.claude/mcp.json), Cursor (.cursor/mcp.json), Windsurf, Continue.dev, or any other MCP client.
{
  "mcpServers": {
    "kwj": {
      "command": "custom-mcp",
      "args": [
        "serve"
      ],
      "env": {
        "KWJ_API_KEY": "kwj_your_key_here"
      }
    }
  }
}
Code examples
# Add to ~/.claude/mcp.json — tools auto-appear in every Claude Code session.
# No extra setup. Claude Code has built-in MCP support.
{
  "mcpServers": {
    "kwj": {
      "command": "custom-mcp",
      "args": ["serve"],
      "env": {
        "KWJ_API_KEY": "kwj_your_key_here"
      }
    }
  }
}
# Or use the one-liner:
# NOTE(review): this registers a remote URL, whereas the JSON config above
# launches the local `custom-mcp serve` command and passes KWJ_API_KEY.
# Confirm which transport is intended, whether the CLI needs an explicit
# transport flag for a URL, and how the API key is supplied in this form.
claude mcp add kwj https://kwj.ai/mcp
import os

from agents import Agent
from agents.mcp import MCPServerStdio

# Mount KWJ as an MCP server — all 50 tools become available to the agent.
# MCPServerStdio is exported from `agents.mcp` and takes its launch settings
# as a single `params` mapping (command / args / env).
kwj = MCPServerStdio(
    params={
        "command": "custom-mcp",
        "args": ["serve"],
        "env": {"KWJ_API_KEY": os.environ["KWJ_API_KEY"]},
    },
)
agent = Agent(
    name="my-agent",
    model="gpt-4o",
    mcp_servers=[kwj],
)
# The server must be connected before the agent runs: wrap the run in
# `async with kwj:` (or call `await kwj.connect()` / `await kwj.cleanup()`).
# The agent can now call kwj_digest, kwj_cache_get, kwj_slice, etc.
import subprocess, openai, os
# Check cache first — skip both the slice and the LLM call entirely on a hit
cached = subprocess.run(
    ["custom-cache", "get", "--fuzzy", "analyze handle_request"],
    capture_output=True,
    text=True,
).stdout.strip()
if cached:
    result = cached
else:
    # Shrink context before sending to DeepSeek (or any OpenAI-compatible
    # model). Done only on a miss, so a cache hit costs one subprocess call
    # and does not depend on the source file being present.
    slim = subprocess.check_output(
        ["custom-context", "slice", "src/main.rs", "handle_request"]
    ).decode()
    client = openai.OpenAI(
        base_url="https://api.deepseek.com",
        api_key=os.environ["DEEPSEEK_API_KEY"],
    )
    resp = client.chat.completions.create(
        model="deepseek-chat",
        messages=[{"role": "user", "content": f"Analyze this code:\n{slim}"}],
    )
    # `content` is optional in the response schema; normalise it to a string
    # so the cache `put` below never receives None as an argv element.
    result = resp.choices[0].message.content or ""
    if result:
        # Only store real answers — an empty value would read as a miss anyway.
        subprocess.run(["custom-cache", "put", "analyze handle_request", result])
from langchain_mcp_adapters.client import MultiServerMCPClient
from langchain_openai import ChatOpenAI
from langgraph.prebuilt import create_react_agent
import os
async def run():
    """Run a LangChain ReAct agent with the KWJ MCP tools and return its result.

    The KWJ server is launched over stdio with ``KWJ_API_KEY`` taken from the
    environment (raises ``KeyError`` if it is unset).

    Returns:
        The value produced by ``agent.ainvoke`` for the "Analyze this codebase"
        request.
    """
    # Recent langchain-mcp-adapters releases (0.1.0+) no longer allow the
    # client to be used as a context manager; construct it directly and await
    # get_tools() instead.
    client = MultiServerMCPClient({
        "kwj": {
            "command": "custom-mcp",
            "args": ["serve"],
            "env": {"KWJ_API_KEY": os.environ["KWJ_API_KEY"]},
            "transport": "stdio",
        }
    })
    tools = await client.get_tools()
    # tools now includes kwj_digest, kwj_cache_get, kwj_slice, etc.
    llm = ChatOpenAI(model="gpt-4o")
    agent = create_react_agent(llm, tools)
    result = await agent.ainvoke({"messages": [("user", "Analyze this codebase")]})
    # Hand the result back to the caller instead of silently discarding it.
    return result
from langchain_mcp_adapters.client import MultiServerMCPClient
from langgraph.prebuilt import create_react_agent
import os
async def build_agent():
    """Build and return a LangGraph ReAct agent wired to the KWJ MCP tools.

    The KWJ server is launched over stdio with ``KWJ_API_KEY`` taken from the
    environment (raises ``KeyError`` if it is unset).

    Returns:
        The agent created by ``create_react_agent`` with the KWJ tools attached.
    """
    # The client is deliberately NOT used in an `async with` block: recent
    # langchain-mcp-adapters releases (0.1.0+) reject that usage, and returning
    # the agent from inside such a block would hand back tools whose client
    # context had already exited.
    mcp = MultiServerMCPClient({
        "kwj": {
            "command": "custom-mcp",
            "args": ["serve"],
            "env": {"KWJ_API_KEY": os.environ["KWJ_API_KEY"]},
            "transport": "stdio",
        }
    })
    tools = await mcp.get_tools()
    # Works with any model — gpt-4o, claude-*, deepseek-chat, etc.
    agent = create_react_agent("gpt-4o", tools)
    return agent
from autogen_ext.tools.mcp import StdioMcpToolAdapter, StdioServerParams
from autogen_agentchat.agents import AssistantAgent
from autogen_ext.models.openai import OpenAIChatCompletionClient
import os
import asyncio

from autogen_ext.tools.mcp import mcp_server_tools

# Describe how to launch the KWJ MCP server over stdio
kwj_params = StdioServerParams(
    command="custom-mcp",
    args=["serve"],
    env={"KWJ_API_KEY": os.environ["KWJ_API_KEY"]},
)
# StdioMcpToolAdapter wraps a single tool and requires that tool's definition,
# so it cannot be built from server params alone. mcp_server_tools() asks the
# server for its tool list and returns one adapter per tool.
# NOTE: asyncio.run() cannot be called from a running event loop (e.g. a
# notebook) — use `await mcp_server_tools(kwj_params)` there instead.
kwj_tools = asyncio.run(mcp_server_tools(kwj_params))
# Create agent with KWJ tools
agent = AssistantAgent(
    name="assistant",
    model_client=OpenAIChatCompletionClient(model="gpt-4o"),
    tools=kwj_tools,
)
# Cursor: add to .cursor/mcp.json in your project root (or global ~/.cursor/mcp.json)
# Windsurf: add to ~/.codeium/windsurf/mcp_config.json
# Continue.dev: add to .continue/config.json under "mcpServers"
{
  "mcpServers": {
    "kwj": {
      "command": "custom-mcp",
      "args": [
        "serve"
      ],
      "env": {
        "KWJ_API_KEY": "kwj_your_key_here"
      }
    }
  }
}
# Restart your editor. KWJ tools appear in the agent tool panel automatically.
import subprocess
# Cache expensive computation — skip LLM call on a hit
def cached_analyze(query: str, compute_fn) -> str:
    """Return the cached answer for *query*, computing and storing it on a miss.

    Runs ``custom-cache get --fuzzy`` with a threshold of 0.35; if it prints
    nothing, ``compute_fn()`` is called and its result is stored with
    ``custom-cache put`` before being returned.
    """
    lookup = subprocess.run(
        ["custom-cache", "get", "--fuzzy", query, "--threshold", "0.35"],
        capture_output=True,
        text=True,
    )
    hit = lookup.stdout.strip()
    if hit:
        return hit
    # Empty output is treated as a miss: compute, store, return.
    fresh = compute_fn()
    subprocess.run(["custom-cache", "put", query, fresh])
    return fresh
# Compress build output before feeding to any LLM
def compress_output(big_output: str) -> str:
    """Pipe *big_output* through ``custom-digest line-cap --max-lines 50``.

    The text is encoded to bytes for the child's stdin and the child's stdout
    is decoded back to ``str`` for the caller.
    """
    digest_cmd = ["custom-digest", "line-cap", "--max-lines", "50"]
    payload = big_output.encode()
    completed = subprocess.run(digest_cmd, input=payload, capture_output=True)
    return completed.stdout.decode()
# Slice only the function you need from a large file
def get_symbol(filepath: str, symbol: str) -> str:
    """Return the output of ``custom-context slice`` for *symbol* in *filepath*.

    Raises ``subprocess.CalledProcessError`` if the command exits non-zero.
    """
    slice_cmd = ["custom-context", "slice", filepath, symbol]
    raw = subprocess.check_output(slice_cmd)
    return raw.decode()
import { execSync, spawnSync } from "child_process";
// Shrink a large file to just the symbol you need
/**
 * Run `custom-context slice <filepath> <symbol>` and return its stdout.
 *
 * Arguments are passed as an argv array rather than interpolated into a shell
 * string, so paths or symbols containing spaces, quotes, `;`, `$(...)` etc.
 * are delivered verbatim and cannot inject extra shell commands.
 *
 * @throws if the process cannot be spawned or exits with a non-zero status
 *         (mirrors the throwing behaviour of the previous execSync call).
 */
function sliceSymbol(filepath: string, symbol: string): string {
  const proc = spawnSync("custom-context", ["slice", filepath, symbol], {
    encoding: "utf8",
  });
  if (proc.error) {
    throw proc.error;
  }
  if (proc.status !== 0) {
    throw new Error(
      `custom-context slice exited with status ${proc.status}: ${proc.stderr}`
    );
  }
  return proc.stdout;
}
// Cache results across agent runs
/**
 * Return the cached value for `query`; on a miss call `compute`, store its
 * result with `custom-cache put`, and return it.
 */
function cachedResult(query: string, compute: () => string): string {
  const lookup = spawnSync("custom-cache", ["get", "--fuzzy", query], {
    encoding: "utf8",
  });
  const cached = lookup.stdout.trim();
  if (cached) {
    return cached;
  }
  // Empty output counts as a miss.
  const fresh = compute();
  spawnSync("custom-cache", ["put", query, fresh]);
  return fresh;
}
// Compress noisy output before sending to any LLM API
/**
 * Feed `output` to `custom-digest line-cap --max-lines 50` on stdin and
 * return whatever the command writes to stdout.
 */
function compress(output: string): string {
  const digestArgs = ["line-cap", "--max-lines", "50"];
  const { stdout } = spawnSync("custom-digest", digestArgs, {
    input: output,
    encoding: "utf8",
  });
  return stdout;
}
// Example: OpenAI SDK with KWJ context shrinking
import OpenAI from "openai";
// Client picks up its configuration from the SDK defaults.
const openai = new OpenAI();
// Send only the sliced symbol, not the whole file.
const slim = sliceSymbol("src/index.ts", "handleRequest");
const prompt = `Analyze: ${slim}`;
const response = await openai.chat.completions.create({
  model: "gpt-4o",
  messages: [{ role: "user", content: prompt }],
});
#!/usr/bin/env bash
# Shrink context, cache result, compress output — works with any model

# 1. Check cache before doing any other work or calling any LLM
CACHED=$(custom-cache get --fuzzy "analyze my_fn" 2>/dev/null)
if [ -z "$CACHED" ]; then
  # 2. Slice only the function you need (2000 lines -> 40 lines).
  #    Done on a miss only, so a cache hit costs a single command.
  SLIM=$(custom-context slice main.rs my_fn)
  # 3. Compress any big output before it enters the prompt
  BIG_LOG=$(cargo build 2>&1)
  # printf instead of echo: safe even if the log starts with "-n"/"-e"
  LEAN=$(printf '%s\n' "$BIG_LOG" | custom-digest line-cap --max-lines 50)
  # 4. Call your LLM of choice (swap in any CLI).
  #    bash's echo does not expand "\n" without -e, so the original sent a
  #    literal backslash-n; printf emits a real newline between the sections.
  RESULT=$(printf 'Code: %s\nBuild: %s\n' "$SLIM" "$LEAN" | your-llm-cli analyze)
  # 5. Cache for next time
  custom-cache put "analyze my_fn" "$RESULT"
  printf '%s\n' "$RESULT"
else
  printf '%s\n' "$CACHED"
fi
What you get
| Tool | What it does | Token savings | How to use |
|---|---|---|---|
| custom-context | Slice only the code symbol you need from large files | 98% (2000 lines → 40) | CLI or MCP |
| custom-digest | Compress noisy build/log output to key lines only | 94% (500 lines → 30) | CLI or MCP |
| custom-cache | Skip recomputing the same answer (fuzzy Jaccard match) | 100% on hit | CLI or MCP |
| custom-recall | TF-IDF search over memory files — pull only relevant facts | 95% (full file → 3-5 facts) | CLI or MCP |
| custom-bash | Cached shell command runner with TTL and auto-truncation | 98.7% (300 calls → 1) | CLI or MCP |
| web_read | Cached URL fetch — eliminates redundant web fetches | 100% on cache hit | MCP or HTTP API |
| doc_extract | PDF / XLSX / CSV extraction without full-load waste | 80-98% | MCP or HTTP API |
1-hour free trial. No credit card. $19/month after.