Agents: The 6 Specialized Feature Generators

Explore all 6 built-in agents, the AgentRegistry, and how to build a custom agent.
Published

May 11, 2026

Introduction

Feature Forge ships with 6 specialized agents, each designed for a distinct type of feature transformation. This notebook explores every agent, the AgentRegistry discovery mechanism, and how to create a custom agent via subclassing.

Setup

Code
import os
import warnings
# Silence every warning category for the rest of the session
# (presumably to keep the rendered notebook output compact).
warnings.filterwarnings("ignore")

import numpy as np
import pandas as pd
from sklearn.datasets import make_classification

import feature_forge
# Print the origin path of the imported module so it is clear which
# feature_forge installation the notebook is running against.
print(f"feature_forge: {feature_forge.__spec__.origin}")
feature_forge: /Users/minghao/Desktop/personal/feature_forge/src/feature_forge/__init__.py

Discover Built-in Agents

Code
from feature_forge.agents import AgentRegistry

# Mapping of registry name -> agent class for the agents built into the package.
agents = AgentRegistry.get_builtin_agents()

# Assemble the whole listing first, then emit it with a single print call.
listing = [f"Built-in agents ({len(agents)}):"]
listing.extend(f"  - {name}: {cls.__name__}" for name, cls in agents.items())
print("\n".join(listing))
Built-in agents (6):
  - unary: UnaryFeatureAgent
  - cross_compositional: CrossCompositionalAgent
  - aggregation: AggregationConstructAgent
  - temporal: TemporalFeatureAgent
  - local_transform: LocalTransformAgent
  - local_pattern: LocalPatternAgent

Agent Capabilities

Code
import pandas as pd

from feature_forge.agents.router import RouterAgent
from feature_forge.config import Settings

# The router built from default settings exposes the per-agent capability table.
router = RouterAgent(Settings())
caps = router.AGENT_CAPABILITIES

# One row per agent (dict keys become the index), one column per capability field.
cap_df = pd.DataFrame.from_dict(caps, orient="index")
cap_df
description required_column_types excluded_if min_columns requires_enrich
unary Generates features from single columns [numerical, categorical] [no_single_column_features] NaN NaN
cross_compositional Generates cross features between 2+ columns [numerical, categorical] [single_column_dataset] 2.0 NaN
aggregation Generates aggregation-based features [categorical, groupable] [no_categorical_for_grouping] NaN NaN
temporal Generates time-based features [datetime, temporal] [no_datetime_columns] NaN NaN
local_transform Generates local transformation features [numerical] [no_numerical_columns] NaN NaN
local_pattern Generates features based on distributional pat... [numerical, categorical] [] NaN True

Load Data

Code
# Synthetic binary-classification data: 200 rows, 6 features (4 informative),
# seeded so the notebook is reproducible.
X, y = make_classification(
    n_samples=200,
    n_features=6,
    n_informative=4,
    random_state=42,
)

# Name the feature columns f1..fN and attach the labels as a "target" column.
feature_names = [f"f{idx}" for idx in range(1, X.shape[1] + 1)]
df = pd.DataFrame(X, columns=feature_names).assign(target=y)
df.head()
f1 f2 f3 f4 f5 f6 target
0 -0.046871 2.138351 -1.285521 -1.072700 -0.903037 -1.492674 1
1 -1.024936 -0.038298 -2.124122 0.856841 -0.242814 -1.609433 0
2 -2.179571 -1.074040 -1.657299 -0.709876 -0.162627 -1.781360 0
3 1.025595 2.005875 -1.119966 1.060502 -0.868782 -0.191783 1
4 -0.180249 -1.306034 0.576556 0.460190 0.719035 0.403197 1

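Instantiate and Run Each Agent

Note: this cell calls the DeepSeek API and needs a valid key in the FF_LLM__API_KEY environment variable. The output captured below comes from a run with an invalid key, so every agent reports an authentication error and 0 specs.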

Code
from feature_forge.config import LLMConfig, Settings
from feature_forge.llm.base import LLMClient
from feature_forge.llm.providers.deepseek import DeepSeekProvider

settings = Settings(
    task="classification",
    llm=LLMConfig(
        model="deepseek-chat",
        api_key=os.environ.get("FF_LLM__API_KEY", ""),
        max_concurrent_calls=2,
    ),
)

llm = DeepSeekProvider(
    model=settings.llm.model,
    api_key=settings.llm.api_key.get_secret_value() if settings.llm.api_key else None,
)

context = {
    "description": {col: {"type": "numerical"} for col in df.columns if col != "target"},
    "memory": "",
    "round_idx": 0,
    "positive_features": [],
    "negative_features": [],
}

agent_results = {}
for name, cls in agents.items():
    try:
        agent = cls(config=settings, llm_client=llm)
        specs = await agent.generate(
            X=df.drop(columns=["target"]),
            y=df["target"],
            context=context,
        )
        agent_results[name] = {
            "specs": len(specs),
            "class": cls.__name__,
            "error": None,
        }
    except Exception as exc:
        agent_results[name] = {"specs": 0, "class": cls.__name__, "error": str(exc)[:100]}

pd.DataFrame.from_dict(agent_results, orient="index")
{"agent": "unary", "num_columns": 6, "round_idx": 0, "event": "agent_generate_start", "level": "info", "timestamp": "2026-05-11T13:05:30.865481Z", "span": null}
{"provider": "deepseek", "model": "deepseek-chat", "num_messages": 2, "temperature": 0.2, "max_tokens": 4096, "event": "llm_request", "level": "info", "timestamp": "2026-05-11T13:05:30.865935Z", "span": null}
{"provider": "deepseek", "model": "deepseek-chat", "error": "Error code: 401 - {'error': {'message': 'Authentication Fails, Your api key: ****4973 is invalid', 'type': 'authentication_error', 'param': None, 'code': 'invalid_request_error'}}", "event": "llm_error", "level": "error", "timestamp": "2026-05-11T13:05:31.141073Z", "span": null}
{"agent": "unary", "error": "OpenAI API error: Error code: 401 - {'error': {'message': 'Authentication Fails, Your api key: ****4973 is invalid', 'type': 'authentication_error', 'param': None, 'code': 'invalid_request_error'}}", "event": "agent_generate_error", "level": "error", "timestamp": "2026-05-11T13:05:31.141412Z", "span": null}
{"agent": "cross_compositional", "num_columns": 6, "round_idx": 0, "event": "agent_generate_start", "level": "info", "timestamp": "2026-05-11T13:05:31.142118Z", "span": null}
{"provider": "deepseek", "model": "deepseek-chat", "num_messages": 2, "temperature": 0.2, "max_tokens": 4096, "event": "llm_request", "level": "info", "timestamp": "2026-05-11T13:05:31.142331Z", "span": null}
{"provider": "deepseek", "model": "deepseek-chat", "error": "Error code: 401 - {'error': {'message': 'Authentication Fails, Your api key: ****4973 is invalid', 'type': 'authentication_error', 'param': None, 'code': 'invalid_request_error'}}", "event": "llm_error", "level": "error", "timestamp": "2026-05-11T13:05:31.258792Z", "span": null}
{"agent": "cross_compositional", "error": "OpenAI API error: Error code: 401 - {'error': {'message': 'Authentication Fails, Your api key: ****4973 is invalid', 'type': 'authentication_error', 'param': None, 'code': 'invalid_request_error'}}", "event": "agent_generate_error", "level": "error", "timestamp": "2026-05-11T13:05:31.259083Z", "span": null}
{"agent": "aggregation", "num_columns": 6, "round_idx": 0, "event": "agent_generate_start", "level": "info", "timestamp": "2026-05-11T13:05:31.260003Z", "span": null}
{"provider": "deepseek", "model": "deepseek-chat", "num_messages": 2, "temperature": 0.2, "max_tokens": 4096, "event": "llm_request", "level": "info", "timestamp": "2026-05-11T13:05:31.260160Z", "span": null}
{"provider": "deepseek", "model": "deepseek-chat", "error": "Error code: 401 - {'error': {'message': 'Authentication Fails, Your api key: ****4973 is invalid', 'type': 'authentication_error', 'param': None, 'code': 'invalid_request_error'}}", "event": "llm_error", "level": "error", "timestamp": "2026-05-11T13:05:31.541470Z", "span": null}
{"agent": "aggregation", "error": "OpenAI API error: Error code: 401 - {'error': {'message': 'Authentication Fails, Your api key: ****4973 is invalid', 'type': 'authentication_error', 'param': None, 'code': 'invalid_request_error'}}", "event": "agent_generate_error", "level": "error", "timestamp": "2026-05-11T13:05:31.541805Z", "span": null}
{"agent": "temporal", "num_columns": 6, "round_idx": 0, "event": "agent_generate_start", "level": "info", "timestamp": "2026-05-11T13:05:31.542602Z", "span": null}
{"provider": "deepseek", "model": "deepseek-chat", "num_messages": 2, "temperature": 0.2, "max_tokens": 4096, "event": "llm_request", "level": "info", "timestamp": "2026-05-11T13:05:31.542791Z", "span": null}
{"provider": "deepseek", "model": "deepseek-chat", "error": "Error code: 401 - {'error': {'message': 'Authentication Fails, Your api key: ****4973 is invalid', 'type': 'authentication_error', 'param': None, 'code': 'invalid_request_error'}}", "event": "llm_error", "level": "error", "timestamp": "2026-05-11T13:05:31.647705Z", "span": null}
{"agent": "temporal", "error": "OpenAI API error: Error code: 401 - {'error': {'message': 'Authentication Fails, Your api key: ****4973 is invalid', 'type': 'authentication_error', 'param': None, 'code': 'invalid_request_error'}}", "event": "agent_generate_error", "level": "error", "timestamp": "2026-05-11T13:05:31.648107Z", "span": null}
{"agent": "local_transform", "num_columns": 6, "round_idx": 0, "event": "agent_generate_start", "level": "info", "timestamp": "2026-05-11T13:05:31.649116Z", "span": null}
{"provider": "deepseek", "model": "deepseek-chat", "num_messages": 2, "temperature": 0.2, "max_tokens": 4096, "event": "llm_request", "level": "info", "timestamp": "2026-05-11T13:05:31.649438Z", "span": null}
{"provider": "deepseek", "model": "deepseek-chat", "error": "Error code: 401 - {'error': {'message': 'Authentication Fails, Your api key: ****4973 is invalid', 'type': 'authentication_error', 'param': None, 'code': 'invalid_request_error'}}", "event": "llm_error", "level": "error", "timestamp": "2026-05-11T13:05:31.858416Z", "span": null}
{"agent": "local_transform", "error": "OpenAI API error: Error code: 401 - {'error': {'message': 'Authentication Fails, Your api key: ****4973 is invalid', 'type': 'authentication_error', 'param': None, 'code': 'invalid_request_error'}}", "event": "agent_generate_error", "level": "error", "timestamp": "2026-05-11T13:05:31.859221Z", "span": null}
{"agent": "local_pattern", "num_columns": 6, "round_idx": 0, "event": "agent_generate_start", "level": "info", "timestamp": "2026-05-11T13:05:31.860830Z", "span": null}
{"provider": "deepseek", "model": "deepseek-chat", "num_messages": 2, "temperature": 0.2, "max_tokens": 4096, "event": "llm_request", "level": "info", "timestamp": "2026-05-11T13:05:31.861087Z", "span": null}
{"provider": "deepseek", "model": "deepseek-chat", "error": "Error code: 401 - {'error': {'message': 'Authentication Fails, Your api key: ****4973 is invalid', 'type': 'authentication_error', 'param': None, 'code': 'invalid_request_error'}}", "event": "llm_error", "level": "error", "timestamp": "2026-05-11T13:05:32.007430Z", "span": null}
{"agent": "local_pattern", "error": "OpenAI API error: Error code: 401 - {'error': {'message': 'Authentication Fails, Your api key: ****4973 is invalid', 'type': 'authentication_error', 'param': None, 'code': 'invalid_request_error'}}", "event": "agent_generate_error", "level": "error", "timestamp": "2026-05-11T13:05:32.007724Z", "span": null}
specs class error
unary 0 UnaryFeatureAgent unary LLM call failed: OpenAI API error: Error...
cross_compositional 0 CrossCompositionalAgent cross_compositional LLM call failed: OpenAI AP...
aggregation 0 AggregationConstructAgent aggregation LLM call failed: OpenAI API error:...
temporal 0 TemporalFeatureAgent temporal LLM call failed: OpenAI API error: Er...
local_transform 0 LocalTransformAgent local_transform LLM call failed: OpenAI API er...
local_pattern 0 LocalPatternAgent local_pattern LLM call failed: OpenAI API erro...

Inspect Generated Specifications

Code
# Pick one agent and show its raw specs
agent_name = "unary"
if agent_name in agents and not agent_results[agent_name].get("error"):
    agent = agents[agent_name](config=settings, llm_client=llm)
    specs = await agent.generate(
        X=df.drop(columns=["target"]), y=df["target"], context=context
    )
    print(f"{agent_name} generated {len(specs)} specs:")
    for s in specs[:3]:
        print(f"  - {s}")

Custom Agent

Code
from feature_forge.agents import BaseFeatureAgent

class PolynomialAgent(BaseFeatureAgent):
    """Minimal example of a custom agent created by subclassing BaseFeatureAgent.

    Only two class attributes are overridden; everything else is inherited
    from the base class. Intended to suggest polynomial combinations.
    """

    # Name printed below as the custom agent's identifier.
    agent_name = "polynomial"
    # Reuse the unary prompt file as a template.
    prompt_filename = "unary.txt"


# Custom agents are typically registered via pyproject.toml entry points:
# [project.entry-points."feature_forge.agents"]
# polynomial = "my_package:PolynomialAgent"
print(f"Custom agent class: {PolynomialAgent.agent_name}")
builtin_names = list(AgentRegistry.get_builtin_agents().keys())
print(f"Built-in agents: {builtin_names}")
Custom agent class: polynomial
Built-in agents: ['unary', 'cross_compositional', 'aggregation', 'temporal', 'local_transform', 'local_pattern']

Plot: Specs per Agent

Code
import matplotlib.pyplot as plt

# Build one row per agent from the recorded results.
plot_df = pd.DataFrame.from_dict(agent_results, orient="index")

if "specs" in plot_df.columns:
    # Keep only the runs that were recorded without an error.
    plot_df = plot_df[plot_df["error"].isna()]
else:
    # No result rows were recorded at all (e.g. ``agent_results`` is empty).
    # Reduce to an empty frame so the message below is printed instead of the
    # cell finishing without any output.
    plot_df = plot_df.iloc[0:0]

if plot_df.empty:
    print("No successful agent runs to plot.")
else:
    # Bar chart of the number of feature specs each successful agent produced.
    fig, ax = plt.subplots(figsize=(8, 4))
    plot_df["specs"].plot(kind="bar", ax=ax, color="teal")
    ax.set_title("Feature Specifications Generated per Agent")
    ax.set_ylabel("Count")
    ax.tick_params(axis="x", rotation=45)
    plt.tight_layout()
    plt.show()
No successful agent runs to plot.

Summary

  • 6 built-in agents cover unary, cross-compositional, aggregation, temporal, local transform, and local pattern features
  • AgentRegistry discovers agents via entry points and manual registration
  • Each agent has declared capabilities used by the router for data-driven selection
  • Custom agents subclass BaseFeatureAgent and are registered either through a "feature_forge.agents" entry point in pyproject.toml (as shown above) or via AgentRegistry.register()