Published on June 25, 2026
Go homeAgentic design patterns
Below is an agentic loop pattern that uses generators to separate the calling code from the loop. This code assumes you are using Ollama for inference.
The generator provides two-way communication between the loop and the calling code. This allows the developer to update state at various checkpoints, as well as execute tools and send the results back to the agent.
import dataclasses
import datetime
import enum
import typing as t
import zoneinfo
from ollama import Client as SyncOllama
class Tool(t.Protocol):
    """Structural interface for a tool: a schema plus an executor."""

    @staticmethod
    def get_schema() -> dict[str, t.Any]:
        """Return the dictionary that describes this tool."""
        ...

    def execute(self, **kwargs: t.Any) -> str:
        """Run the tool with keyword arguments and return its result as text."""
        ...
class ToolError(Exception):
    """Base class of the tool-related exception hierarchy."""


class DependencyUnavailable(ToolError):
    """Tool error subtype; presumably signals that something a tool relies on is unavailable."""
class Timezone:
"""Timezone utility."""
def __init__(self, tzinfo: zoneinfo.ZoneInfo) -> None:
self.tzinfo = tzinfo
def now(self) -> datetime.datetime:
"""Return the current datetime in the provided timezone."""
return datetime.datetime.now(datetime.UTC).astimezone(self.tzinfo)
# Module-level clock: `timezone.now()` returns the current time in Africa/Johannesburg.
timezone = Timezone(zoneinfo.ZoneInfo("Africa/Johannesburg"))
class CheckpointKind(enum.StrEnum):
"""Models different kinds of checkpoints."""
START = enum.auto()
REASONING = enum.auto()
TOOL_CALLS = enum.auto()
RESPONSE = enum.auto()
@dataclasses.dataclass
class Checkpoint:
    """A milestone record describing where a run currently stands."""

    # Which milestone this checkpoint represents.
    kind: CheckpointKind
    # The chat messages associated with the checkpoint.
    messages: list[dict[str, t.Any]]
    # Text attached to the milestone; may be an empty string.
    content: str
    # Loop counter value recorded with the checkpoint.
    loop_count: int
    # Timestamp recorded with the checkpoint. The "suspened" spelling is part
    # of the public field name and is kept for compatibility.
    suspened_at: datetime.datetime
class TokenKind(enum.StrEnum):
"""Models different kinds of tokens."""
REASONING = enum.auto()
RESPONSE = enum.auto()
@dataclasses.dataclass
class Token:
    """A single piece of streamed text together with its category."""

    # Category of the text (reasoning or response).
    kind: TokenKind
    # The text fragment itself.
    text: str
    # Timestamp recorded with the token. The "suspened" spelling is part of
    # the public field name and is kept for compatibility.
    suspened_at: datetime.datetime
type AgentChannel = t.Generator[Checkpoint | Token | ToolCall, str | None]
@dataclasses.dataclass
class ToolCall:
    """A request to run a tool, identified by name, with its arguments."""

    # Name of the tool to run.
    name: str
    # Keyword arguments for the tool, keyed by parameter name.
    arguments: dict[str, t.Any]
class Agent:
    """Run a streaming chat model in a tool-calling loop, exposed as a generator.

    The caller drives the loop through the generator returned by
    `open_channel()`: the agent yields tokens and checkpoints for observation
    and yields `ToolCall` items that the caller answers with `send()`.
    """

    def __init__(
        self,
        ollama: SyncOllama,
        messages: list[dict[str, t.Any]],
        model: str,
        think: bool,
        tools: list[dict[str, t.Any]],
        max_loop_count: int,
    ) -> None:
        """Store the client and the settings used for every model call.

        Args:
            ollama: Client whose `chat()` method is called once per loop iteration.
            messages: Conversation so far. The list is mutated in place as the
                loop appends assistant and tool messages.
            model: Model name passed to `chat()`.
            think: Passed to `chat()` as the `think` flag.
            tools: Tool schemas passed to `chat()`.
            max_loop_count: Maximum number of model calls before giving up.
        """
        self.ollama = ollama
        self.messages = messages
        self.model = model
        self.think = think
        self.tools = tools
        self.max_loop_count = max_loop_count

    def open_channel(self) -> AgentChannel:
        """Start the agentic loop.

        Yields:
            `Checkpoint` items at START, after reasoning, after a round of
            tool calls, and at the final RESPONSE; `Token` items for each
            streamed reasoning/response fragment; and one `ToolCall` per tool
            the model asks for.

        Receives:
            After each yielded `ToolCall`, the value passed to `send()` is
            recorded as the content of the corresponding tool message.
        """
        # Snapshot the configuration once, when the generator is first advanced.
        messages = self.messages
        ollama = self.ollama
        tools = self.tools
        think = self.think
        max_loop_count = self.max_loop_count
        model = self.model

        loop_count = 0
        # Defined before the loop so the code after it is safe even when the
        # loop body never runs (max_loop_count <= 0).
        response_tokens: list[str] = []

        yield Checkpoint(
            kind=CheckpointKind.START,
            messages=messages,
            content="",
            loop_count=loop_count,
            suspened_at=timezone.now(),
        )

        # `<` (not `<=`) so that at most `max_loop_count` model calls are made.
        while loop_count < max_loop_count:
            thinking_tokens: list[str] = []
            response_tokens = []
            tool_calls: list[t.Any] = []

            stream = ollama.chat(model=model, messages=messages, tools=tools, think=think, stream=True)
            loop_count += 1

            for chunk in stream:
                if chunk.message.thinking:
                    thinking = chunk.message.thinking
                    thinking_tokens.append(thinking)
                    yield Token(kind=TokenKind.REASONING, text=thinking, suspened_at=timezone.now())
                if chunk.message.content:
                    text = chunk.message.content
                    response_tokens.append(text)
                    yield Token(kind=TokenKind.RESPONSE, text=text, suspened_at=timezone.now())
                if chunk.message.tool_calls:
                    tool_calls.extend(chunk.message.tool_calls)

            # Record the assistant turn whenever it produced reasoning OR tool
            # calls; otherwise tool results appended below would have no
            # preceding assistant message that requested them.
            if thinking_tokens or tool_calls:
                reasoning = "".join(thinking_tokens)
                messages.append(
                    {
                        "role": "assistant",
                        "content": "",
                        "thinking": reasoning,
                        "tool_calls": [tc.model_dump() for tc in tool_calls],
                    }
                )
                if thinking_tokens:
                    yield Checkpoint(
                        kind=CheckpointKind.REASONING,
                        messages=messages,
                        content=reasoning,
                        loop_count=loop_count,
                        suspened_at=timezone.now(),
                    )

            if not tool_calls:
                # The model answered without requesting tools: we are done.
                break

            for tool_call in tool_calls:
                function = tool_call.function
                tool_name = function.name
                arguments = function.arguments
                # Ask the caller to execute the tool and send back the content
                tool_result = yield ToolCall(name=tool_name, arguments=arguments)
                messages.append(
                    {
                        "role": "tool",
                        "content": tool_result,
                        "tool_name": tool_name,
                        "tool_arguments": arguments,
                    }
                )

            yield Checkpoint(
                kind=CheckpointKind.TOOL_CALLS,
                messages=messages,
                content="",
                loop_count=loop_count,
                suspened_at=timezone.now(),
            )
        else:
            # Reached only when the loop budget ran out without a `break`, so
            # a normal answer on the last permitted call is never mislabeled.
            notice = f"Maximum agentic loop count {max_loop_count} reach before inquiry could be answered."
            response_tokens.append(notice)
            # Stream only the notice: any response text from the last call was
            # already yielded token by token above.
            yield Token(kind=TokenKind.RESPONSE, text=notice, suspened_at=timezone.now())

        content = "".join(response_tokens)
        if content:
            # Appended exactly once, whether the loop finished normally or not.
            messages.append(
                {
                    "role": "assistant",
                    "content": content,
                    "thinking": "",
                    "tool_calls": [],
                }
            )

        yield Checkpoint(
            kind=CheckpointKind.RESPONSE,
            messages=messages,
            content=content,
            loop_count=loop_count,
            suspened_at=timezone.now(),
        )
Usage
class GetCurrentTime:
    """Agent tool which returns the current datetime in ISO format."""

    @staticmethod
    def get_schema() -> dict[str, t.Any]:
        """Return the function-style schema dictionary describing this tool."""
        return {
            "type": "function",
            "function": {
                "name": "GetCurrentTime",
                "description": "Return the current datetime in iso format.",
                "parameters": {},
            },
        }

    def execute(self, **kwargs: t.Any) -> str:
        """Return the current datetime as an ISO 8601 string.

        Keyword arguments are accepted so the signature matches
        `Tool.execute(self, **kwargs)`; this tool takes no parameters, so any
        arguments supplied are ignored.
        """
        current_datetime = timezone.now()
        return current_datetime.isoformat()
ollama = SyncClient()
generation_model = "minimax-m3:cloud"
tools = [
GetCurrentTime.get_schema(),
]
system_prompt = "You are a super intelligent time teller"
prompt = "What is the current time?"
messages: list[dict[str, t.Any]] = [
{"role": "system", "content": system_prompt},
{"role": "user", "content": prompt},
]
agent = Agent(
ollama=ollama,
messages=messages,
model=generation_model,
think=True,
tools=tools,
max_loop_count=10,
)
has_more = True
# Create the generator
channel = agent.open_channel()
# Each message yielded from the generator can be handled here
for message in channel:
match message:
case Checkpoint(kind=CheckpointKind.START) as checkpoint:
print("Clanker started")
case Checkpoint(kind=CheckpointKind.REASONING) as checkpoint:
print("Reasoning completed")
case Checkpoint(kind=CheckpointKind.RESPONSE) as checkpoint:
print("Clanker has responded")
case ToolCall() as tool_call:
content = None
if tool_call.name == "GetCurrentTime":
get_current_time = GetCurrentTime()
content = get_current_time.execute()
# Send the response back to the model
channel.send(content)
case Token(kind=TokenKind.REASONING) as token:
print(f"The clanker is thinking: {token.text}")
case Token(kind=TokenKind.RESPONSE) as token:
print(f"The clanker is responding: {token.text}")