All Block Entropy endpoints utilize Outlines for structured generation.
A common way to use Chat Completions is to instruct the model to always return a JSON object, match a regular expression, or pick from a list of choices that make sense for your use case. Without structured generation, there is no guarantee that the model will produce the output you want. The examples below show how to use structured generation.
JSON Mode
from langchain.schema import HumanMessage, SystemMessage, AIMessage
from langchain_openai import ChatOpenAI
import json
from enum import Enum
from pydantic import BaseModel, constr
# Streaming chat client that talks to the Block Entropy endpoint.
# "be_..." is a placeholder; substitute your own API key.
llm = ChatOpenAI(
    openai_api_base="https://api.blockentropy.ai/v1",
    openai_api_key="be_...",
    temperature=1.0,
    max_tokens=1024,
    streaming=True,
)
class Weapon(str, Enum):
    """Closed set of weapons a character may carry.

    The ``str`` mixin makes every member compare equal to its plain
    string value (``Weapon.sword == "sword"``).
    """

    sword = "sword"
    axe = "axe"
    mace = "mace"
    spear = "spear"
    bow = "bow"
    crossbow = "crossbow"
class Armor(str, Enum):
    """Closed set of armor types a character may wear.

    The ``str`` mixin makes every member compare equal to its plain
    string value (``Armor.plate == "plate"``).
    """

    leather = "leather"
    chainmail = "chainmail"
    plate = "plate"
class Character(BaseModel):
    """Pydantic model describing the character the LLM is asked to produce."""

    name: constr(max_length=10)  # string constrained to at most 10 characters
    age: int
    armor: Armor  # one of the Armor enum values
    weapon: Weapon  # one of the Weapon enum values
    strength: int
# Serialize the schema once: the same JSON text is embedded in the prompt
# and sent with the structured-generation request.
# NOTE(review): on pydantic v2, `Character.model_json_schema()` is the
# non-deprecated spelling of `Character.schema()` -- confirm the installed version.
character_schema = json.dumps(Character.schema())

messages = [
    SystemMessage(content="You are a helpful assistant."),
    HumanMessage(
        content=(
            "Give me an interesting character description based on the "
            f"following schema: {character_schema}"
        )
    ),
]

# extra_body carries the structured-generation settings: "outlines_type"
# selects JSON mode and "json" supplies the schema as a JSON string.
for chunk in llm.stream(
    messages,
    extra_body={"outlines_type": "json", "json": character_schema},
):
    # Emit each streamed piece immediately, with no newline between chunks.
    print(chunk.content, end="", flush=True)
Choices
from langchain.schema import HumanMessage, SystemMessage, AIMessage
from langchain_openai import ChatOpenAI
import json
from enum import Enum
from pydantic import BaseModel, constr
# Streaming chat client that talks to the Block Entropy endpoint.
# "be_..." is a placeholder; substitute your own API key.
llm = ChatOpenAI(
    openai_api_base="https://api.blockentropy.ai/v1",
    openai_api_key="be_...",
    temperature=1.0,
    max_tokens=1024,
    streaming=True,
)
messages = [
    SystemMessage(content="You are a helpful assistant."),
    HumanMessage(content="Who is better bob or fred?"),
]

# extra_body carries the structured-generation settings: "outlines_type"
# selects choices mode and "choices" lists the allowed answers.
for chunk in llm.stream(
    messages,
    extra_body={"outlines_type": "choices", "choices": ["bob", "fred"]},
):
    # Emit each streamed piece immediately, with no newline between chunks.
    print(chunk.content, end="", flush=True)