Usage

Example (Single Contract)

Here’s a simple example of how to load and transform data for a single contract:

# Step 1: Find and initialize the optimal contract
from optrade.data.contracts import Contract
from rich.console import Console


# Console used for all log output in this example (reused by the later steps)
ctx = Console()
ctx.log("Searching for an ATM call option available January 3, 2023 with approximately 30 days to expiration...")
contract = Contract.find_optimal(
    root="AAPL",             # Ticker symbol
    right="C",               # Call option
    start_date="20230103",   # First trading day of 2023
    target_tte=30,           # Desired time to expiration: 30 days
    tte_tolerance=(20, 40),  # Min 20, max 40 days to expiration
    interval_min=15,         # Data requested at 15-min level
    moneyness="ATM",         # At-the-money option
    verbose=True,
)
ctx.log(f"Optimal contract found: {contract}")

# Step 2: Load market data (NBBO quotes and OHLCV)
ctx.log("Loading market data from ThetaData API...")
df = contract.load_data()
# Preview the first rows of the loaded data
print(df.head())

# Step 3: Transform raw data into ML-ready features
from optrade.data.features import transform_features
from rich.table import Table
from rich import box

data = transform_features(
    df=df,
    core_feats=[
        "option_returns",        # Option price returns
        "stock_returns",         # Underlying stock returns
        "moneyness",             # Log(S/K)
        "option_lob_imbalance",  # Order book imbalance
        "stock_quote_spread",    # Bid-ask spread normalized
    ],
    tte_feats=["sqrt"],                # Time-to-expiration features
    datetime_feats=["minute_of_day"],  # Time features
    vol_feats=["rolling_volatility"],  # Rolling volatility window and short-to-long volatility ratio
    rolling_volatility_range=[60],     # 60min rolling volatility windows
    # Contract metadata taken from the contract found in Step 1
    strike=contract.strike,
    exp=contract.exp,
    root=contract.root,
    right=contract.right,
)


# Build a rich table that previews the transformed features
table = Table(title="Transformed Features", box=box.SIMPLE)

# One centered, non-wrapping column per feature
for column_name in data.columns:
    table.add_column(column_name, justify="center", style="cyan", no_wrap=True)

# Only the first 10 rows are shown; the row index is not displayed
for _, record in data.head(10).iterrows():
    table.add_row(*map(str, record.values))
ctx.log(table)


# Step 4: Create dataset for time series forecasting
from optrade.data.forecasting import ForecastingDataset
from torch.utils.data import DataLoader

# Window lengths are counted in data points. At the 15-min interval requested
# in Step 1, 20 points span 300 minutes and 5 points span 75 minutes.
ctx.log("Converting data to PyTorch dataset with lookback window size of 20 data points (300min) and forecast horizon of 5 data points (75min)")
ctx.log("Using all features as inputs, with target channel set to option returns")
torch_dataset = ForecastingDataset(
    data=data,
    seq_len=20,        # 300-minute lookback window (20 x 15min)
    pred_len=5,        # 75-minute forecast horizon (5 x 15min)
    target_channels=["option_returns"],  # Forecast option returns
)

torch_loader = DataLoader(torch_dataset, batch_size=32)

# Inspect only the first batch, then stop iterating
for x, y in torch_loader:
    ctx.log("Grabbing a single example:")
    ctx.log(f"Input shape: {x.shape}, Target shape: {y.shape}")
    break

Example (Multiple Contracts)

When modeling multiple contracts, you can use the ContractDataset class to find a set of optimal contracts with similar parameters and then use the get_forecasting_dataset function to load and transform the data for all contracts:

# Step 1: Find a set of optimal contracts from total_start_date to total_end_date
from optrade.data.contracts import ContractDataset
from rich.console import Console
import random
ctx = Console()

ctx.log("Constructing a dataset of put options for Amazon from January 1, 2022 to June 1, 2022...")
contract_dataset = ContractDataset(
    # Underlying and overall date range
    root="AMZN",
    total_start_date="20220101",
    total_end_date="20220601",
    # Sampling settings
    contract_stride=3,
    interval_min=15,
    # Contract selection criteria
    right="P",
    moneyness="OTM",
    target_tte=30,
    tte_tolerance=(15, 45),
    # Volatility settings (NOTE(review): semantics are defined by ContractDataset; confirm)
    volatility_scaled=True,
    volatility_scalar=0.1,
    hist_vol=0.1117,
)

# Show a status indicator while the contracts are being generated
with ctx.status("Generating contracts..."):
    contract_dataset.generate()
ctx.log(f"Found a total of {len(contract_dataset)} contracts!")

# Pick one valid index uniformly at random and display that contract
n = random.randrange(len(contract_dataset))
ctx.log(f"Randomly chosen contract ({n}): {contract_dataset[n]}")


# Step 2: Load market data and transform features for all contracts then put into a concatenated torch dataset
from optrade.data.forecasting import get_forecasting_dataset
from torch.utils.data import DataLoader

concat_dataset, updated_contract_dataset = get_forecasting_dataset(
    contract_dataset=contract_dataset,
    core_feats=["option_returns"],
    tte_feats=["sqrt"],
    datetime_feats=["sin_minute_of_day"],
    tte_tolerance=(25, 35),
    seq_len=100,
    pred_len=10,
    verbose=True,
)
torch_loader = DataLoader(concat_dataset)


# Total number of examples: sum the sizes of the individual datasets that
# make up the concatenated dataset
n = sum(len(dataset) for dataset in concat_dataset.datasets)
ctx.log(f"A total of {n} training examples (input/target pairs) were generated.")

Example (PyTorch Forecasting)

When running forecasting experiments, you can use the Experiment class from optrade.exp.forecasting, which supports PyTorch deep learning (DL) models. Several state-of-the-art models are available in the optrade.models.pytorch module, allowing you to easily experiment with different modern DL architectures:

# Step 1: Initialize the experiment with offline logging
from optrade.exp.forecasting import Experiment
exp = Experiment(logging="offline")

# Set device to GPU if available, otherwise CPU
import torch
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Define feature sets for the model
core_feats = ["option_returns", "option_volume", "stock_lob_imbalance"]  # Core features
tte_feats = ["sqrt"]  # Time-to-expiration features
datetime_feats = ["sin_minute_of_day"]  # Temporal features
input_channels = core_feats + tte_feats + datetime_feats  # Combined input features
target_channels = ["option_returns"]  # Target variable

# Window lengths shared by the data loaders and the model. Defining them once
# guarantees that the model configuration matches the data configuration.
seq_len = 12   # Input sequence length (12 x 5min = 1 hour lookback)
pred_len = 4   # Prediction length (4 x 5min = 20 minute forecast)

# Step 2: Initialize data loaders with specified configuration
exp.init_loaders(
    root="TSLA",                       # Ticker symbol
    start_date="20210601",             # Full dataset start date
    end_date="20211231",               # Full dataset end date
    contract_stride=5,                 # Sample contracts every 5 days
    interval_min=5,                    # 5-minute intervals
    right="C",                         # Call options
    target_tte=30,                     # Target 30 days to expiration
    tte_tolerance=(15, 45),            # Accept options with 15-45 days to expiration
    moneyness="ATM",                   # At-the-money options
    train_split=0.5,                   # 50% of data for training
    val_split=0.25,                    # 25% of data for validation (remaining 25% for testing)
    seq_len=seq_len,                   # Shared input sequence length
    pred_len=pred_len,                 # Shared prediction length
    scaling=True,                      # Normalize all features
    core_feats=core_feats,
    tte_feats=tte_feats,
    datetime_feats=datetime_feats,
    target_channels=target_channels,
    # DataLoader settings
    num_workers=0,                     # Single-process (development safe)
    prefetch_factor=None,              # No prefetching batches
    persistent_workers=False,          # Kill workers between epochs
)

# Step 3: Define model architecture
from optrade.models.pytorch.patchtst import Model as PatchTST
model = PatchTST(
    num_enc_layers=2,                  # Number of Transformer encoder layers
    d_model=32,                        # Model dimension (embedding size)
    d_ff=64,                           # Feed-forward network dimension
    num_heads=2,                       # Number of self-attention heads
    seq_len=seq_len,                   # Same sequence length as the data loaders
    pred_len=pred_len,                 # Same prediction length as the data loaders
    patch_dim=2,                       # Patch dimension
    stride=2,                          # Patch stride
    input_channels=input_channels,
    target_channels=target_channels,
).to(device)

# Define optimization method and objective (loss) function
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)  # Adam optimizer
criterion = torch.nn.MSELoss()                             # Mean Squared Error loss

# Step 4: Train the model
# NOTE(review): patience (20) exceeds num_epochs (5), so early stopping
# presumably cannot trigger in this short run -- confirm against train_torch.
model = exp.train_torch(
    model=model,
    device=device,
    optimizer=optimizer,
    criterion=criterion,
    num_epochs=5,                      # Number of training epochs
    early_stopping=True,               # Enable early stopping
    patience=20,                       # Number of epochs before early stopping
)

# Step 5: Evaluate model on test set
exp.test(
    model=model,
    criterion=criterion,
    metrics=["mse"],                   # Metrics to compute
    device=device,                     # Computing device (CPU/GPU)
)
exp.save_logs()  # Save experiment logs to disk

Example (Universe Selection)

When modeling a universe of securities, you can use the Universe class to filter by parameters such as fundamentals (e.g., P/E ratio), volatility, and Fama-French factor exposures. Here’s an example:

from optrade.data.universe import Universe
from rich.console import Console

# Step 1: Initialize Universe
ctx = Console()
universe = Universe(
    dow_jones=True,                # Use Dow Jones as the starting universe
    start_date="20210101",
    end_date="20211001",

    # Filters
    pe_ratio="low",                # Low P/E ratio (bottom third)
    market_cap="high",             # Large-cap (top third)
    investment_beta="aggressive",  # Aggressive investment strategy (Fama-French exposure)
    verbose=True,
    dev_mode=True
)

# Step 2: Fetch constituents from Wikipedia
universe.set_roots()

# Step 3: Get market data via yfinance & compute Fama-French exposures
universe.get_market_metrics()

# Step 4: Apply filters (low P/E ratio, high market cap, aggressive investment beta)
universe.filter()

# Step 5: Download options data for filtered universe
universe.download(
    contract_stride=3,          # Sample contracts every 3 days
    interval_min=1,             # Data requested at 1-min level
    right="C",                  # Call options only
    target_tte=30,              # Desired expiration: 30 days
    tte_tolerance=(20, 40),     # Min 20, max 40 days to expiration
    moneyness="ATM",            # At-the-money option
    train_split=0.5,            # 50% training
    val_split=0.3,              # 30% validation (hence 20% test)
)












# # Step 6: Select a stock from the universe and create PyTorch dataloaders
# root = universe.roots[0]
# print(f"Loading data for root: {root}")

# loaders = universe.get_forecasting_loaders(
#     offline=True,               # Use cached data
#     root=root,                  # Stock symbol
#     tte_tolerance=(20, 40),     # DTE range
#     seq_len=30,                 # 30-min lookback
#     pred_len=5,                 # 5-min forecast
#     core_feats=["option_mid_price"],  # Feature
#     target_channels=["option_mid_price"],  # Target
#     dtype="float32",            # Precision
#     scaling=False,              # No normalization
# )

# # Display dataset sizes for each split
# print(f"Train loader: {len(loaders[0].dataset)} samples")
# print(f"Validation loader: {len(loaders[1].dataset)} samples")
# print(f"Test loader: {len(loaders[2].dataset)} samples")