Usage

Example (Single Contract)

Here’s a simple example of how to load and transform data for a single contract:

 1# Step 1: Find and initialize the optimal contract
 2from optrade.data.contracts import Contract
 3from rich.console import Console
 4
 5
 6ctx = Console()
 7ctx.log("Searching for an ATM call option available January 3, 2023 with approximately 30 days to expiration...")
 8contract = Contract.find_optimal(
 9    root="AAPL",
10    right="C",              # Call option
11    start_date="20230103",  # First trading day of 2023
12    target_tte=30,          # Desired expiration: 30 days
13    tte_tolerance=(20, 40), # Min 20, max 40 days to expiration
14    interval_min=15,         # Data requested at 15-min level
15    moneyness="ATM",        # At-the-money option
16    verbose=True,
17)
18ctx.log(f"Optimal contract found: {contract}")
19
20# Step 2: Load market data (NBBO quotes and OHLCV)
21ctx.log("Loading market data from ThetaData API...")
22df = contract.load_data()
23print(df.head())
24
25# Step 3: Transform raw data into ML-ready features
26from optrade.data.features import transform_features
27from rich.table import Table
28from rich import box
29
30data = transform_features(
31    df=df,
32    core_feats=[
33        "option_returns",     # Option price returns
34        "stock_returns",      # Underlying stock returns
35        "moneyness",          # Log(S/K)
36        "option_lob_imbalance", # Order book imbalance
37        "stock_quote_spread", # Bid-ask spread normalized
38    ],
39    tte_feats=["sqrt"],  # Time-to-expiration features
40    datetime_feats=["minute_of_day"],  # Time features
41    vol_feats=["rolling_volatility"], # Rolling volatility window and short-to-long volatility ratio
42    rolling_volatility_range=[60], # 60min rolling volatility windows
43    strike=contract.strike,
44    exp=contract.exp,
45    root=contract.root,
46    right=contract.right,
47)
48
49
50table = Table(title="Transformed Features", box=box.SIMPLE)
51
52# Add column headers
53for col in data.columns:
54    table.add_column(col, justify="center", style="cyan", no_wrap=True)
55
56# Add top 10 rows only
57for i, row in data.head(10).iterrows():
58    table.add_row(*[str(item) for item in row.values])
59ctx.log(table)
60
61
62# Step 4: Create dataset for time series forecasting
63from optrade.data.forecasting import ForecastingDataset
64from torch.utils.data import DataLoader
65
66ctx.log("Converting data to PyTorch dataset with lookback window size of 20 data points (300min) and forecast horizon of 5 data points (75min)")
67ctx.log("Using all features as inputs, with target channel set to option returns")
68torch_dataset = ForecastingDataset(
69    data=data,
70    seq_len=20,        # 300-minute lookback window
71    pred_len=5,        # 75-minute forecast horizon (5 x 15min)
72    target_channels=["option_returns"],  # Forecast option returns
73)
74
75torch_loader = DataLoader(torch_dataset, batch_size=32)
76
77for batch in torch_loader:
78    x, y = batch
79    ctx.log("Grabbing a single example:")
80    ctx.log(f"Input shape: {x.shape}, Target shape: {y.shape}")
81    break

Example (Multiple Contracts)

When modeling multiple contracts, you can use the ContractDataset class to find a set of optimal contracts with similar parameters and then use the get_forecasting_dataset function to load and transform the data for all contracts:

 1# Step 1: Find a set of optimal contracts from total_start_date to total_end_date
 2from optrade.data.contracts import ContractDataset
 3from rich.console import Console
 4import random
 5ctx = Console()
 6
 7ctx.log("Constructing a dataset of put options for Amazon from January 1, 2022 to June 1, 2022...")
 8contract_dataset = ContractDataset(
 9    root="AMZN",
10    total_start_date="20220101",
11    total_end_date="20220601",
12    contract_stride=3,
13    interval_min=15,
14    right="P",
15    target_tte=30,
16    tte_tolerance=(15,45),
17    moneyness="OTM",
18    volatility_scaled=True,
19    volatility_scalar=0.1,
20    hist_vol=0.1117,
21)
22with ctx.status("Generating contracts..."):
23    contract_dataset.generate()
24ctx.log(f"Found a total of {len(contract_dataset)} contracts!")
25n = random.randint(0, len(contract_dataset)-1)
26ctx.log(f"Randomly chosen contract ({n}): {contract_dataset[n]}")
27
28
29# Step 2: Load market data and transform features for all contracts then put into a concatenated torch dataset
30from optrade.data.forecasting import get_forecasting_dataset
31from torch.utils.data import DataLoader
32
33concat_dataset, updated_contract_dataset = get_forecasting_dataset(
34    contract_dataset=contract_dataset,
35    core_feats=["option_returns"],
36    tte_feats=["sqrt"],
37    datetime_feats=["sin_minute_of_day"],
38    tte_tolerance=(25, 35),
39    seq_len=100,
40    pred_len=10,
41    verbose=True
42)
43torch_loader = DataLoader(concat_dataset)
44
45
46# Total number of examples
47n = sum([len(concat_dataset.datasets[i]) for i in range(len(concat_dataset.datasets))])
48ctx.log(f"A total of {n} training examples (input/target pairs) were generated.")

Example (PyTorch Forecasting)

When running forecasting experiments, you can use the Experiment class from optrade.exp.forecasting, which supports PyTorch deep learning (DL) models. Several state-of-the-art models are available in the optrade.models.pytorch package, allowing you to easily experiment with different modern DL architectures:

 1# Step 1: Initialize the experiment with offline logging
 2from optrade.exp.forecasting import Experiment
 3exp = Experiment(logging="offline")
 4
 5# Set device to GPU if available, otherwise CPU
 6import torch
 7device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 8print(f"Using device: {device}")
 9
10# Define feature sets for the model
11core_feats = ["option_returns", "option_volume", "stock_lob_imbalance"]  # Core features
12tte_feats = ["sqrt"]  # Time-to-expiration features
13datetime_feats = ["sin_minute_of_day"]  # Temporal features
14input_channels = core_feats + tte_feats + datetime_feats  # Combined input features
15target_channels = ["option_returns"]  # Target variable
16
17# Step 2: Initialize data loaders with specified configuration
18exp.init_loaders(
19    root="TSLA",                       # Ticker symbol
20    start_date="20210601",             # Full dataset start date
21    end_date="20211231",               # Full dataset end date
22    contract_stride=5,                 # Sample contracts every 5 days
23    interval_min=5,                    # 5-minute intervals
24    right="C",                         # Call options
25    target_tte=30,                     # Target 30 days to expiration
26    tte_tolerance=(15, 45),            # Accept options with 15-45 days to expiration
27    moneyness="ATM",                   # At-the-money options
28    train_split=0.5,                   # 50% of data for training
29    val_split=0.25,                    # 25% of data for validation (remaining 25% for testing)
30    seq_len=12,                        # Input sequence length (12 x 5min = 1 hour lookback)
31    pred_len=4,                        # Prediction length (4 x 5min = 20 minute forecast)
32    scaling=True,                      # Normalize all features
33    core_feats=core_feats,
34    tte_feats=tte_feats,
35    datetime_feats=datetime_feats,
36    target_channels=target_channels,
37    # DataLoader settings
38    num_workers=0,                     # Single-process (development safe)
39    prefetch_factor=None,              # No prefetching batches
40    persistent_workers=False,          # Kill workers between epochs
41)
42
43# Step 3: Define model architecture
44from optrade.models.pytorch.patchtst import Model as PatchTST
45model = PatchTST(
46    num_enc_layers=2,                  # Number of Transformer encoder layers
47    d_model=32,                        # Model dimension (embedding size)
48    d_ff=64,                           # Feed-forward network dimension
49    num_heads=2,                       # Number of self-attention heads
50    seq_len=12,                        # Input sequence length (must match data config)
51    pred_len=4,                        # Prediction length (must match data config)
52    patch_dim=2,                       # Patch dimension
53    stride=2,                          # Patch stride
54    input_channels=input_channels,
55    target_channels=target_channels,
56).to(device)
57
58# Define optimization method and objective (loss) function
59optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)  # Adam optimizer
60criterion = torch.nn.MSELoss()                             # Mean Squared Error loss
61
62# Step 4: Train the model
63model = exp.train_torch(
64    model=model,
65    device=device,
66    optimizer=optimizer,
67    criterion=criterion,
68    num_epochs=5,                      # Number of training epochs
69    early_stopping=True,               # Enable early stopping
70    patience=20,                       # Number of epochs before early stopping
71)
72
73# Step 5: Evaluate model on test set
74exp.test(
75    model=model,
76    criterion=criterion,
77    metrics=["mse"],                  # Metrics to compute
78    device=device,                     # Computing device (CPU/GPU)
79)
80exp.save_logs() # Save experiment logs to disk

Example (Universe Selection)

When modeling a universe of securities, you can use the Universe class to filter by parameters such as fundamentals (e.g., P/E ratio), volatility, and Fama-French factor exposures. Here’s an example:

 1from optrade.data.universe import Universe
 2from rich.console import Console
 3
 4# Step 1: Initialize Universe
 5ctx = Console()
 6universe = Universe(
 7    dow_jones=True,                # Use Dow Jones as the starting universe
 8    start_date="20210101",
 9    end_date="20211001",
10
11    # Filters
12    pe_ratio="low",                # Low P/E ratio (bottom third)
13    market_cap="high",             # Large-cap (top third)
14    investment_beta="aggressive",  # Aggressive investment strategy (Fama-French exposure)
15    verbose=True,
16    dev_mode=True
17)
18
19# Step 2: Fetch constituents from Wikipedia
20universe.set_roots()
21
22# Step 3: Get market data via yfinance & compute Fama-French exposures
23universe.get_market_metrics()
24
25# Step 4: Apply filters (low P/E ratio, high market cap, aggressive investment beta)
26universe.filter()
27
28# Step 5: Download options data for filtered universe
29universe.download(
30    contract_stride=3,          # Sample contracts every 3 days
31    interval_min=1,             # Data requested at 1-min level
32    right="C",                  # Call options only
33    target_tte=30,              # Desired expiration: 30 days
34    tte_tolerance=(20, 40),     # Min 20, max 40 days to expiration
35    moneyness="ATM",            # At-the-money option
36    train_split=0.5,            # 50% training
37    val_split=0.3,              # 30% validation (hence 20% test)
38)
39
40
41
42
43
44
45
46
47
48
49
50
51# # Step 6: Select a stock from the universe and create PyTorch dataloaders
52# root = universe.roots[0]
53# print(f"Loading data for root: {root}")
54
55# loaders = universe.get_forecasting_loaders(
56#     offline=True,               # Use cached data
57#     root=root,                  # Stock symbol
58#     tte_tolerance=(20, 40),     # DTE range
59#     seq_len=30,                 # 30-min lookback
60#     pred_len=5,                 # 5-min forecast
61#     core_feats=["option_mid_price"],  # Feature
62#     target_channels=["option_mid_price"],  # Target
63#     dtype="float32",            # Precision
64#     scaling=False,              # No normalization
65# )
66
67# # Display dataset sizes for each split
68# print(f"Train loader: {len(loaders[0].dataset)} samples")
69# print(f"Validation loader: {len(loaders[1].dataset)} samples")
70# print(f"Test loader: {len(loaders[2].dataset)} samples")