Usage
Example (Single Contract)
Here’s a simple example of how to load and transform data for a single contract:
# Example script: find one option contract, load its market data, engineer
# features, and wrap the result in a PyTorch forecasting dataset.
# Imports are kept next to the step that first needs them so the steps can be
# read (or pasted into notebook cells) in order.

# Step 1: Find and initialize the optimal contract
from optrade.data.contracts import Contract
from rich.console import Console

# Bar size in minutes. Defined once because the window durations logged in
# Step 4 are derived from it.
interval_min = 15

ctx = Console()
ctx.log("Searching for an ATM call option available January 3, 2023 with approximately 30 days to expiration...")
contract = Contract.find_optimal(
    root="AAPL",
    right="C",  # Call option
    start_date="20230103",  # First trading day of 2023
    target_tte=30,  # Desired expiration: 30 days
    tte_tolerance=(20, 40),  # Min 20, max 40 days expiration
    interval_min=interval_min,  # Data requested at 15-min level
    moneyness="ATM",  # At-the-money option
    verbose=True,
)
ctx.log(f"Optimal contract found: {contract}")

# Step 2: Load market data (NBBO quotes and OHLCV)
ctx.log("Loading market data from ThetaData API...")
df = contract.load_data()
print(df.head())

# Step 3: Transform raw data into ML-ready features
from optrade.data.features import transform_features
from rich.table import Table
from rich import box

data = transform_features(
    df=df,
    core_feats=[
        "option_returns",  # Option price returns
        "stock_returns",  # Underlying stock returns
        "moneyness",  # Log(S/K)
        "option_lob_imbalance",  # Order book imbalance
        "stock_quote_spread",  # Bid-ask spread normalized
    ],
    tte_feats=["sqrt"],  # Time-to-expiration features
    datetime_feats=["minute_of_day"],  # Time features
    vol_feats=["rolling_volatility"],  # Volatility features (rolling volatility only)
    rolling_volatility_range=[60],  # 60min rolling volatility windows
    strike=contract.strike,
    exp=contract.exp,
    root=contract.root,
    right=contract.right,
)

table = Table(title="Transformed Features", box=box.SIMPLE)

# Add column headers
for col in data.columns:
    table.add_column(col, justify="center", style="cyan", no_wrap=True)

# Add top 10 rows only; rich table cells must be strings, hence the str() cast
for _, row in data.head(10).iterrows():
    table.add_row(*(str(item) for item in row.values))
ctx.log(table)

# Step 4: Create dataset for time series forecasting
from optrade.data.forecasting import ForecastingDataset
from torch.utils.data import DataLoader

seq_len = 20  # Lookback window, in data points
pred_len = 5  # Forecast horizon, in data points

# Durations are computed from the bar size instead of being typed by hand.
# NOTE(review): assumes load_data() returns bars at the interval requested in
# Step 1 -- confirm against Contract.load_data.
ctx.log(
    f"Converting data to PyTorch dataset with lookback window size of {seq_len} data points "
    f"({seq_len * interval_min}min) and forecast horizon of {pred_len} data points "
    f"({pred_len * interval_min}min)"
)
ctx.log("Using all features as inputs, with target channel set to option returns")
torch_dataset = ForecastingDataset(
    data=data,
    seq_len=seq_len,  # 20 x 15min = 300-minute lookback window
    pred_len=pred_len,  # 5 x 15min = 75-minute forecast horizon
    target_channels=["option_returns"],  # Forecast option returns
)

torch_loader = DataLoader(torch_dataset, batch_size=32)

# Inspect only the first batch; the loop is skipped entirely if the loader is empty
for x, y in torch_loader:
    ctx.log("Grabbing a single example:")
    ctx.log(f"Input shape: {x.shape}, Target shape: {y.shape}")
    break
Example (Multiple Contracts)
When modeling multiple contracts, you can use the ContractDataset class to find a set of optimal contracts with similar parameters and then use the get_forecasting_dataset function to load and transform the data for all contracts:
# Example script: build a set of contracts with similar parameters, then load
# and transform the data for all of them into one concatenated dataset.

# Step 1: Find a set of optimal contracts from total_start_date to total_end_date
import random

from optrade.data.contracts import ContractDataset
from rich.console import Console

ctx = Console()

ctx.log("Constructing a dataset of put options for Amazon from January 1, 2022 to June 1, 2022...")
contract_dataset = ContractDataset(
    root="AMZN",
    total_start_date="20220101",
    total_end_date="20220601",
    contract_stride=3,
    interval_min=15,
    right="P",
    target_tte=30,
    tte_tolerance=(15, 45),
    moneyness="OTM",
    volatility_scaled=True,
    volatility_scalar=0.1,
    hist_vol=0.1117,
)
with ctx.status("Generating contracts..."):
    contract_dataset.generate()
ctx.log(f"Found a total of {len(contract_dataset)} contracts!")

# Pick one contract at random to display (valid indices are 0 .. len - 1)
sample_idx = random.randrange(len(contract_dataset))
ctx.log(f"Randomly chosen contract ({sample_idx}): {contract_dataset[sample_idx]}")

# Step 2: Load market data and transform features for all contracts then put into a concatenated torch dataset
from optrade.data.forecasting import get_forecasting_dataset
from torch.utils.data import DataLoader

concat_dataset, updated_contract_dataset = get_forecasting_dataset(
    contract_dataset=contract_dataset,
    core_feats=["option_returns"],
    tte_feats=["sqrt"],
    datetime_feats=["sin_minute_of_day"],
    tte_tolerance=(25, 35),
    seq_len=100,
    pred_len=10,
    verbose=True,
)
torch_loader = DataLoader(concat_dataset)

# Total number of examples, summed over the per-contract datasets
num_examples = sum(len(dataset) for dataset in concat_dataset.datasets)
ctx.log(f"A total of {num_examples} training examples (input/target pairs) were generated.")
Example (PyTorch Forecasting)
When running forecasting experiments, you can use the Experiment class from optrade.exp.forecasting, which supports PyTorch deep learning (DL) models. Several state-of-the-art models are available in the optrade.models.pytorch module, allowing you to easily experiment with different modern DL architectures:
# Example script: run a forecasting experiment end to end (data loaders,
# PatchTST model, training, evaluation, log export).

# Step 1: Initialize the experiment with offline logging
import torch
from optrade.exp.forecasting import Experiment

exp = Experiment(logging="offline")

# Set device to GPU if available, otherwise CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Define feature sets for the model
core_feats = ["option_returns", "option_volume", "stock_lob_imbalance"]  # Core features
tte_feats = ["sqrt"]  # Time-to-expiration features
datetime_feats = ["sin_minute_of_day"]  # Temporal features
input_channels = core_feats + tte_feats + datetime_feats  # Combined input features
target_channels = ["option_returns"]  # Target variable

# Window sizes are defined once and passed to both the data loaders and the
# model, because the two configurations must agree.
seq_len = 12  # Input sequence length (12 x 5min = 1 hour lookback)
pred_len = 4  # Prediction length (4 x 5min = 20 minute forecast)

# Step 2: Initialize data loaders with specified configuration
exp.init_loaders(
    root="TSLA",  # Ticker symbol
    start_date="20210601",  # Full dataset start date
    end_date="20211231",  # Full dataset end date
    contract_stride=5,  # Sample contracts every 5 days
    interval_min=5,  # 5-minute intervals
    right="C",  # Call options
    target_tte=30,  # Target 30 days to expiration
    tte_tolerance=(15, 45),  # Accept options with 15-45 days to expiration
    moneyness="ATM",  # At-the-money options
    train_split=0.5,  # 50% of data for training
    val_split=0.25,  # 25% of data for validation (remaining 25% for testing)
    seq_len=seq_len,
    pred_len=pred_len,
    scaling=True,  # Normalize all features
    core_feats=core_feats,
    tte_feats=tte_feats,
    datetime_feats=datetime_feats,
    target_channels=target_channels,
    # DataLoader settings
    num_workers=0,  # Single-process (development safe)
    prefetch_factor=None,  # No prefetching batches
    persistent_workers=False,  # Kill workers between epochs
)

# Step 3: Define model architecture
from optrade.models.pytorch.patchtst import Model as PatchTST

model = PatchTST(
    num_enc_layers=2,  # Number of Transformer encoder layers
    d_model=32,  # Model dimension (embedding size)
    d_ff=64,  # Feed-forward network dimension
    num_heads=2,  # Number of self-attention heads
    seq_len=seq_len,  # Input sequence length (same value as the data config)
    pred_len=pred_len,  # Prediction length (same value as the data config)
    patch_dim=2,  # Patch dimension
    stride=2,  # Patch stride
    input_channels=input_channels,
    target_channels=target_channels,
).to(device)

# Define optimization method and objective (loss) function
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)  # Adam optimizer
criterion = torch.nn.MSELoss()  # Mean Squared Error loss

# Step 4: Train the model
# NOTE(review): patience (20) is larger than num_epochs (5), so early stopping
# presumably cannot trigger in this short demo run -- confirm whether that is
# intended or whether one of the two values should change.
model = exp.train_torch(
    model=model,
    device=device,
    optimizer=optimizer,
    criterion=criterion,
    num_epochs=5,  # Number of training epochs
    early_stopping=True,  # Enable early stopping
    patience=20,  # Number of epochs before early stopping
)

# Step 5: Evaluate model on test set
exp.test(
    model=model,
    criterion=criterion,
    metrics=["mse"],  # Metrics to compute
    device=device,  # Computing device (CPU/GPU)
)
exp.save_logs()  # Save experiment logs to disk
Example (Universe Selection)
When modeling a universe of securities, you can use the Universe class to filter by parameters such as fundamentals (e.g., P/E ratio), volatility, and Fama-French factor exposures. Here’s an example:
# Example script: build a filtered universe of stocks and download options
# data for every stock that passes the filters.
from optrade.data.universe import Universe
from rich.console import Console

# Step 1: Initialize Universe
ctx = Console()
universe = Universe(
    dow_jones=True,  # Use Dow Jones as the starting universe
    start_date="20210101",
    end_date="20211001",
    # Filters
    pe_ratio="low",  # Low P/E ratio (bottom third)
    market_cap="high",  # Large-cap (top third)
    investment_beta="aggressive",  # Aggressive investment strategy (Fama-French exposure)
    verbose=True,
    dev_mode=True,
)

# Step 2: Fetch constituents from Wikipedia
universe.set_roots()

# Step 3: Get market data via yfinance & compute Fama-French exposures
universe.get_market_metrics()

# Step 4: Apply filters (low P/E ratio, high market cap, aggressive investment beta)
universe.filter()

# Step 5: Download options data for filtered universe
universe.download(
    contract_stride=3,  # Sample contracts every 3 days
    interval_min=1,  # Data requested at 1-min level
    right="C",  # Call options only
    target_tte=30,  # Desired expiration: 30 days
    tte_tolerance=(20, 40),  # Min 20, max 40 days expiration
    moneyness="ATM",  # At-the-money option
    train_split=0.5,  # 50% training
    val_split=0.3,  # 30% validation (hence 20% test)
)

# Step 6 (commented out): Select a stock from the universe and create PyTorch dataloaders
# root = universe.roots[0]
# print(f"Loading data for root: {root}")
#
# loaders = universe.get_forecasting_loaders(
#     offline=True,  # Use cached data
#     root=root,  # Stock symbol
#     tte_tolerance=(20, 40),  # DTE range
#     seq_len=30,  # 30-min lookback
#     pred_len=5,  # 5-min forecast
#     core_feats=["option_mid_price"],  # Feature
#     target_channels=["option_mid_price"],  # Target
#     dtype="float32",  # Precision
#     scaling=False,  # No normalization
# )
#
# # Display dataset sizes for each split
# print(f"Train loader: {len(loaders[0].dataset)} samples")
# print(f"Validation loader: {len(loaders[1].dataset)} samples")
# print(f"Test loader: {len(loaders[2].dataset)} samples")