Advanced Examples¶
Advanced usage patterns for data analysis and feature engineering.
Find a Recent Completed Game¶
from scrapernhl.scrapers.schedule import scrapeSchedule
from datetime import datetime
# Get recent games from any team
schedule = scrapeSchedule("MTL", "20252026")
completed = schedule[schedule['gameState'] == 'OFF']
if len(completed) > 0:
game_id = completed.iloc[0]['id']
game_info = completed.iloc[0]
print(f"Using game: {game_info['awayTeam.abbrev']} @ {game_info['homeTeam.abbrev']}")
print(f"Date: {game_info['gameDate']}")
print(f"Game ID: {game_id}")
else:
print("No completed games found. Using a known game ID...")
game_id = 2024020001
Working with Complete Game Data¶
from scrapernhl import scrape_game
# Get comprehensive game data
game_tuple = scrape_game(game_id=game_id, include_tuple=True)
pbp = game_tuple.data
print(f"Game: {game_tuple.awayTeam} @ {game_tuple.homeTeam}")
print(f"Total events: {len(pbp)}")
print("\nRosters:")
game_tuple.rosters
Expected Goals (xG) Features¶
from scrapernhl import engineer_xg_features, predict_xg_for_pbp
# Engineer xG features
pbp_with_features = engineer_xg_features(pbp)
# Predict xG
pbp_with_xg = predict_xg_for_pbp(pbp_with_features)
# Get shots and goals
shots = pbp_with_xg[pbp_with_xg['Event'].isin(['SHOT', 'GOAL', 'MISS'])].copy()
print(f"Total shot attempts: {len(shots)}")
print("\nShot attempts with xG:")
shots[['period', 'timeInPeriod', 'Event', 'eventTeam', 'player1Id', 'xG', 'distanceFromGoal']].head(10)
# Calculate team xG totals
home_team = game_tuple.homeTeam
away_team = game_tuple.awayTeam
home_shots = shots[shots['eventTeam'] == home_team]
away_shots = shots[shots['eventTeam'] == away_team]
home_xg = home_shots['xG'].sum()
away_xg = away_shots['xG'].sum()
home_goals = len(pbp[(pbp['Event'] == 'GOAL') & (pbp['eventTeam'] == home_team)])
away_goals = len(pbp[(pbp['Event'] == 'GOAL') & (pbp['eventTeam'] == away_team)])
print(f"\n{away_team} @ {home_team}")
print(f"Score: {away_goals} - {home_goals}")
print(f"xG: {away_xg:.2f} - {home_xg:.2f}")
print(f"xG differential: {home_xg - away_xg:.2f} (positive favors {home_team})")
Time on Ice (TOI) Analysis¶
from scrapernhl import toi_by_strength
# Calculate TOI by strength
toi_df = toi_by_strength(pbp)
print("TOI by strength:")
toi_df.head(10)
# Calculate individual player stats
from scrapernhl import combo_on_ice_stats_both_teams
combo_stats = combo_on_ice_stats_both_teams(
pbp,
n_team=1,
m_opp=0, # set 0 for "vs ANY"
min_TOI=0,
include_goalies=False,
rates=True,
player_df=game_tuple.rosters # DataFrame with ids/teams/positions
)
combo_stats[['player1Id', 'player1Name', 'player1Position', 'player1Number', 'team', 'opp', 'strength', 'seconds', 'minutes']]
Player Combinations Analysis¶
Defensive Pairs (2-player combinations)¶
# Get 2-player combinations (defensive pairs)
combo_stats_2 = combo_on_ice_stats_both_teams(
pbp,
n_team=2,
m_opp=0, # set 0 for "vs ANY"
min_TOI=60,
include_goalies=False,
rates=True,
player_df=game_tuple.rosters # DataFrame with ids/teams/positions
)
top_10_pairs_5v5 = (combo_stats_2
.query("team_combo_pos == '2D'") # Get defensive pairs
.query("strength == '5v5'") # Filter for 5v5
.nlargest(10, 'seconds')
)[['team_combo', 'team_combo_ids', 'team', 'opp', 'strength', 'seconds', 'minutes']]
print("Most common defensive pairs (5v5):")
top_10_pairs_5v5
Forward Lines (3-player combinations)¶
# Get 3-player combinations (forward lines)
combo_stats_3 = combo_on_ice_stats_both_teams(
pbp,
n_team=3,
m_opp=0, # set 0 for "vs ANY"
min_TOI=60,
include_goalies=False,
rates=True,
player_df=game_tuple.rosters # DataFrame with ids/teams/positions
)
top_10_lines_5v5 = (combo_stats_3
.query("team_combo_pos == '3F'") # Get offensive lines
.query("strength == '5v5'") # Filter for 5v5
.nlargest(10, 'seconds')
)[['team_combo', 'team_combo_ids', 'team', 'opp', 'strength', 'seconds', 'minutes']]
print("Most common offensive lines (5v5):")
top_10_lines_5v5
On-Ice Statistics by Player¶
from scrapernhl import scrape_game, on_ice_stats_by_player_strength
# Get game with xG
pbp, _ = scrape_game(2024020001)
# Calculate on-ice stats for each player
player_stats = on_ice_stats_by_player_strength(
pbp,
include_goalies=False,
rates=True # Convert to per-60 rates
)
# Show top players by Corsi For %
print("Best Corsi For % (min 5 min TOI):")
cf_leaders = player_stats[player_stats['TOI'] >= 5].nlargest(10, 'CF%')
print(cf_leaders[['player', 'team', 'strength', 'TOI', 'CF', 'CA', 'CF%']])
# Show players with best xG differential
print("\\nBest xG differential:")
xg_leaders = player_stats[player_stats['TOI'] >= 5]
xg_leaders['xG_diff'] = xg_leaders['xG'] - xg_leaders['xGA']
print(xg_leaders.nlargest(10, 'xG_diff')[['player', 'team', 'xG', 'xGA', 'xG_diff']])
Team-Level Aggregates¶
from scrapernhl import team_strength_aggregates
# Calculate team stats by strength
team_stats = team_strength_aggregates(
pbp_with_xg,
include_goalies=False,
rates=True,
min_TOI=1
)
print("Team statistics by strength:")
team_stats[['team', 'minutes', 'CF', 'CA', 'xG', 'xGA', 'GF', 'GA']].sort_values(by=['minutes'], ascending=False)
# 5v5 stats only
stats_5v5 = team_stats[team_stats['strength'] == '5v5'].copy()
print("\n5v5 Team Stats:")
stats_5v5[['team', 'minutes', 'CF', 'CA', 'xG', 'xGA', 'GF', 'GA']]
Multi-Game Season Analysis¶
import pandas as pd
# Scrape multiple games (just 3 for demonstration)
print("Scraping multiple games for season analysis...")
game_ids_to_scrape = completed.head(3)['id'].tolist()
all_team_stats = []
for gid in game_ids_to_scrape:
try:
print(f"Processing game {gid}...")
game_tuple = scrape_game(gid, include_tuple=True)
pbp = game_tuple.data
pbp = engineer_xg_features(pbp)
pbp = predict_xg_for_pbp(pbp)
stats = team_strength_aggregates(pbp)
stats['game_id'] = gid
all_team_stats.append(stats)
except Exception as e:
print(f"Error with game {gid}: {e}")
# Combine all games
if all_team_stats:
season_stats = pd.concat(all_team_stats, ignore_index=True)
# Aggregate by team
team_summary = season_stats.groupby('team').agg({
'minutes': 'sum',
'CF': 'sum',
'CA': 'sum',
'xG': 'sum',
'xGA': 'sum',
'GF': 'sum',
'GA': 'sum'
}).reset_index()
team_summary['CF%'] = 100 * team_summary['CF'] / (team_summary['CF'] + team_summary['CA'])
print("\\nSeason stats across sampled games:")
team_summary
- API Reference - Complete function documentation