Skip to content

Advanced Examples

Advanced usage patterns for data analysis and feature engineering.

Find a Recent Completed Game

from scrapernhl.scrapers.schedule import scrapeSchedule
from datetime import datetime

# Get recent games from any team
schedule = scrapeSchedule("MTL", "20252026")
completed = schedule[schedule['gameState'] == 'OFF']

if len(completed) > 0:
    game_id = completed.iloc[0]['id']
    game_info = completed.iloc[0]
    print(f"Using game: {game_info['awayTeam.abbrev']} @ {game_info['homeTeam.abbrev']}")
    print(f"Date: {game_info['gameDate']}")
    print(f"Game ID: {game_id}")
else:
    print("No completed games found. Using a known game ID...")
    game_id = 2024020001

Working with Complete Game Data

from scrapernhl import scrape_game

# Get comprehensive game data
game_tuple = scrape_game(game_id=game_id, include_tuple=True)

pbp = game_tuple.data

print(f"Game: {game_tuple.awayTeam} @ {game_tuple.homeTeam}")
print(f"Total events: {len(pbp)}")

print("\nRosters:")
game_tuple.rosters

Expected Goals (xG) Features

from scrapernhl import engineer_xg_features, predict_xg_for_pbp

# Engineer xG features
pbp_with_features = engineer_xg_features(pbp)

# Predict xG
pbp_with_xg = predict_xg_for_pbp(pbp_with_features)

# Get shots and goals
shots = pbp_with_xg[pbp_with_xg['Event'].isin(['SHOT', 'GOAL', 'MISS'])].copy()

print(f"Total shot attempts: {len(shots)}")

print("\nShot attempts with xG:")
shots[['period', 'timeInPeriod', 'Event', 'eventTeam', 'player1Id', 'xG', 'distanceFromGoal']].head(10)

# Calculate team xG totals
home_team = game_tuple.homeTeam
away_team = game_tuple.awayTeam

home_shots = shots[shots['eventTeam'] == home_team]
away_shots = shots[shots['eventTeam'] == away_team]

home_xg = home_shots['xG'].sum()
away_xg = away_shots['xG'].sum()

home_goals = len(pbp[(pbp['Event'] == 'GOAL') & (pbp['eventTeam'] == home_team)])
away_goals = len(pbp[(pbp['Event'] == 'GOAL') & (pbp['eventTeam'] == away_team)])

print(f"\n{away_team} @ {home_team}")
print(f"Score: {away_goals} - {home_goals}")
print(f"xG: {away_xg:.2f} - {home_xg:.2f}")
print(f"xG differential: {home_xg - away_xg:.2f} (positive favors {home_team})")

Time on Ice (TOI) Analysis

from scrapernhl import toi_by_strength

# Calculate TOI by strength
toi_df = toi_by_strength(pbp)

print("TOI by strength:")
toi_df.head(10)

# Calculate individual player stats
from scrapernhl import combo_on_ice_stats_both_teams

combo_stats = combo_on_ice_stats_both_teams(
    pbp,
    n_team=1,
    m_opp=0,          # set 0 for "vs ANY"
    min_TOI=0,
    include_goalies=False,
    rates=True,
    player_df=game_tuple.rosters  # DataFrame with ids/teams/positions
)

combo_stats[['player1Id', 'player1Name', 'player1Position', 'player1Number', 'team', 'opp', 'strength', 'seconds', 'minutes']]

Player Combinations Analysis

Defensive Pairs (2-player combinations)

# Get 2-player combinations (defensive pairs)
combo_stats_2 = combo_on_ice_stats_both_teams(
    pbp,
    n_team=2,
    m_opp=0,          # set 0 for "vs ANY"
    min_TOI=60,
    include_goalies=False,
    rates=True,
    player_df=game_tuple.rosters  # DataFrame with ids/teams/positions
)

top_10_pairs_5v5 = (combo_stats_2
                    .query("team_combo_pos == '2D'")  # Get defensive pairs
                    .query("strength == '5v5'")  # Filter for 5v5
                    .nlargest(10, 'seconds')
                    )[['team_combo', 'team_combo_ids', 'team', 'opp', 'strength', 'seconds', 'minutes']]

print("Most common defensive pairs (5v5):")
top_10_pairs_5v5

Forward Lines (3-player combinations)

# Get 3-player combinations (forward lines)
combo_stats_3 = combo_on_ice_stats_both_teams(
    pbp,
    n_team=3,
    m_opp=0,          # set 0 for "vs ANY"
    min_TOI=60,
    include_goalies=False,
    rates=True,
    player_df=game_tuple.rosters  # DataFrame with ids/teams/positions
)

top_10_lines_5v5 = (combo_stats_3
                    .query("team_combo_pos == '3F'")  # Get offensive lines
                    .query("strength == '5v5'")  # Filter for 5v5
                    .nlargest(10, 'seconds')
                    )[['team_combo', 'team_combo_ids', 'team', 'opp', 'strength', 'seconds', 'minutes']]

print("Most common offensive lines (5v5):")
top_10_lines_5v5

On-Ice Statistics by Player

from scrapernhl import scrape_game, on_ice_stats_by_player_strength

# Get game with xG
pbp, _ = scrape_game(2024020001)

# Calculate on-ice stats for each player
player_stats = on_ice_stats_by_player_strength(
    pbp,
    include_goalies=False,
    rates=True  # Convert to per-60 rates
)

# Show top players by Corsi For %
print("Best Corsi For % (min 5 min TOI):")
cf_leaders = player_stats[player_stats['TOI'] >= 5].nlargest(10, 'CF%')
print(cf_leaders[['player', 'team', 'strength', 'TOI', 'CF', 'CA', 'CF%']])

# Show players with best xG differential
print("\\nBest xG differential:")
xg_leaders = player_stats[player_stats['TOI'] >= 5]
xg_leaders['xG_diff'] = xg_leaders['xG'] - xg_leaders['xGA']
print(xg_leaders.nlargest(10, 'xG_diff')[['player', 'team', 'xG', 'xGA', 'xG_diff']])

Team-Level Aggregates

from scrapernhl import team_strength_aggregates

# Calculate team stats by strength
team_stats = team_strength_aggregates(
    pbp_with_xg,
    include_goalies=False,
    rates=True,
    min_TOI=1
)

print("Team statistics by strength:")
team_stats[['team', 'minutes', 'CF', 'CA', 'xG', 'xGA', 'GF', 'GA']].sort_values(by=['minutes'], ascending=False)

# 5v5 stats only
stats_5v5 = team_stats[team_stats['strength'] == '5v5'].copy()

print("\n5v5 Team Stats:")
stats_5v5[['team', 'minutes', 'CF', 'CA', 'xG', 'xGA', 'GF', 'GA']]

Multi-Game Season Analysis

import pandas as pd

# Scrape multiple games (just 3 for demonstration)
print("Scraping multiple games for season analysis...")

game_ids_to_scrape = completed.head(3)['id'].tolist()
all_team_stats = []

for gid in game_ids_to_scrape:
    try:
        print(f"Processing game {gid}...")
        game_tuple = scrape_game(gid, include_tuple=True)
        pbp = game_tuple.data
        pbp = engineer_xg_features(pbp)
        pbp = predict_xg_for_pbp(pbp)

        stats = team_strength_aggregates(pbp)
        stats['game_id'] = gid
        all_team_stats.append(stats)
    except Exception as e:
        print(f"Error with game {gid}: {e}")

# Combine all games
if all_team_stats:
    season_stats = pd.concat(all_team_stats, ignore_index=True)

    # Aggregate by team
    team_summary = season_stats.groupby('team').agg({
        'minutes': 'sum',
        'CF': 'sum',
        'CA': 'sum',
        'xG': 'sum',
        'xGA': 'sum',
        'GF': 'sum',
        'GA': 'sum'
    }).reset_index()

    team_summary['CF%'] = 100 * team_summary['CF'] / (team_summary['CF'] + team_summary['CA'])

    print("\\nSeason stats across sampled games:")
    team_summary