Basic Scraping Examples¶
Examples showing how to scrape NHL data.
Setup¶
import pandas as pd
# Display configuration for the examples: never truncate the column list,
# let pandas pick the output width, and render floats with two decimals.
pd.set_option("display.max_columns", None)
pd.set_option("display.width", None)
pd.set_option("display.float_format", "{:.2f}".format)
1. Scraping NHL Teams¶
Retrieve information about all NHL teams including their names, IDs, and locations.
from scrapernhl.scrapers.teams import scrapeTeams
# Fetch all NHL teams
teams = scrapeTeams()
print(f"Found {len(teams)} teams")
# Preview the first ten teams, restricted to the identifying columns
teams.head(10)[
    ['name.default', 'abbrev', 'id', 'placeName.default', 'commonName.default']
]
2. Scraping Team Schedule¶
Get the complete schedule for a specific team and season, including game dates, opponents, scores, and game states.
from scrapernhl.scrapers.schedule import scrapeSchedule
# Montreal Canadiens schedule for season code "20252026"
schedule = scrapeSchedule("MTL", "20252026")
print(f"MTL has {len(schedule)} games this season")
# Preview the first five games: date, game type, both teams with their
# scores, how the game ended and its current state
schedule.head()[
    [
        'gameDate', 'gameType',
        'homeTeam.abbrev', 'homeTeam.score',
        'awayTeam.abbrev', 'awayTeam.score',
        'gameOutcome.lastPeriodType', 'gameState',
    ]
]
3. Current Standings¶
Fetch the league standings for a specific date, including wins, losses, points, and point percentage.
from scrapernhl.scrapers.standings import scrapeStandings
from datetime import datetime
# Standings are requested by date: use today's date as YYYY-MM-DD
today = f"{datetime.now():%Y-%m-%d}"
standings = scrapeStandings(today)
print(f"Standings as of {today}:")
# Rank by point percentage (best first) and show the top ten teams
standings.sort_values(by='pointPctg', ascending=False).head(10)[
    [
        'teamName.default', 'teamAbbrev.default', 'gamesPlayed',
        'wins', 'losses', 'otLosses', 'points', 'pointPctg', 'date',
    ]
]
4. Team Roster¶
Get the complete roster for a team, including player names, positions, physical attributes, and biographical information.
from scrapernhl.scrapers.roster import scrapeRoster
# Montreal Canadiens roster for season code "20252026"
roster = scrapeRoster("MTL", "20252026")
# Split the roster by position code:
# C / L / R are the forward positions (centers, left wings, right wings)
forwards = roster.loc[roster['positionCode'].isin(['C', 'L', 'R'])]
defensemen = roster.loc[roster['positionCode'].eq('D')]
goalies = roster.loc[roster['positionCode'].eq('G')]
print(f"Forwards: {len(forwards)}, Defense: {len(defensemen)}, Goalies: {len(goalies)}")
print("\nForwards:")
# First ten forwards with their bio columns, tagged with the team abbreviation
forwards.head(10)[
    [
        'id', 'firstName.default', 'lastName.default', 'positionCode',
        'shootsCatches', 'sweaterNumber', 'heightInInches', 'weightInPounds',
        'birthDate', 'birthCountry',
    ]
].assign(team="MTL")
5. Player Statistics¶
Scrape player statistics for both skaters and goalies, including goals, assists, points, wins, and save percentage.
from scrapernhl.scrapers.stats import scrapeTeamStats
# Get skater stats
skaters = scrapeTeamStats("MTL", "20252026", session=2, goalies=False)
print("Top 10 scorers:")
# print() is required here: only the last bare expression of a notebook cell
# is displayed, so without it this table would be computed and discarded.
print(
    skaters.nlargest(10, 'points')[
        ['playerId', 'firstName.default', 'lastName.default', 'positionCode',
         'gamesPlayed', 'goals', 'assists', 'points']
    ].assign(pointsPerGame=lambda df: df['points'].div(df['gamesPlayed']))
)
# Get goalie stats (same call with goalies=True)
goalies = scrapeTeamStats("MTL", "20252026", session=2, goalies=True)
print("\nGoalie statistics:")
# Last expression of the cell, so the notebook renders it directly
goalies[['playerId', 'firstName.default', 'lastName.default', 'gamesPlayed', 'wins', 'losses',
         'overtimeLosses', 'goalsAgainstAverage', 'savePercentage']]
6. Play-by-Play Data¶
Retrieve detailed play-by-play data for a specific game, including all events like shots, goals, hits, and faceoffs.
from scrapernhl.scrapers.games import scrapePlays
# Take the first game in the schedule whose state is 'OFF' (treated here as
# a completed game) and scrape its play-by-play.
completed_games = schedule[schedule['gameState'] == 'OFF']
if completed_games.empty:
    print("No completed games found in schedule")
else:
    game_id = completed_games.iloc[0]['id']
    print(f"Scraping game {game_id}...")
    pbp = scrapePlays(game_id)
    print(f"Game has {len(pbp)} events")
    # Expressions nested inside an if/else are never auto-displayed by the
    # notebook, so both tables are printed explicitly.
    # Show event types
    print("\nEvent counts:")
    print(pbp['typeDescKey'].value_counts())
    # Show first few events
    print("\nFirst 10 events:")
    print(pbp[['periodDescriptor.number', 'timeInPeriod', 'typeDescKey',
               'details.eventOwnerTeamId', 'gameId']].head(10))
7. Draft Data¶
Access historical NHL draft data including player information, draft position, and team selections.
from scrapernhl.scrapers.draft import scrapeDraftData
# First-round picks of the 2025 draft
draft_2025_r1 = scrapeDraftData("2025", 1)
print(f"2025 Draft - Round 1: {len(draft_2025_r1)} picks")
# Preview the first ten picks: selection slot, team and player details
draft_2025_r1.head(10)[
    [
        'round', 'pickInRound', 'overallPick', 'teamAbbrev',
        'firstName.default', 'lastName.default',
        'positionCode', 'countryCode', 'height', 'weight', 'year',
    ]
]
8. Using Polars (Alternative to Pandas)¶
Polars is a faster alternative to Pandas for large datasets. The scraper supports both output formats.
# Request the teams as a Polars DataFrame instead of the pandas default
teams_pl = scrapeTeams(output_format="polars")
print(f"Type: {type(teams_pl)}")
print(f"Shape: {teams_pl.shape}")
# Polars syntax: take the first five rows, then pick the columns to show
teams_pl.head(5).select(['name', 'abbrev', 'id', 'placeName', 'commonName'])
9. Backward Compatibility Test¶
The package maintains backward compatibility with older import styles for ease of migration.
# The old import style still works
from scrapernhl import scrapeTeams, scrapeSchedule
# scrapeTeams is called through the name imported from the package root above
teams_old_style = scrapeTeams()
print(f"Old import style works: {len(teams_old_style)} teams scraped")
See Also¶
- Advanced Examples - Feature engineering, analytics
- Data Export - Saving data to files
- API Reference - Complete API documentation
from scrapernhl.scrapers.teams import scrapeTeams
# Fetch all NHL teams
teams = scrapeTeams()
print(f"Found {len(teams)} teams")
# Preview the first ten teams, restricted to the identifying columns
teams.head(10)[
    ['name.default', 'abbrev', 'id', 'placeName.default', 'commonName.default']
]
Scraping Schedule¶
from scrapernhl.scrapers.schedule import scrapeSchedule
# Montreal Canadiens schedule for season code "20252026"
schedule = scrapeSchedule("MTL", "20252026")
print(f"MTL has {len(schedule)} games this season")
# Preview the first five games: date, game type, both teams with their
# scores, how the game ended and its current state
schedule.head()[
    [
        'gameDate', 'gameType',
        'homeTeam.abbrev', 'homeTeam.score',
        'awayTeam.abbrev', 'awayTeam.score',
        'gameOutcome.lastPeriodType', 'gameState',
    ]
]
Scraping Standings¶
from scrapernhl.scrapers.standings import scrapeStandings
from datetime import datetime
# Standings are requested by date: use today's date as YYYY-MM-DD
today = f"{datetime.now():%Y-%m-%d}"
standings = scrapeStandings(today)
print(f"Standings as of {today}:")
# Rank by point percentage (best first) and show the top ten teams
standings.sort_values(by='pointPctg', ascending=False).head(10)[
    [
        'teamName.default', 'teamAbbrev.default', 'gamesPlayed',
        'wins', 'losses', 'otLosses', 'points', 'pointPctg', 'date',
    ]
]
Getting Play-by-Play Data¶
from scrapernhl.scrapers.games import scrapePlays
# Get play-by-play for a specific game
game_id = 2024020001
pbp = scrapePlays(game_id)
print(f"Game {game_id} has {len(pbp)} events")
# Show event types. print() is required: a bare expression that is not the
# last statement of the cell/script produces no output, which would leave the
# "Event counts:" header with nothing under it.
print("\nEvent counts:")
print(pbp['typeDescKey'].value_counts())
# Show first few events (last expression, rendered by a notebook/REPL)
print("\nFirst 10 events:")
pbp[['periodDescriptor.number', 'timeInPeriod', 'typeDescKey', 'details.eventOwnerTeamId', 'gameId']].head(10)
With Goal Replay Data¶
from scrapernhl.scrapers.games import scrapePlays
# Include goal replay data
pbp = scrapePlays(2024020001, addGoalReplayData=True)
# Filter for goals only. The event type lives in 'typeDescKey', the same
# column the other play-by-play examples on this page read.
# NOTE(review): confirm 'goal' is the exact value used for goal events.
goals = pbp[pbp['typeDescKey'] == 'goal']
print(f"Goals scored: {len(goals)}")
Scraping Multiple Games¶
from scrapernhl.scrapers.games import scrapePlays
import pandas as pd
# Scrape multiple games one after another, keeping each game's frame
game_ids = [2024020001, 2024020002, 2024020003]
all_plays = []
for game_id in game_ids:
    print(f"Scraping game {game_id}...")
    pbp = scrapePlays(game_id)
    all_plays.append(pbp)
# Combine all games into a single frame; ignore_index=True renumbers the rows
# so the per-game indexes do not repeat
combined_pbp = pd.concat(all_plays, ignore_index=True)
print(f"Total events across {len(game_ids)} games: {len(combined_pbp)}")
Getting Roster Information¶
from scrapernhl.scrapers.roster import scrapeRoster
# Montreal Canadiens roster for season code "20252026"
roster = scrapeRoster("MTL", "20252026")
# Split the roster by position code:
# C / L / R are the forward positions (centers, left wings, right wings)
forwards = roster.loc[roster['positionCode'].isin(['C', 'L', 'R'])]
defensemen = roster.loc[roster['positionCode'].eq('D')]
goalies = roster.loc[roster['positionCode'].eq('G')]
print(f"Forwards: {len(forwards)}, Defense: {len(defensemen)}, Goalies: {len(goalies)}")
print("\nForwards:")
# First ten forwards with their bio columns, tagged with the team abbreviation
forwards.head(10)[
    [
        'id', 'firstName.default', 'lastName.default', 'positionCode',
        'shootsCatches', 'sweaterNumber', 'heightInInches', 'weightInPounds',
        'birthDate', 'birthCountry',
    ]
].assign(team="MTL")
Getting Player Statistics¶
from scrapernhl.scrapers.stats import scrapeTeamStats
# Get skater stats
skaters = scrapeTeamStats("MTL", "20252026", session=2, goalies=False)
print("Top 10 scorers:")
# print() is required here: a bare expression that is not the last statement
# of the cell/script produces no output, so the table would be discarded.
print(
    skaters.nlargest(10, 'points')[
        ['playerId', 'firstName.default', 'lastName.default', 'positionCode',
         'gamesPlayed', 'goals', 'assists', 'points']
    ].assign(pointsPerGame=lambda df: df['points'].div(df['gamesPlayed']))
)
# Get goalie stats (same call with goalies=True)
goalies = scrapeTeamStats("MTL", "20252026", session=2, goalies=True)
print("\nGoalie statistics:")
# Last expression, rendered directly by a notebook/REPL
goalies[['playerId', 'firstName.default', 'lastName.default', 'gamesPlayed', 'wins', 'losses',
         'overtimeLosses', 'goalsAgainstAverage', 'savePercentage']]
Getting Draft Data¶
from scrapernhl.scrapers.draft import scrapeDraftData
# First-round picks of the 2025 draft
draft_2025_r1 = scrapeDraftData("2025", 1)
print(f"2025 Draft - Round 1: {len(draft_2025_r1)} picks")
# Preview the first ten picks: selection slot, team and player details
draft_2025_r1.head(10)[
    [
        'round', 'pickInRound', 'overallPick', 'teamAbbrev',
        'firstName.default', 'lastName.default',
        'positionCode', 'countryCode', 'height', 'weight', 'year',
    ]
]
Using Polars Instead of Pandas¶
# Request the teams as a Polars DataFrame instead of the pandas default
teams_pl = scrapeTeams(output_format="polars")
print(f"Type: {type(teams_pl)}")
print(f"Shape: {teams_pl.shape}")
# Polars syntax: take the first five rows, then pick the columns to show
teams_pl.head(5).select(['name', 'abbrev', 'id', 'placeName', 'commonName'])
Backward Compatible Style¶
If you have existing code, the old import style still works:
# The old import style still works
from scrapernhl import scrapeTeams, scrapeSchedule
# scrapeTeams is called through the name imported from the package root above
teams_old_style = scrapeTeams()
print(f"Old import style works: {len(teams_old_style)} teams scraped")
Error Handling¶
from scrapernhl.scrapers.games import scrapePlays
try:
    pbp = scrapePlays(9999999999)  # Invalid game ID
except Exception as e:
    # The broad catch keeps the demo self-contained; which exception type the
    # scraper raises for an unknown game ID is not shown here, so narrow this
    # to the specific type in real code.
    print(f"Error scraping game: {e}")
Async Scraping (Advanced)¶
To scrape multiple games concurrently instead of one at a time:
import asyncio
from scrapernhl import scrape_game_async
async def scrape_multiple_games(game_ids):
    """Scrape several games concurrently.

    Creates one ``scrape_game_async`` call per game ID and waits for all of
    them with ``asyncio.gather``.

    Args:
        game_ids: Iterable of game IDs to scrape.

    Returns:
        List with one result per game, in the same order as ``game_ids``.
    """
    tasks = [scrape_game_async(game_id) for game_id in game_ids]
    return await asyncio.gather(*tasks)


# Run async scraping.
# NOTE: asyncio.run() raises RuntimeError when an event loop is already
# running in the current thread (e.g. inside a Jupyter notebook); there, use
# `results = await scrape_multiple_games(game_ids)` instead.
game_ids = [2024020001, 2024020002, 2024020003]
results = asyncio.run(scrape_multiple_games(game_ids))
print(f"Scraped {len(results)} games")
See Also¶
- Advanced Examples - Feature engineering, analytics
- Data Export - Saving data to files
- API Reference - Complete API documentation