In [1]:
import pandas as pd
import numpy as np
import json
import os
import glob
from tqdm import tqdm
In [6]:
json_files = glob.glob("*.json")
print("Total files:", len(json_files))
Total files: 63
In [3]:
all_deliveries = []
for file in tqdm(json_files):
with open(file, "r", encoding="utf-8") as f:
data = json.load(f)
info = data.get("info", {})
match_id = os.path.basename(file).replace(".json","")
date = info.get("dates",[None])[0]
teams = info.get("teams", [])
venue = info.get("venue", "")
winner = info.get("outcome", {}).get("winner", None)
innings_data = data.get("innings", [])
for innings_num, innings in enumerate(innings_data, start=1):
team = innings.get("team", "")
overs = innings.get("overs", [])
for over_data in overs:
over_number = over_data.get("over")
deliveries = over_data.get("deliveries", [])
for ball_no, delivery in enumerate(deliveries, start=1):
batter = delivery.get("batter")
bowler = delivery.get("bowler")
non_striker = delivery.get("non_striker")
runs_batter = delivery.get("runs", {}).get("batter", 0)
extras = delivery.get("runs", {}).get("extras", 0)
total_runs = delivery.get("runs", {}).get("total", 0)
wicket = 0
dismissal_type = None
if "wickets" in delivery:
wicket = 1
dismissal_type = delivery["wickets"][0].get("kind")
all_deliveries.append({
"match_id": match_id,
"date": date,
"venue": venue,
"winner": winner,
"batting_team": team,
"innings": innings_num,
"over": over_number,
"ball": ball_no,
"batter": batter,
"bowler": bowler,
"non_striker": non_striker,
"runs_batter": runs_batter,
"extras": extras,
"total_runs": total_runs,
"wicket": wicket,
"dismissal_type": dismissal_type
})
0it [00:00, ?it/s]
In [15]:
with open(json_files[0], "r") as f:
sample = json.load(f)
print(sample.keys())
dict_keys(['meta', 'info', 'innings'])
In [4]:
deliveries_df = pd.DataFrame(all_deliveries)
print(deliveries_df.shape)
deliveries_df.head()
(0, 0)
Out[4]:
In [8]:
print(deliveries_df.shape)
print(deliveries_df.columns.tolist())
deliveries_df.head()
(0, 0) []
Out[8]:
In [9]:
print(len(all_deliveries))
0
In [10]:
import json
with open(json_files[0], "r") as f:
sample = json.load(f)
print(sample.keys())
dict_keys(['meta', 'info', 'innings'])
In [16]:
matches = []
for file in tqdm(json_files):
with open(file, "r") as f:
data = json.load(f)
info = data["info"]
match_id = os.path.basename(file).replace(".json","")
teams = info["teams"]
winner = info.get("outcome", {}).get("winner")
matches.append({
"match_id": match_id,
"season": info.get("season"),
"date": info["dates"][0],
"venue": info.get("venue"),
"city": info.get("city"),
"team1": teams[0],
"team2": teams[1],
"winner": winner,
"toss_winner": info["toss"]["winner"],
"toss_decision": info["toss"]["decision"]
})
100%|██████████| 63/63 [00:00<00:00, 1384.42it/s]
In [17]:
matches_df = pd.DataFrame(matches)
matches_df.head()
Out[17]:
| match_id | season | date | venue | city | team1 | team2 | winner | toss_winner | toss_decision | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1426263 | 2024 | 2024-04-11 | Wankhede Stadium, Mumbai | Mumbai | Royal Challengers Bengaluru | Mumbai Indians | Mumbai Indians | Mumbai Indians | field |
| 1 | 1535462 | 2026 | 2026-05-26 | Himachal Pradesh Cricket Association Stadium, ... | Dharamsala | Royal Challengers Bengaluru | Gujarat Titans | Royal Challengers Bengaluru | Gujarat Titans | field |
| 2 | 1422137 | 2024 | 2024-04-06 | Sawai Mansingh Stadium, Jaipur | Jaipur | Royal Challengers Bengaluru | Rajasthan Royals | Rajasthan Royals | Rajasthan Royals | field |
| 3 | 1469298 | 2024/25 | 2025-02-14 | Kotambi Stadium, Vadodara | Vadodara | Gujarat Giants | Royal Challengers Bengaluru | Royal Challengers Bengaluru | Royal Challengers Bengaluru | field |
| 4 | 1426274 | 2024 | 2024-04-21 | Eden Gardens, Kolkata | Kolkata | Kolkata Knight Riders | Royal Challengers Bengaluru | Kolkata Knight Riders | Royal Challengers Bengaluru | field |
In [18]:
print(matches_df.shape)
matches_df.info()
matches_df.head()
(63, 10) <class 'pandas.core.frame.DataFrame'> RangeIndex: 63 entries, 0 to 62 Data columns (total 10 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 match_id 63 non-null object 1 season 63 non-null object 2 date 63 non-null object 3 venue 63 non-null object 4 city 63 non-null object 5 team1 63 non-null object 6 team2 63 non-null object 7 winner 62 non-null object 8 toss_winner 63 non-null object 9 toss_decision 63 non-null object dtypes: object(10) memory usage: 5.1+ KB
Out[18]:
| match_id | season | date | venue | city | team1 | team2 | winner | toss_winner | toss_decision | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1426263 | 2024 | 2024-04-11 | Wankhede Stadium, Mumbai | Mumbai | Royal Challengers Bengaluru | Mumbai Indians | Mumbai Indians | Mumbai Indians | field |
| 1 | 1535462 | 2026 | 2026-05-26 | Himachal Pradesh Cricket Association Stadium, ... | Dharamsala | Royal Challengers Bengaluru | Gujarat Titans | Royal Challengers Bengaluru | Gujarat Titans | field |
| 2 | 1422137 | 2024 | 2024-04-06 | Sawai Mansingh Stadium, Jaipur | Jaipur | Royal Challengers Bengaluru | Rajasthan Royals | Rajasthan Royals | Rajasthan Royals | field |
| 3 | 1469298 | 2024/25 | 2025-02-14 | Kotambi Stadium, Vadodara | Vadodara | Gujarat Giants | Royal Challengers Bengaluru | Royal Challengers Bengaluru | Royal Challengers Bengaluru | field |
| 4 | 1426274 | 2024 | 2024-04-21 | Eden Gardens, Kolkata | Kolkata | Kolkata Knight Riders | Royal Challengers Bengaluru | Kolkata Knight Riders | Royal Challengers Bengaluru | field |
In [19]:
deliveries = []
for file in tqdm(json_files):
with open(file, "r") as f:
data = json.load(f)
info = data["info"]
match_id = os.path.basename(file).replace(".json","")
season = info["season"]
date = info["dates"][0]
innings_list = data["innings"]
for innings_no, innings in enumerate(innings_list, start=1):
batting_team = innings["team"]
for over_data in innings["overs"]:
over = over_data["over"]
for ball_no, delivery in enumerate(
over_data["deliveries"],
start=1
):
wicket = 0
dismissal_type = None
player_out = None
if "wickets" in delivery:
wicket = 1
dismissal_type = delivery["wickets"][0].get("kind")
player_out = delivery["wickets"][0].get(
"player_out"
)
deliveries.append({
"match_id": match_id,
"season": season,
"date": date,
"innings": innings_no,
"batting_team": batting_team,
"over": over,
"ball": ball_no,
"actual_delivery":
delivery.get("actual_delivery"),
"batter":
delivery.get("batter"),
"non_striker":
delivery.get("non_striker"),
"bowler":
delivery.get("bowler"),
"runs_batter":
delivery["runs"]["batter"],
"extras":
delivery["runs"]["extras"],
"total_runs":
delivery["runs"]["total"],
"wicket":
wicket,
"dismissal_type":
dismissal_type,
"player_out":
player_out
})
100%|██████████| 63/63 [00:00<00:00, 681.95it/s]
In [20]:
deliveries_df = pd.DataFrame(deliveries)
In [21]:
print(deliveries_df.shape)
print(deliveries_df.columns)
deliveries_df.head()
(14863, 17)
Index(['match_id', 'season', 'date', 'innings', 'batting_team', 'over', 'ball',
'actual_delivery', 'batter', 'non_striker', 'bowler', 'runs_batter',
'extras', 'total_runs', 'wicket', 'dismissal_type', 'player_out'],
dtype='object')
Out[21]:
| match_id | season | date | innings | batting_team | over | ball | actual_delivery | batter | non_striker | bowler | runs_batter | extras | total_runs | wicket | dismissal_type | player_out | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1426263 | 2024 | 2024-04-11 | 1 | Royal Challengers Bengaluru | 0 | 1 | 0.1 | V Kohli | F du Plessis | Mohammad Nabi | 0 | 0 | 0 | 0 | None | None |
| 1 | 1426263 | 2024 | 2024-04-11 | 1 | Royal Challengers Bengaluru | 0 | 2 | 0.2 | V Kohli | F du Plessis | Mohammad Nabi | 1 | 0 | 1 | 0 | None | None |
| 2 | 1426263 | 2024 | 2024-04-11 | 1 | Royal Challengers Bengaluru | 0 | 3 | 0.3 | F du Plessis | V Kohli | Mohammad Nabi | 0 | 0 | 0 | 0 | None | None |
| 3 | 1426263 | 2024 | 2024-04-11 | 1 | Royal Challengers Bengaluru | 0 | 4 | 0.4 | F du Plessis | V Kohli | Mohammad Nabi | 1 | 0 | 1 | 0 | None | None |
| 4 | 1426263 | 2024 | 2024-04-11 | 1 | Royal Challengers Bengaluru | 0 | 5 | 0.5 | V Kohli | F du Plessis | Mohammad Nabi | 1 | 0 | 1 | 0 | None | None |
In [22]:
rcb_names = [
"Royal Challengers Bengaluru",
"Royal Challengers Bangalore"
]
rcb_df = deliveries_df[
deliveries_df["batting_team"].isin(rcb_names)
]
print(rcb_df.shape)
(7343, 17)
In [23]:
kohli_df = rcb_df[
rcb_df["batter"] == "V Kohli"
]
kohli_df.head()
Out[23]:
| match_id | season | date | innings | batting_team | over | ball | actual_delivery | batter | non_striker | bowler | runs_batter | extras | total_runs | wicket | dismissal_type | player_out | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1426263 | 2024 | 2024-04-11 | 1 | Royal Challengers Bengaluru | 0 | 1 | 0.1 | V Kohli | F du Plessis | Mohammad Nabi | 0 | 0 | 0 | 0 | None | None |
| 1 | 1426263 | 2024 | 2024-04-11 | 1 | Royal Challengers Bengaluru | 0 | 2 | 0.2 | V Kohli | F du Plessis | Mohammad Nabi | 1 | 0 | 1 | 0 | None | None |
| 4 | 1426263 | 2024 | 2024-04-11 | 1 | Royal Challengers Bengaluru | 0 | 5 | 0.5 | V Kohli | F du Plessis | Mohammad Nabi | 1 | 0 | 1 | 0 | None | None |
| 6 | 1426263 | 2024 | 2024-04-11 | 1 | Royal Challengers Bengaluru | 1 | 1 | 1.1 | V Kohli | F du Plessis | G Coetzee | 0 | 0 | 0 | 0 | None | None |
| 7 | 1426263 | 2024 | 2024-04-11 | 1 | Royal Challengers Bengaluru | 1 | 2 | 1.2 | V Kohli | F du Plessis | G Coetzee | 0 | 0 | 0 | 0 | None | None |
In [24]:
runs = kohli_df["runs_batter"].sum()
balls = len(kohli_df)
fours = (
kohli_df["runs_batter"] == 4
).sum()
sixes = (
kohli_df["runs_batter"] == 6
).sum()
strike_rate = round(
(runs / balls) * 100,
2
)
print("Runs =", runs)
print("Balls =", balls)
print("4s =", fours)
print("6s =", sixes)
print("SR =", strike_rate)
Runs = 2073 Balls = 1388 4s = 201 6s = 82 SR = 149.35
In [25]:
# RCB team names
rcb_names = [
"Royal Challengers Bengaluru",
"Royal Challengers Bangalore"
]
# Matches involving RCB
rcb_matches = matches_df[
(matches_df["team1"].isin(rcb_names)) |
(matches_df["team2"].isin(rcb_names))
]
# Matches played
matches_played = len(rcb_matches)
# Wins
wins = rcb_matches[
rcb_matches["winner"].isin(rcb_names)
].shape[0]
# Losses
losses = matches_played - wins
# Win Percentage
win_percentage = round((wins / matches_played) * 100, 2)
print(f"Matches Played : {matches_played}")
print(f"Wins : {wins}")
print(f"Losses : {losses}")
print(f"Win % : {win_percentage}")
Matches Played : 63 Wins : 39 Losses : 24 Win % : 61.9
In [26]:
plt.figure(figsize=(6,6))
plt.pie(
[wins, losses],
labels=["Wins", "Losses"],
autopct="%1.1f%%"
)
plt.title("RCB Match Results")
plt.show()
In [27]:
import matplotlib.pyplot as plt
plt.figure(figsize=(6,4))
plt.bar(
["Wins", "Losses"],
[wins, losses]
)
plt.title("RCB Wins vs Losses")
plt.ylabel("Matches")
plt.show()
In [28]:
rcb_batting = (
rcb_df.groupby("batter")
.agg(
Runs=("runs_batter", "sum"),
Balls=("runs_batter", "count")
)
.reset_index()
)
rcb_batting["Strike Rate"] = round(
(rcb_batting["Runs"] / rcb_batting["Balls"]) * 100,
2
)
rcb_batting = rcb_batting.sort_values(
"Runs",
ascending=False
)
rcb_batting.head(10)
Out[28]:
| batter | Runs | Balls | Strike Rate | |
|---|---|---|---|---|
| 54 | V Kohli | 2073 | 1388 | 149.35 |
| 40 | RM Patidar | 1208 | 738 | 163.69 |
| 7 | D Padikkal | 711 | 456 | 155.92 |
| 36 | PD Salt | 605 | 361 | 167.59 |
| 45 | S Mandhana | 576 | 403 | 142.93 |
| 53 | TH David | 492 | 280 | 175.71 |
| 11 | F du Plessis | 438 | 276 | 158.70 |
| 39 | RM Ghosh | 421 | 264 | 159.47 |
| 18 | JM Sharma | 377 | 251 | 150.20 |
| 10 | EA Perry | 372 | 257 | 144.75 |
In [29]:
print(rcb_batting[["batter", "Runs", "Balls", "Strike Rate"]].head(10))
batter Runs Balls Strike Rate 54 V Kohli 2073 1388 149.35 40 RM Patidar 1208 738 163.69 7 D Padikkal 711 456 155.92 36 PD Salt 605 361 167.59 45 S Mandhana 576 403 142.93 53 TH David 492 280 175.71 11 F du Plessis 438 276 158.70 39 RM Ghosh 421 264 159.47 18 JM Sharma 377 251 150.20 10 EA Perry 372 257 144.75
In [30]:
import matplotlib.pyplot as plt
top10 = rcb_batting.head(10)
plt.figure(figsize=(12,6))
plt.bar(top10["batter"], top10["Runs"])
plt.title("Top 10 Run Scorers for RCB")
plt.xlabel("Player")
plt.ylabel("Runs")
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()
In [31]:
top_scorer = rcb_batting.iloc[0]
print("Highest Run Scorer for RCB")
print("Player :", top_scorer["batter"])
print("Runs :", top_scorer["Runs"])
print("SR :", top_scorer["Strike Rate"])
Highest Run Scorer for RCB Player : V Kohli Runs : 2073 SR : 149.35
In [34]:
# RCB bowlers only
rcb_bowling = deliveries_df[
~deliveries_df["batting_team"].isin([
"Royal Challengers Bengaluru",
"Royal Challengers Bangalore"
])
]
# Wickets credited to bowlers
wickets_df = rcb_bowling[
(rcb_bowling["wicket"] == 1) &
(rcb_bowling["dismissal_type"] != "run out")
]
# Wicket count
wicket_table = (
wickets_df.groupby("bowler")
.size()
.reset_index(name="Wickets")
.sort_values("Wickets", ascending=False)
)
print(wicket_table.head(10))
bowler Wickets 4 B Kumar 46 14 JR Hazlewood 37 15 KH Pandya 31 40 Yash Dayal 28 31 Rasikh Salam 21 36 Suyash Sharma 17 26 N de Klerk 16 24 Mohammed Siraj 15 28 R Shepherd 13 21 LK Bell 12
In [35]:
top_bowler = wicket_table.iloc[0]
print("Highest Wicket Taker for RCB")
print("Bowler :", top_bowler["bowler"])
print("Wickets:", top_bowler["Wickets"])
Highest Wicket Taker for RCB Bowler : B Kumar Wickets: 46
In [36]:
import matplotlib.pyplot as plt
top10 = wicket_table.head(10)
plt.figure(figsize=(10,5))
plt.bar(top10["bowler"], top10["Wickets"])
plt.title("Top 10 Wicket Takers for RCB")
plt.xlabel("Bowler")
plt.ylabel("Wickets")
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()
In [37]:
from IPython.display import display
display(
wicket_table.head(10)
.style
.background_gradient(cmap="Reds")
)
| bowler | Wickets | |
|---|---|---|
| 4 | B Kumar | 46 |
| 14 | JR Hazlewood | 37 |
| 15 | KH Pandya | 31 |
| 40 | Yash Dayal | 28 |
| 31 | Rasikh Salam | 21 |
| 36 | Suyash Sharma | 17 |
| 26 | N de Klerk | 16 |
| 24 | Mohammed Siraj | 15 |
| 28 | R Shepherd | 13 |
| 21 | LK Bell | 12 |
In [38]:
top = wicket_table.iloc[0]
print("="*40)
print("RCB LEADING WICKET TAKER")
print("="*40)
print(f"Bowler : {top['bowler']}")
print(f"Wickets : {top['Wickets']}")
print("="*40)
======================================== RCB LEADING WICKET TAKER ======================================== Bowler : B Kumar Wickets : 46 ========================================
In [39]:
rcb_names = [
"Royal Challengers Bengaluru",
"Royal Challengers Bangalore"
]
# RCB matches
rcb_matches = matches_df[
(matches_df["team1"].isin(rcb_names)) |
(matches_df["team2"].isin(rcb_names))
]
venue_stats = []
for venue in rcb_matches["venue"].unique():
venue_matches = rcb_matches[
rcb_matches["venue"] == venue
]
matches = len(venue_matches)
wins = venue_matches[
venue_matches["winner"].isin(rcb_names)
].shape[0]
losses = matches - wins
win_pct = round((wins / matches) * 100, 2)
venue_stats.append([
venue,
matches,
wins,
losses,
win_pct
])
venue_df = pd.DataFrame(
venue_stats,
columns=[
"Venue",
"Matches",
"Wins",
"Losses",
"Win %"
]
)
venue_df = venue_df.sort_values(
"Win %",
ascending=False
)
venue_df.head(15)
Out[39]:
| Venue | Matches | Wins | Losses | Win % | |
|---|---|---|---|---|---|
| 1 | Himachal Pradesh Cricket Association Stadium, ... | 3 | 3 | 0 | 100.00 |
| 5 | Dr DY Patil Sports Academy, Navi Mumbai | 4 | 4 | 0 | 100.00 |
| 6 | Arun Jaitley Stadium, Delhi | 2 | 2 | 0 | 100.00 |
| 10 | Maharaja Yadavindra Singh International Cricke... | 2 | 2 | 0 | 100.00 |
| 12 | Shaheed Veer Narayan Singh International Stadi... | 2 | 2 | 0 | 100.00 |
| 15 | Brabourne Stadium, Mumbai | 1 | 1 | 0 | 100.00 |
| 3 | Kotambi Stadium, Vadodara | 7 | 5 | 2 | 71.43 |
| 0 | Wankhede Stadium, Mumbai | 3 | 2 | 1 | 66.67 |
| 11 | Narendra Modi Stadium, Ahmedabad | 5 | 3 | 2 | 60.00 |
| 8 | Rajiv Gandhi International Stadium, Uppal, Hyd... | 2 | 1 | 1 | 50.00 |
| 2 | Sawai Mansingh Stadium, Jaipur | 2 | 1 | 1 | 50.00 |
| 4 | Eden Gardens, Kolkata | 2 | 1 | 1 | 50.00 |
| 9 | MA Chidambaram Stadium, Chepauk, Chennai | 2 | 1 | 1 | 50.00 |
| 7 | M Chinnaswamy Stadium, Bengaluru | 21 | 10 | 11 | 47.62 |
| 13 | Bharat Ratna Shri Atal Bihari Vajpayee Ekana C... | 4 | 1 | 3 | 25.00 |
In [40]:
best_venue = venue_df.iloc[0]
print("Best Venue for RCB")
print("Venue :", best_venue["Venue"])
print("Win % :", best_venue["Win %"])
Best Venue for RCB Venue : Himachal Pradesh Cricket Association Stadium, Dharamsala Win % : 100.0
In [42]:
worst_venue = venue_df.iloc[-1]
print("Worst Venue for RCB")
print("Venue :", worst_venue["Venue"])
print("Win % :", worst_venue["Win %"])
Worst Venue for RCB Venue : Barsapara Cricket Stadium, Guwahati Win % : 0.0
In [43]:
import seaborn as sns
import matplotlib.pyplot as plt
heatmap_data = venue_df.set_index("Venue")[["Win %"]]
plt.figure(figsize=(8,10))
sns.heatmap(
heatmap_data,
annot=True,
cmap="YlGnBu"
)
plt.title("RCB Venue Performance Heatmap")
plt.show()
In [44]:
from IPython.display import display, HTML
matches_played = len(rcb_matches)
wins = rcb_matches[
rcb_matches["winner"].isin(rcb_names)
].shape[0]
losses = matches_played - wins
win_pct = round((wins / matches_played) * 100, 2)
top_run_scorer = rcb_batting.iloc[0]["batter"]
top_runs = rcb_batting.iloc[0]["Runs"]
top_wicket_taker = wicket_table.iloc[0]["bowler"]
top_wickets = wicket_table.iloc[0]["Wickets"]
display(HTML(f"""
<div style="display:flex;gap:20px;flex-wrap:wrap">
<div style="
background:#1f77b4;
padding:20px;
border-radius:15px;
width:220px;
color:white;
text-align:center;">
<h3>Matches</h3>
<h1>{matches_played}</h1>
</div>
<div style="
background:#2ca02c;
padding:20px;
border-radius:15px;
width:220px;
color:white;
text-align:center;">
<h3>Win %</h3>
<h1>{win_pct}%</h1>
</div>
<div style="
background:#ff7f0e;
padding:20px;
border-radius:15px;
width:220px;
color:white;
text-align:center;">
<h3>Top Scorer</h3>
<h4>{top_run_scorer}</h4>
<h2>{top_runs}</h2>
</div>
<div style="
background:#d62728;
padding:20px;
border-radius:15px;
width:220px;
color:white;
text-align:center;">
<h3>Top Wicket Taker</h3>
<h4>{top_wicket_taker}</h4>
<h2>{top_wickets}</h2>
</div>
</div>
"""))
Matches
63
Win %
61.9%
Top Scorer
V Kohli
2073
Top Wicket Taker
B Kumar
46
In [45]:
best_venue = venue_df.iloc[0]["Venue"]
display(HTML(f"""
<div style="display:flex;gap:20px;flex-wrap:wrap">
<div style="background:#4CAF50;padding:20px;border-radius:15px;width:220px;color:white;text-align:center;">
<h3>Wins</h3>
<h1>{wins}</h1>
</div>
<div style="background:#F44336;padding:20px;border-radius:15px;width:220px;color:white;text-align:center;">
<h3>Losses</h3>
<h1>{losses}</h1>
</div>
<div style="background:#2196F3;padding:20px;border-radius:15px;width:220px;color:white;text-align:center;">
<h3>Best Venue</h3>
<h4>{best_venue}</h4>
</div>
<div style="background:#9C27B0;padding:20px;border-radius:15px;width:220px;color:white;text-align:center;">
<h3>Top Batter</h3>
<h4>{top_run_scorer}</h4>
</div>
</div>
"""))
Wins
39
Losses
24
Best Venue
Himachal Pradesh Cricket Association Stadium, Dharamsala
Top Batter
V Kohli
In [48]:
# RCB Men's team names
rcb_names = [
"Royal Challengers Bengaluru",
"Royal Challengers Bangalore"
]
# Only RCB batting deliveries
rcb_batting = deliveries_df[
deliveries_df["batting_team"].isin(rcb_names)
].copy()
# Create pair irrespective of strike rotation
rcb_batting["pair"] = rcb_batting.apply(
lambda x: " & ".join(sorted([x["batter"], x["non_striker"]])),
axis=1
)
# Partnership runs
partnership_runs = (
rcb_batting.groupby("pair")
.agg(
Runs=("total_runs", "sum"),
Balls=("total_runs", "count")
)
.reset_index()
.sort_values("Runs", ascending=False)
)
partnership_runs.head(15)
Out[48]:
| pair | Runs | Balls | |
|---|---|---|---|
| 37 | D Padikkal & V Kohli | 1078 | 646 |
| 120 | PD Salt & V Kohli | 792 | 486 |
| 52 | F du Plessis & V Kohli | 707 | 438 |
| 131 | RM Patidar & V Kohli | 695 | 434 |
| 66 | GM Harris & S Mandhana | 392 | 241 |
| 56 | G Voll & S Mandhana | 381 | 246 |
| 69 | JG Bethell & V Kohli | 280 | 172 |
| 74 | JM Sharma & RM Patidar | 275 | 162 |
| 24 | C Green & V Kohli | 270 | 200 |
| 35 | D Padikkal & RM Patidar | 241 | 142 |
| 142 | V Kohli & WG Jacks | 227 | 123 |
| 141 | V Kohli & VR Iyer | 203 | 103 |
| 130 | RM Patidar & TH David | 202 | 115 |
| 88 | KH Pandya & RM Patidar | 199 | 134 |
| 133 | RM Patidar & WG Jacks | 190 | 105 |
In [49]:
top10 = partnership_runs.head(10)
print(top10)
pair Runs Balls 37 D Padikkal & V Kohli 1078 646 120 PD Salt & V Kohli 792 486 52 F du Plessis & V Kohli 707 438 131 RM Patidar & V Kohli 695 434 66 GM Harris & S Mandhana 392 241 56 G Voll & S Mandhana 381 246 69 JG Bethell & V Kohli 280 172 74 JM Sharma & RM Patidar 275 162 24 C Green & V Kohli 270 200 35 D Padikkal & RM Patidar 241 142
In [50]:
import matplotlib.pyplot as plt
top10 = partnership_runs.head(10)
plt.figure(figsize=(12,6))
plt.barh(
top10["pair"],
top10["Runs"]
)
plt.title("Top 10 RCB Men's Batting Partnerships")
plt.xlabel("Partnership Runs")
plt.ylabel("Batting Pair")
plt.gca().invert_yaxis()
plt.tight_layout()
plt.show()
In [51]:
best = partnership_runs.iloc[0]
print("Best RCB Men's Partnership")
print("Pair :", best["pair"])
print("Runs :", best["Runs"])
print("Balls:", best["Balls"])
Best RCB Men's Partnership Pair : D Padikkal & V Kohli Runs : 1078 Balls: 646
In [52]:
rcb_names = [
"Royal Challengers Bengaluru",
"Royal Challengers Bangalore"
]
# RCB matches
rcb_matches = matches_df[
(matches_df["team1"].isin(rcb_names)) |
(matches_df["team2"].isin(rcb_names))
].copy()
# Find opponent
rcb_matches["Opponent"] = rcb_matches.apply(
lambda x: x["team2"] if x["team1"] in rcb_names else x["team1"],
axis=1
)
opponent_stats = []
for opponent in rcb_matches["Opponent"].unique():
temp = rcb_matches[
rcb_matches["Opponent"] == opponent
]
matches = len(temp)
wins = temp[
temp["winner"].isin(rcb_names)
].shape[0]
losses = matches - wins
win_pct = round((wins / matches) * 100, 2)
opponent_stats.append([
opponent,
matches,
wins,
losses,
win_pct
])
opponent_df = pd.DataFrame(
opponent_stats,
columns=[
"Opponent",
"Matches",
"Wins",
"Losses",
"Win %"
]
)
opponent_df = opponent_df.sort_values(
"Win %",
ascending=False
)
opponent_df
Out[52]:
| Opponent | Matches | Wins | Losses | Win % | |
|---|---|---|---|---|---|
| 8 | Punjab Kings | 7 | 6 | 1 | 85.71 |
| 7 | Chennai Super Kings | 5 | 4 | 1 | 80.00 |
| 3 | Gujarat Giants | 4 | 3 | 1 | 75.00 |
| 1 | Gujarat Titans | 7 | 5 | 2 | 71.43 |
| 0 | Mumbai Indians | 8 | 5 | 3 | 62.50 |
| 5 | Delhi Capitals | 10 | 6 | 4 | 60.00 |
| 9 | Lucknow Super Giants | 4 | 2 | 2 | 50.00 |
| 10 | UP Warriorz | 4 | 2 | 2 | 50.00 |
| 4 | Kolkata Knight Riders | 4 | 2 | 2 | 50.00 |
| 2 | Rajasthan Royals | 5 | 2 | 3 | 40.00 |
| 6 | Sunrisers Hyderabad | 5 | 2 | 3 | 40.00 |
In [53]:
best = opponent_df.iloc[0]
print("Best Opponent for RCB")
print("Team :", best["Opponent"])
print("Win %:", best["Win %"])
Best Opponent for RCB Team : Punjab Kings Win %: 85.71
In [54]:
worst = opponent_df.iloc[-1]
print("Toughest Opponent for RCB")
print("Team :", worst["Opponent"])
print("Win %:", worst["Win %"])
Toughest Opponent for RCB Team : Sunrisers Hyderabad Win %: 40.0
In [55]:
worst = opponent_df.iloc[-1]
print("Toughest Opponent for RCB")
print("Team :", worst["Opponent"])
print("Win %:", worst["Win %"])
Toughest Opponent for RCB Team : Sunrisers Hyderabad Win %: 40.0
In [56]:
import matplotlib.pyplot as plt
plt.figure(figsize=(12,6))
plt.barh(
opponent_df["Opponent"],
opponent_df["Win %"]
)
plt.xlabel("Win Percentage")
plt.ylabel("Opponent")
plt.title("RCB Opponent-wise Win Percentage")
plt.gca().invert_yaxis()
plt.tight_layout()
plt.show()
In [57]:
display(opponent_df.head(5))
| Opponent | Matches | Wins | Losses | Win % | |
|---|---|---|---|---|---|
| 8 | Punjab Kings | 7 | 6 | 1 | 85.71 |
| 7 | Chennai Super Kings | 5 | 4 | 1 | 80.00 |
| 3 | Gujarat Giants | 4 | 3 | 1 | 75.00 |
| 1 | Gujarat Titans | 7 | 5 | 2 | 71.43 |
| 0 | Mumbai Indians | 8 | 5 | 3 | 62.50 |
In [58]:
# Kohli deliveries
kohli = deliveries_df[
deliveries_df["batter"] == "V Kohli"
]
# Balls faced by Kohli in each match
kohli_match_stats = (
kohli.groupby("match_id")
.agg(
Balls=("batter", "count"),
Runs=("runs_batter", "sum")
)
.reset_index()
)
# Matches where Kohli faced 30+ balls
kohli_30 = kohli_match_stats[
kohli_match_stats["Balls"] >= 30
]
# Merge with match results
kohli_30 = kohli_30.merge(
matches_df[["match_id","winner"]],
on="match_id"
)
rcb_names = [
"Royal Challengers Bengaluru",
"Royal Challengers Bangalore"
]
# Count wins
wins = kohli_30[
kohli_30["winner"].isin(rcb_names)
].shape[0]
matches = len(kohli_30)
win_pct = round(
(wins / matches) * 100,
2
)
print("Matches where Kohli faced 30+ balls:", matches)
print("RCB Wins:", wins)
print("RCB Win %:", win_pct)
Matches where Kohli faced 30+ balls: 24 RCB Wins: 22 RCB Win %: 91.67
In [59]:
kohli_match_stats = kohli_match_stats.merge(
matches_df[["match_id","winner"]],
on="match_id"
)
kohli_match_stats["Category"] = np.where(
kohli_match_stats["Balls"] >= 30,
"30+ Balls",
"<30 Balls"
)
summary = []
for cat in kohli_match_stats["Category"].unique():
temp = kohli_match_stats[
kohli_match_stats["Category"] == cat
]
wins = temp[
temp["winner"].isin(rcb_names)
].shape[0]
total = len(temp)
summary.append([
cat,
total,
wins,
round(wins/total*100,2)
])
comparison = pd.DataFrame(
summary,
columns=[
"Category",
"Matches",
"Wins",
"Win %"
]
)
comparison
Out[59]:
| Category | Matches | Wins | Win % | |
|---|---|---|---|---|
| 0 | <30 Balls | 22 | 7 | 31.82 |
| 1 | 30+ Balls | 24 | 22 | 91.67 |
In [60]:
import matplotlib.pyplot as plt
plt.figure(figsize=(6,4))
plt.bar(
comparison["Category"],
comparison["Win %"]
)
plt.ylabel("Win Percentage")
plt.title("RCB Win % vs Kohli Balls Faced")
plt.show()
In [61]:
# Tim David match-wise runs
tim_match_runs = (
deliveries_df[
deliveries_df["batter"] == "TH David"
]
.groupby("match_id")
.agg(
Runs=("runs_batter","sum")
)
.reset_index()
)
# Matches where Tim David scored 25+
tim_25 = tim_match_runs[
tim_match_runs["Runs"] > 25
]
# Merge with match results
tim_25 = tim_25.merge(
matches_df[["match_id","winner"]],
on="match_id"
)
rcb_names = [
"Royal Challengers Bengaluru",
"Royal Challengers Bangalore"
]
matches = len(tim_25)
wins = tim_25[
tim_25["winner"].isin(rcb_names)
].shape[0]
losses = matches - wins
win_pct = round(
(wins / matches) * 100,
2
)
print("Matches where Tim David scored >25 :", matches)
print("Wins :", wins)
print("Losses :", losses)
print("Win % :", win_pct)
Matches where Tim David scored >25 : 8 Wins : 3 Losses : 5 Win % : 37.5
In [62]:
tim_match_runs = tim_match_runs.merge(
matches_df[["match_id","winner"]],
on="match_id"
)
tim_match_runs["Category"] = np.where(
tim_match_runs["Runs"] > 25,
">25 Runs",
"≤25 Runs"
)
comparison = []
for cat in tim_match_runs["Category"].unique():
temp = tim_match_runs[
tim_match_runs["Category"] == cat
]
wins = temp[
temp["winner"].isin(rcb_names)
].shape[0]
total = len(temp)
comparison.append([
cat,
total,
wins,
round((wins/total)*100,2)
])
comparison_df = pd.DataFrame(
comparison,
columns=[
"Category",
"Matches",
"Wins",
"Win %"
]
)
comparison_df
Out[62]:
| Category | Matches | Wins | Win % | |
|---|---|---|---|---|
| 0 | ≤25 Runs | 16 | 12 | 75.0 |
| 1 | >25 Runs | 8 | 3 | 37.5 |
In [63]:
import matplotlib.pyplot as plt
plt.figure(figsize=(6,4))
plt.bar(
comparison_df["Category"],
comparison_df["Win %"]
)
plt.ylabel("Win Percentage")
plt.title("RCB Win % vs Tim David Runs")
plt.show()
In [64]:
# Bhuvneshwar Kumar deliveries
bhuvi = deliveries_df[
deliveries_df["bowler"] == "Bhuvneshwar Kumar"
]
# Runs conceded
runs_conceded = bhuvi["total_runs"].sum()
# Legal deliveries (exclude wides and no-balls if you have those columns)
balls_bowled = len(bhuvi)
overs = balls_bowled / 6
economy = round(runs_conceded / overs, 2)
print("Runs Conceded:", runs_conceded)
print("Balls Bowled:", balls_bowled)
print("Overs:", round(overs,1))
print("Economy:", economy)
Runs Conceded: 0 Balls Bowled: 0 Overs: 0.0 Economy: nan
/tmp/ipykernel_2675/1617914298.py:14: RuntimeWarning: invalid value encountered in divide economy = round(runs_conceded / overs, 2)
In [65]:
legal_balls = bhuvi[
~bhuvi["actual_delivery"].duplicated()
]
runs_conceded = bhuvi["total_runs"].sum()
overs = len(legal_balls) / 6
economy = round(runs_conceded / overs, 2)
print("Economy:", economy)
Economy: nan
/tmp/ipykernel_2675/441802929.py:9: RuntimeWarning: invalid value encountered in divide economy = round(runs_conceded / overs, 2)
In [67]:
# Search for Bhuvi in bowler names
sorted(
deliveries_df["bowler"]
.dropna()
.unique()
)
Out[67]:
['A Badoni', 'A Gardner', 'A Kamboj', 'A Reddy', 'A Sutherland', 'AB Kaur', 'AC Kerr', 'AD Russell', 'AM Ghazanfar', 'AR Patel', 'AS Joseph', 'AS Roy', 'Abhinandan Singh', 'Abhishek Sharma', 'Akash Deep', 'Akash Madhwal', 'Akash Singh', 'Arshad Khan', 'Arshdeep Singh', 'Auqib Nabi', 'Avesh Khan', 'Azmatullah Omarzai', 'B Kumar', 'Brijesh Sharma', 'C Bosch', 'C Green', 'CA Henry', 'CE Dean', 'CL Tryon', 'CV Varun', 'D Hemalatha', 'DA Payne', 'DB Sharma', 'DJS Dottin', 'DL Chahar', 'DS Rathi', 'E Bisht', 'E Malinga', 'EA Perry', 'Fazalhaq Farooqi', 'G Coetzee', 'G Voll', 'G Wareham', 'GF Linde', 'GJ Maxwell', 'GM Harris', 'H Graham', 'H Sharma', 'HH Pandya', 'HK Matthews', 'HV Patel', 'Happy Kumari', 'Harpreet Brar', 'Harsh Dubey', 'Harshit Rana', 'I Sharma', 'J Little', 'J Overton', 'JA Duffy', 'JC Archer', 'JD Unadkat', 'JJ Bumrah', 'JL Jonassen', 'JO Holder', 'JR Hazlewood', 'K Anjali Sarvani', 'K Goud', 'K Kartikeya', 'K Khejroliya', 'K Rabada', 'KA Jamieson', 'KH Pandya', 'KJ Garth', 'KK Ahmed', 'KS Ahuja', 'KS Gautam', 'KV Sharma', 'Kartik Tyagi', 'Kuldeep Yadav', 'L Ngidi', 'LCN Smith', 'LH Ferguson', 'LK Bell', 'LK Hamilton', 'LS Livingstone', 'M Jansen', 'M Kapp', 'M Mani', 'M Markande', 'M Pathirana', 'M Prasidh Krishna', 'M Siddharth', 'M Theekshana', 'MA Starc', 'MJ Henry', 'MJ Santner', 'MJ Suthar', 'MK Lomror', 'MM Sharma', 'MP Stoinis', 'MP Yadav', 'Mayank Dagar', 'Meghna Singh', 'Mohammad Nabi', 'Mohammed Shami', 'Mohammed Siraj', 'Mukesh Kumar', 'Musheer Khan', 'Mustafizur Rahman', 'N Burger', 'N Shree Charani', 'N Thushara', 'N Wadhera', 'N de Klerk', 'NJ Carey', 'NR Sciver-Brunt', 'NSS Sharma', 'Naveen-ul-Haq', 'Nithish Kumar Reddy', 'Noor Ahmad', 'P Rawat', 'PJ Cummins', 'PS Sisodia', 'PVD Chameera', 'PWH de Silva', 'Prince Yadav', 'Priya Mishra', 'R Ashwin', 'R Parag', 'R Sai Kishore', 'R Shepherd', 'RA Bawa', 'RA Jadeja', 'RD Chahar', 'RJW Topley', 'RP Yadav', 'Raghvi Bist', 'Rashid Khan', 'Rasikh Salam', 'Ravi Bishnoi', 'Renuka Singh', 'S Asha', 'S Dube', 'S Ecclestone', 'S Gopal', 'S Gupta', 'S Ishaque', 'S Ismail', 'S Pandey', 'S Rana', 'S Sandeep Warrier', 'SFM Devine', 'SG Satghare', 'SH Johnson', 'SM Curran', 'SN Thakur', 'SP Narine', 'SR Dubey', 'SR Patil', 'SZ Thakor', 'Sakib Hussain', 'Sandeep Sharma', 'Shafali Verma', 'Shahbaz Ahmed', 'Shashank Singh', 'Shivang Kumar', 'Simarjeet Singh', 'Suyash Sharma', 'Swapnil Singh', 'T Natarajan', 'TA Boult', 'TH David', 'TM Head', 'TM McGrath', 'TP Kanwar', 'TU Deshpande', 'V Kaverappa', 'V Nigam', 'V Puthur', 'VG Arora', 'VJ Joshitha', 'Vaishnavi Sharma', 'Vijaykumar Vyshak', "W O'Rourke", 'WG Jacks', 'XC Bartlett', 'YS Chahal', 'Yash Dayal', 'Yash Thakur']
In [68]:
[x for x in deliveries_df["bowler"].dropna().unique()
if "Kumar" in x]
Out[68]:
['B Kumar', 'Mukesh Kumar', 'Shivang Kumar', 'Nithish Kumar Reddy', 'Happy Kumari']
In [70]:
bhuvi = deliveries_df[
deliveries_df["bowler"] == "B Kumar"
]
In [71]:
runs_conceded = bhuvi["total_runs"].sum()
balls_bowled = len(bhuvi)
economy = round(
runs_conceded / (balls_bowled/6),
2
)
print("Runs Conceded:", runs_conceded)
print("Balls Bowled:", balls_bowled)
print("Economy:", economy)
Runs Conceded: 1068 Balls Bowled: 743 Economy: 8.62
In [73]:
# Krunal match-wise runs
krunal_runs = (
deliveries_df[
deliveries_df["batter"] == "KH Pandya"
]
.groupby("match_id")
.agg(Runs=("runs_batter", "sum"))
.reset_index()
)
# RCB team runs per match
rcb_runs = (
rcb_df.groupby("match_id")
.agg(Team_Runs=("runs_batter", "sum"))
.reset_index()
)
# Merge
krunal_contribution = krunal_runs.merge(
rcb_runs,
on="match_id"
)
krunal_contribution["Contribution_%"] = round(
(krunal_contribution["Runs"] /
krunal_contribution["Team_Runs"]) * 100,
2
)
krunal_contribution.head()
Out[73]:
| match_id | Runs | Team_Runs | Contribution_% | |
|---|---|---|---|---|
| 0 | 1473445 | 0 | 185 | 0.00 |
| 1 | 1473451 | 5 | 162 | 3.09 |
| 2 | 1473461 | 18 | 148 | 12.16 |
| 3 | 1473471 | 1 | 94 | 1.06 |
| 4 | 1473483 | 73 | 161 | 45.34 |
In [74]:
print(
"Average Contribution:",
round(
krunal_contribution["Contribution_%"].mean(),
2
),
"%"
)
Average Contribution: 12.03 %
In [75]:
krunal_contribution = krunal_contribution.merge(
matches_df[["match_id", "winner"]],
on="match_id"
)
rcb_names = [
"Royal Challengers Bengaluru",
"Royal Challengers Bangalore"
]
wins = krunal_contribution[
krunal_contribution["winner"].isin(rcb_names)
]
print(
"Average Contribution in Wins:",
round(wins["Contribution_%"].mean(), 2),
"%"
)
Average Contribution in Wins: 17.87 %
In [76]:
krunal_bowling = deliveries_df[
deliveries_df["bowler"] == "KH Pandya"
]
wickets = krunal_bowling[
(krunal_bowling["wicket"] == 1) &
(krunal_bowling["dismissal_type"] != "run out")
].shape[0]
runs_conceded = krunal_bowling["total_runs"].sum()
overs = len(krunal_bowling) / 6
economy = round(runs_conceded / overs, 2)
print("Wickets:", wickets)
print("Economy:", economy)
Wickets: 31 Economy: 8.14
In [77]:
krunal_20 = krunal_runs[
krunal_runs["Runs"] >= 20
]
krunal_20 = krunal_20.merge(
matches_df[["match_id","winner"]],
on="match_id"
)
wins = krunal_20[
krunal_20["winner"].isin(rcb_names)
].shape[0]
total = len(krunal_20)
print(
"RCB Win % when Krunal scores 20+ runs:",
round(wins/total*100,2)
)
RCB Win % when Krunal scores 20+ runs: 66.67
In [78]:
krunal_20 = krunal_runs[
krunal_runs["Runs"] >= 20
]
krunal_20 = krunal_20.merge(
matches_df[["match_id","winner"]],
on="match_id"
)
wins = krunal_20[
krunal_20["winner"].isin(rcb_names)
].shape[0]
total = len(krunal_20)
print(
"RCB Win % when Krunal scores 20+ runs:",
round(wins/total*100,2)
)
RCB Win % when Krunal scores 20+ runs: 66.67