In [1]:
import pandas as pd
import numpy as np
import json
import os
import glob
from tqdm import tqdm
In [6]:
json_files = glob.glob("*.json")

print("Total files:", len(json_files))
Total files: 63
In [3]:
all_deliveries = []

for file in tqdm(json_files):

    with open(file, "r", encoding="utf-8") as f:
        data = json.load(f)

    info = data.get("info", {})

    match_id = os.path.basename(file).replace(".json","")

    date = info.get("dates",[None])[0]

    teams = info.get("teams", [])

    venue = info.get("venue", "")

    winner = info.get("outcome", {}).get("winner", None)

    innings_data = data.get("innings", [])

    for innings_num, innings in enumerate(innings_data, start=1):

        team = innings.get("team", "")

        overs = innings.get("overs", [])

        for over_data in overs:

            over_number = over_data.get("over")

            deliveries = over_data.get("deliveries", [])

            for ball_no, delivery in enumerate(deliveries, start=1):

                batter = delivery.get("batter")

                bowler = delivery.get("bowler")

                non_striker = delivery.get("non_striker")

                runs_batter = delivery.get("runs", {}).get("batter", 0)

                extras = delivery.get("runs", {}).get("extras", 0)

                total_runs = delivery.get("runs", {}).get("total", 0)

                wicket = 0
                dismissal_type = None

                if "wickets" in delivery:
                    wicket = 1
                    dismissal_type = delivery["wickets"][0].get("kind")

                all_deliveries.append({
                    "match_id": match_id,
                    "date": date,
                    "venue": venue,
                    "winner": winner,
                    "batting_team": team,
                    "innings": innings_num,
                    "over": over_number,
                    "ball": ball_no,
                    "batter": batter,
                    "bowler": bowler,
                    "non_striker": non_striker,
                    "runs_batter": runs_batter,
                    "extras": extras,
                    "total_runs": total_runs,
                    "wicket": wicket,
                    "dismissal_type": dismissal_type
                })
0it [00:00, ?it/s]
In [15]:
with open(json_files[0], "r") as f:
    sample = json.load(f)

print(sample.keys())
dict_keys(['meta', 'info', 'innings'])
In [4]:
deliveries_df = pd.DataFrame(all_deliveries)

print(deliveries_df.shape)

deliveries_df.head()
(0, 0)
Out[4]:
In [8]:
print(deliveries_df.shape)

print(deliveries_df.columns.tolist())

deliveries_df.head()
(0, 0)
[]
Out[8]:
In [9]:
print(len(all_deliveries))
0
In [10]:
import json

with open(json_files[0], "r") as f:
    sample = json.load(f)

print(sample.keys())
dict_keys(['meta', 'info', 'innings'])
In [16]:
matches = []

for file in tqdm(json_files):

    with open(file, "r") as f:
        data = json.load(f)

    info = data["info"]

    match_id = os.path.basename(file).replace(".json","")

    teams = info["teams"]

    winner = info.get("outcome", {}).get("winner")

    matches.append({
        "match_id": match_id,
        "season": info.get("season"),
        "date": info["dates"][0],
        "venue": info.get("venue"),
        "city": info.get("city"),
        "team1": teams[0],
        "team2": teams[1],
        "winner": winner,
        "toss_winner": info["toss"]["winner"],
        "toss_decision": info["toss"]["decision"]
    })
100%|██████████| 63/63 [00:00<00:00, 1384.42it/s]
In [17]:
matches_df = pd.DataFrame(matches)

matches_df.head()
Out[17]:
match_id season date venue city team1 team2 winner toss_winner toss_decision
0 1426263 2024 2024-04-11 Wankhede Stadium, Mumbai Mumbai Royal Challengers Bengaluru Mumbai Indians Mumbai Indians Mumbai Indians field
1 1535462 2026 2026-05-26 Himachal Pradesh Cricket Association Stadium, ... Dharamsala Royal Challengers Bengaluru Gujarat Titans Royal Challengers Bengaluru Gujarat Titans field
2 1422137 2024 2024-04-06 Sawai Mansingh Stadium, Jaipur Jaipur Royal Challengers Bengaluru Rajasthan Royals Rajasthan Royals Rajasthan Royals field
3 1469298 2024/25 2025-02-14 Kotambi Stadium, Vadodara Vadodara Gujarat Giants Royal Challengers Bengaluru Royal Challengers Bengaluru Royal Challengers Bengaluru field
4 1426274 2024 2024-04-21 Eden Gardens, Kolkata Kolkata Kolkata Knight Riders Royal Challengers Bengaluru Kolkata Knight Riders Royal Challengers Bengaluru field
In [18]:
print(matches_df.shape)

matches_df.info()

matches_df.head()
(63, 10)
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 63 entries, 0 to 62
Data columns (total 10 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   match_id       63 non-null     object
 1   season         63 non-null     object
 2   date           63 non-null     object
 3   venue          63 non-null     object
 4   city           63 non-null     object
 5   team1          63 non-null     object
 6   team2          63 non-null     object
 7   winner         62 non-null     object
 8   toss_winner    63 non-null     object
 9   toss_decision  63 non-null     object
dtypes: object(10)
memory usage: 5.1+ KB
Out[18]:
match_id season date venue city team1 team2 winner toss_winner toss_decision
0 1426263 2024 2024-04-11 Wankhede Stadium, Mumbai Mumbai Royal Challengers Bengaluru Mumbai Indians Mumbai Indians Mumbai Indians field
1 1535462 2026 2026-05-26 Himachal Pradesh Cricket Association Stadium, ... Dharamsala Royal Challengers Bengaluru Gujarat Titans Royal Challengers Bengaluru Gujarat Titans field
2 1422137 2024 2024-04-06 Sawai Mansingh Stadium, Jaipur Jaipur Royal Challengers Bengaluru Rajasthan Royals Rajasthan Royals Rajasthan Royals field
3 1469298 2024/25 2025-02-14 Kotambi Stadium, Vadodara Vadodara Gujarat Giants Royal Challengers Bengaluru Royal Challengers Bengaluru Royal Challengers Bengaluru field
4 1426274 2024 2024-04-21 Eden Gardens, Kolkata Kolkata Kolkata Knight Riders Royal Challengers Bengaluru Kolkata Knight Riders Royal Challengers Bengaluru field
In [19]:
deliveries = []

for file in tqdm(json_files):

    with open(file, "r") as f:
        data = json.load(f)

    info = data["info"]

    match_id = os.path.basename(file).replace(".json","")

    season = info["season"]

    date = info["dates"][0]

    innings_list = data["innings"]

    for innings_no, innings in enumerate(innings_list, start=1):

        batting_team = innings["team"]

        for over_data in innings["overs"]:

            over = over_data["over"]

            for ball_no, delivery in enumerate(
                over_data["deliveries"],
                start=1
            ):

                wicket = 0
                dismissal_type = None
                player_out = None

                if "wickets" in delivery:

                    wicket = 1

                    dismissal_type = delivery["wickets"][0].get("kind")

                    player_out = delivery["wickets"][0].get(
                        "player_out"
                    )

                deliveries.append({

                    "match_id": match_id,

                    "season": season,

                    "date": date,

                    "innings": innings_no,

                    "batting_team": batting_team,

                    "over": over,

                    "ball": ball_no,

                    "actual_delivery":
                    delivery.get("actual_delivery"),

                    "batter":
                    delivery.get("batter"),

                    "non_striker":
                    delivery.get("non_striker"),

                    "bowler":
                    delivery.get("bowler"),

                    "runs_batter":
                    delivery["runs"]["batter"],

                    "extras":
                    delivery["runs"]["extras"],

                    "total_runs":
                    delivery["runs"]["total"],

                    "wicket":
                    wicket,

                    "dismissal_type":
                    dismissal_type,

                    "player_out":
                    player_out
                })
100%|██████████| 63/63 [00:00<00:00, 681.95it/s]
In [20]:
deliveries_df = pd.DataFrame(deliveries)
In [21]:
print(deliveries_df.shape)

print(deliveries_df.columns)

deliveries_df.head()
(14863, 17)
Index(['match_id', 'season', 'date', 'innings', 'batting_team', 'over', 'ball',
       'actual_delivery', 'batter', 'non_striker', 'bowler', 'runs_batter',
       'extras', 'total_runs', 'wicket', 'dismissal_type', 'player_out'],
      dtype='object')
Out[21]:
match_id season date innings batting_team over ball actual_delivery batter non_striker bowler runs_batter extras total_runs wicket dismissal_type player_out
0 1426263 2024 2024-04-11 1 Royal Challengers Bengaluru 0 1 0.1 V Kohli F du Plessis Mohammad Nabi 0 0 0 0 None None
1 1426263 2024 2024-04-11 1 Royal Challengers Bengaluru 0 2 0.2 V Kohli F du Plessis Mohammad Nabi 1 0 1 0 None None
2 1426263 2024 2024-04-11 1 Royal Challengers Bengaluru 0 3 0.3 F du Plessis V Kohli Mohammad Nabi 0 0 0 0 None None
3 1426263 2024 2024-04-11 1 Royal Challengers Bengaluru 0 4 0.4 F du Plessis V Kohli Mohammad Nabi 1 0 1 0 None None
4 1426263 2024 2024-04-11 1 Royal Challengers Bengaluru 0 5 0.5 V Kohli F du Plessis Mohammad Nabi 1 0 1 0 None None
In [22]:
rcb_names = [
    "Royal Challengers Bengaluru",
    "Royal Challengers Bangalore"
]

rcb_df = deliveries_df[
    deliveries_df["batting_team"].isin(rcb_names)
]

print(rcb_df.shape)
(7343, 17)
In [23]:
kohli_df = rcb_df[
    rcb_df["batter"] == "V Kohli"
]

kohli_df.head()
Out[23]:
match_id season date innings batting_team over ball actual_delivery batter non_striker bowler runs_batter extras total_runs wicket dismissal_type player_out
0 1426263 2024 2024-04-11 1 Royal Challengers Bengaluru 0 1 0.1 V Kohli F du Plessis Mohammad Nabi 0 0 0 0 None None
1 1426263 2024 2024-04-11 1 Royal Challengers Bengaluru 0 2 0.2 V Kohli F du Plessis Mohammad Nabi 1 0 1 0 None None
4 1426263 2024 2024-04-11 1 Royal Challengers Bengaluru 0 5 0.5 V Kohli F du Plessis Mohammad Nabi 1 0 1 0 None None
6 1426263 2024 2024-04-11 1 Royal Challengers Bengaluru 1 1 1.1 V Kohli F du Plessis G Coetzee 0 0 0 0 None None
7 1426263 2024 2024-04-11 1 Royal Challengers Bengaluru 1 2 1.2 V Kohli F du Plessis G Coetzee 0 0 0 0 None None
In [24]:
runs = kohli_df["runs_batter"].sum()

balls = len(kohli_df)

fours = (
    kohli_df["runs_batter"] == 4
).sum()

sixes = (
    kohli_df["runs_batter"] == 6
).sum()

strike_rate = round(
    (runs / balls) * 100,
    2
)

print("Runs =", runs)

print("Balls =", balls)

print("4s =", fours)

print("6s =", sixes)

print("SR =", strike_rate)
Runs = 2073
Balls = 1388
4s = 201
6s = 82
SR = 149.35
In [25]:
# RCB team names
rcb_names = [
    "Royal Challengers Bengaluru",
    "Royal Challengers Bangalore"
]

# Matches involving RCB
rcb_matches = matches_df[
    (matches_df["team1"].isin(rcb_names)) |
    (matches_df["team2"].isin(rcb_names))
]

# Matches played
matches_played = len(rcb_matches)

# Wins
wins = rcb_matches[
    rcb_matches["winner"].isin(rcb_names)
].shape[0]

# Losses
losses = matches_played - wins

# Win Percentage
win_percentage = round((wins / matches_played) * 100, 2)

print(f"Matches Played : {matches_played}")
print(f"Wins           : {wins}")
print(f"Losses         : {losses}")
print(f"Win %          : {win_percentage}")
Matches Played : 63
Wins           : 39
Losses         : 24
Win %          : 61.9
In [26]:
plt.figure(figsize=(6,6))

plt.pie(
    [wins, losses],
    labels=["Wins", "Losses"],
    autopct="%1.1f%%"
)

plt.title("RCB Match Results")

plt.show()
No description has been provided for this image
In [27]:
import matplotlib.pyplot as plt

plt.figure(figsize=(6,4))

plt.bar(
    ["Wins", "Losses"],
    [wins, losses]
)

plt.title("RCB Wins vs Losses")
plt.ylabel("Matches")

plt.show()
No description has been provided for this image
In [28]:
rcb_batting = (
    rcb_df.groupby("batter")
    .agg(
        Runs=("runs_batter", "sum"),
        Balls=("runs_batter", "count")
    )
    .reset_index()
)

rcb_batting["Strike Rate"] = round(
    (rcb_batting["Runs"] / rcb_batting["Balls"]) * 100,
    2
)

rcb_batting = rcb_batting.sort_values(
    "Runs",
    ascending=False
)

rcb_batting.head(10)
Out[28]:
batter Runs Balls Strike Rate
54 V Kohli 2073 1388 149.35
40 RM Patidar 1208 738 163.69
7 D Padikkal 711 456 155.92
36 PD Salt 605 361 167.59
45 S Mandhana 576 403 142.93
53 TH David 492 280 175.71
11 F du Plessis 438 276 158.70
39 RM Ghosh 421 264 159.47
18 JM Sharma 377 251 150.20
10 EA Perry 372 257 144.75
In [29]:
print(rcb_batting[["batter", "Runs", "Balls", "Strike Rate"]].head(10))
          batter  Runs  Balls  Strike Rate
54       V Kohli  2073   1388       149.35
40    RM Patidar  1208    738       163.69
7     D Padikkal   711    456       155.92
36       PD Salt   605    361       167.59
45    S Mandhana   576    403       142.93
53      TH David   492    280       175.71
11  F du Plessis   438    276       158.70
39      RM Ghosh   421    264       159.47
18     JM Sharma   377    251       150.20
10      EA Perry   372    257       144.75
In [30]:
import matplotlib.pyplot as plt

top10 = rcb_batting.head(10)

plt.figure(figsize=(12,6))

plt.bar(top10["batter"], top10["Runs"])

plt.title("Top 10 Run Scorers for RCB")
plt.xlabel("Player")
plt.ylabel("Runs")

plt.xticks(rotation=45)

plt.tight_layout()
plt.show()
No description has been provided for this image
In [31]:
top_scorer = rcb_batting.iloc[0]

print("Highest Run Scorer for RCB")
print("Player :", top_scorer["batter"])
print("Runs   :", top_scorer["Runs"])
print("SR     :", top_scorer["Strike Rate"])
Highest Run Scorer for RCB
Player : V Kohli
Runs   : 2073
SR     : 149.35
In [34]:
# RCB bowlers only
rcb_bowling = deliveries_df[
    ~deliveries_df["batting_team"].isin([
        "Royal Challengers Bengaluru",
        "Royal Challengers Bangalore"
    ])
]

# Wickets credited to bowlers
wickets_df = rcb_bowling[
    (rcb_bowling["wicket"] == 1) &
    (rcb_bowling["dismissal_type"] != "run out")
]

# Wicket count
wicket_table = (
    wickets_df.groupby("bowler")
    .size()
    .reset_index(name="Wickets")
    .sort_values("Wickets", ascending=False)
)

print(wicket_table.head(10))
            bowler  Wickets
4          B Kumar       46
14    JR Hazlewood       37
15       KH Pandya       31
40      Yash Dayal       28
31    Rasikh Salam       21
36   Suyash Sharma       17
26      N de Klerk       16
24  Mohammed Siraj       15
28      R Shepherd       13
21         LK Bell       12
In [35]:
top_bowler = wicket_table.iloc[0]

print("Highest Wicket Taker for RCB")
print("Bowler :", top_bowler["bowler"])
print("Wickets:", top_bowler["Wickets"])
Highest Wicket Taker for RCB
Bowler : B Kumar
Wickets: 46
In [36]:
import matplotlib.pyplot as plt

top10 = wicket_table.head(10)

plt.figure(figsize=(10,5))
plt.bar(top10["bowler"], top10["Wickets"])
plt.title("Top 10 Wicket Takers for RCB")
plt.xlabel("Bowler")
plt.ylabel("Wickets")
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()
No description has been provided for this image
In [37]:
from IPython.display import display

display(
    wicket_table.head(10)
    .style
    .background_gradient(cmap="Reds")
)
  bowler Wickets
4 B Kumar 46
14 JR Hazlewood 37
15 KH Pandya 31
40 Yash Dayal 28
31 Rasikh Salam 21
36 Suyash Sharma 17
26 N de Klerk 16
24 Mohammed Siraj 15
28 R Shepherd 13
21 LK Bell 12
In [38]:
top = wicket_table.iloc[0]

print("="*40)
print("RCB LEADING WICKET TAKER")
print("="*40)
print(f"Bowler  : {top['bowler']}")
print(f"Wickets : {top['Wickets']}")
print("="*40)
========================================
RCB LEADING WICKET TAKER
========================================
Bowler  : B Kumar
Wickets : 46
========================================
In [39]:
rcb_names = [
    "Royal Challengers Bengaluru",
    "Royal Challengers Bangalore"
]

# RCB matches
rcb_matches = matches_df[
    (matches_df["team1"].isin(rcb_names)) |
    (matches_df["team2"].isin(rcb_names))
]

venue_stats = []

for venue in rcb_matches["venue"].unique():

    venue_matches = rcb_matches[
        rcb_matches["venue"] == venue
    ]

    matches = len(venue_matches)

    wins = venue_matches[
        venue_matches["winner"].isin(rcb_names)
    ].shape[0]

    losses = matches - wins

    win_pct = round((wins / matches) * 100, 2)

    venue_stats.append([
        venue,
        matches,
        wins,
        losses,
        win_pct
    ])

venue_df = pd.DataFrame(
    venue_stats,
    columns=[
        "Venue",
        "Matches",
        "Wins",
        "Losses",
        "Win %"
    ]
)

venue_df = venue_df.sort_values(
    "Win %",
    ascending=False
)

venue_df.head(15)
Out[39]:
Venue Matches Wins Losses Win %
1 Himachal Pradesh Cricket Association Stadium, ... 3 3 0 100.00
5 Dr DY Patil Sports Academy, Navi Mumbai 4 4 0 100.00
6 Arun Jaitley Stadium, Delhi 2 2 0 100.00
10 Maharaja Yadavindra Singh International Cricke... 2 2 0 100.00
12 Shaheed Veer Narayan Singh International Stadi... 2 2 0 100.00
15 Brabourne Stadium, Mumbai 1 1 0 100.00
3 Kotambi Stadium, Vadodara 7 5 2 71.43
0 Wankhede Stadium, Mumbai 3 2 1 66.67
11 Narendra Modi Stadium, Ahmedabad 5 3 2 60.00
8 Rajiv Gandhi International Stadium, Uppal, Hyd... 2 1 1 50.00
2 Sawai Mansingh Stadium, Jaipur 2 1 1 50.00
4 Eden Gardens, Kolkata 2 1 1 50.00
9 MA Chidambaram Stadium, Chepauk, Chennai 2 1 1 50.00
7 M Chinnaswamy Stadium, Bengaluru 21 10 11 47.62
13 Bharat Ratna Shri Atal Bihari Vajpayee Ekana C... 4 1 3 25.00
In [40]:
best_venue = venue_df.iloc[0]

print("Best Venue for RCB")
print("Venue :", best_venue["Venue"])
print("Win % :", best_venue["Win %"])
Best Venue for RCB
Venue : Himachal Pradesh Cricket Association Stadium, Dharamsala
Win % : 100.0
In [42]:
worst_venue = venue_df.iloc[-1]

print("Worst Venue for RCB")
print("Venue :", worst_venue["Venue"])
print("Win % :", worst_venue["Win %"])
Worst Venue for RCB
Venue : Barsapara Cricket Stadium, Guwahati
Win % : 0.0
In [43]:
import seaborn as sns
import matplotlib.pyplot as plt

heatmap_data = venue_df.set_index("Venue")[["Win %"]]

plt.figure(figsize=(8,10))

sns.heatmap(
    heatmap_data,
    annot=True,
    cmap="YlGnBu"
)

plt.title("RCB Venue Performance Heatmap")

plt.show()
No description has been provided for this image
In [44]:
from IPython.display import display, HTML

matches_played = len(rcb_matches)

wins = rcb_matches[
    rcb_matches["winner"].isin(rcb_names)
].shape[0]

losses = matches_played - wins

win_pct = round((wins / matches_played) * 100, 2)

top_run_scorer = rcb_batting.iloc[0]["batter"]
top_runs = rcb_batting.iloc[0]["Runs"]

top_wicket_taker = wicket_table.iloc[0]["bowler"]
top_wickets = wicket_table.iloc[0]["Wickets"]

display(HTML(f"""

<div style="display:flex;gap:20px;flex-wrap:wrap">

<div style="
background:#1f77b4;
padding:20px;
border-radius:15px;
width:220px;
color:white;
text-align:center;">
<h3>Matches</h3>
<h1>{matches_played}</h1>
</div>

<div style="
background:#2ca02c;
padding:20px;
border-radius:15px;
width:220px;
color:white;
text-align:center;">
<h3>Win %</h3>
<h1>{win_pct}%</h1>
</div>

<div style="
background:#ff7f0e;
padding:20px;
border-radius:15px;
width:220px;
color:white;
text-align:center;">
<h3>Top Scorer</h3>
<h4>{top_run_scorer}</h4>
<h2>{top_runs}</h2>
</div>

<div style="
background:#d62728;
padding:20px;
border-radius:15px;
width:220px;
color:white;
text-align:center;">
<h3>Top Wicket Taker</h3>
<h4>{top_wicket_taker}</h4>
<h2>{top_wickets}</h2>
</div>

</div>

"""))

Matches

63

Win %

61.9%

Top Scorer

V Kohli

2073

Top Wicket Taker

B Kumar

46

In [45]:
best_venue = venue_df.iloc[0]["Venue"]

display(HTML(f"""

<div style="display:flex;gap:20px;flex-wrap:wrap">

<div style="background:#4CAF50;padding:20px;border-radius:15px;width:220px;color:white;text-align:center;">
<h3>Wins</h3>
<h1>{wins}</h1>
</div>

<div style="background:#F44336;padding:20px;border-radius:15px;width:220px;color:white;text-align:center;">
<h3>Losses</h3>
<h1>{losses}</h1>
</div>

<div style="background:#2196F3;padding:20px;border-radius:15px;width:220px;color:white;text-align:center;">
<h3>Best Venue</h3>
<h4>{best_venue}</h4>
</div>

<div style="background:#9C27B0;padding:20px;border-radius:15px;width:220px;color:white;text-align:center;">
<h3>Top Batter</h3>
<h4>{top_run_scorer}</h4>
</div>

</div>

"""))

Wins

39

Losses

24

Best Venue

Himachal Pradesh Cricket Association Stadium, Dharamsala

Top Batter

V Kohli

In [48]:
# RCB Men's team names
rcb_names = [
    "Royal Challengers Bengaluru",
    "Royal Challengers Bangalore"
]

# Only RCB batting deliveries
rcb_batting = deliveries_df[
    deliveries_df["batting_team"].isin(rcb_names)
].copy()

# Create pair irrespective of strike rotation
rcb_batting["pair"] = rcb_batting.apply(
    lambda x: " & ".join(sorted([x["batter"], x["non_striker"]])),
    axis=1
)

# Partnership runs
partnership_runs = (
    rcb_batting.groupby("pair")
    .agg(
        Runs=("total_runs", "sum"),
        Balls=("total_runs", "count")
    )
    .reset_index()
    .sort_values("Runs", ascending=False)
)

partnership_runs.head(15)
Out[48]:
pair Runs Balls
37 D Padikkal & V Kohli 1078 646
120 PD Salt & V Kohli 792 486
52 F du Plessis & V Kohli 707 438
131 RM Patidar & V Kohli 695 434
66 GM Harris & S Mandhana 392 241
56 G Voll & S Mandhana 381 246
69 JG Bethell & V Kohli 280 172
74 JM Sharma & RM Patidar 275 162
24 C Green & V Kohli 270 200
35 D Padikkal & RM Patidar 241 142
142 V Kohli & WG Jacks 227 123
141 V Kohli & VR Iyer 203 103
130 RM Patidar & TH David 202 115
88 KH Pandya & RM Patidar 199 134
133 RM Patidar & WG Jacks 190 105
In [49]:
top10 = partnership_runs.head(10)

print(top10)
                        pair  Runs  Balls
37      D Padikkal & V Kohli  1078    646
120        PD Salt & V Kohli   792    486
52    F du Plessis & V Kohli   707    438
131     RM Patidar & V Kohli   695    434
66    GM Harris & S Mandhana   392    241
56       G Voll & S Mandhana   381    246
69      JG Bethell & V Kohli   280    172
74    JM Sharma & RM Patidar   275    162
24         C Green & V Kohli   270    200
35   D Padikkal & RM Patidar   241    142
In [50]:
import matplotlib.pyplot as plt

top10 = partnership_runs.head(10)

plt.figure(figsize=(12,6))

plt.barh(
    top10["pair"],
    top10["Runs"]
)

plt.title("Top 10 RCB Men's Batting Partnerships")
plt.xlabel("Partnership Runs")
plt.ylabel("Batting Pair")

plt.gca().invert_yaxis()

plt.tight_layout()
plt.show()
No description has been provided for this image
In [51]:
best = partnership_runs.iloc[0]

print("Best RCB Men's Partnership")
print("Pair :", best["pair"])
print("Runs :", best["Runs"])
print("Balls:", best["Balls"])
Best RCB Men's Partnership
Pair : D Padikkal & V Kohli
Runs : 1078
Balls: 646
In [52]:
rcb_names = [
    "Royal Challengers Bengaluru",
    "Royal Challengers Bangalore"
]

# RCB matches
rcb_matches = matches_df[
    (matches_df["team1"].isin(rcb_names)) |
    (matches_df["team2"].isin(rcb_names))
].copy()

# Find opponent
rcb_matches["Opponent"] = rcb_matches.apply(
    lambda x: x["team2"] if x["team1"] in rcb_names else x["team1"],
    axis=1
)

opponent_stats = []

for opponent in rcb_matches["Opponent"].unique():

    temp = rcb_matches[
        rcb_matches["Opponent"] == opponent
    ]

    matches = len(temp)

    wins = temp[
        temp["winner"].isin(rcb_names)
    ].shape[0]

    losses = matches - wins

    win_pct = round((wins / matches) * 100, 2)

    opponent_stats.append([
        opponent,
        matches,
        wins,
        losses,
        win_pct
    ])

opponent_df = pd.DataFrame(
    opponent_stats,
    columns=[
        "Opponent",
        "Matches",
        "Wins",
        "Losses",
        "Win %"
    ]
)

opponent_df = opponent_df.sort_values(
    "Win %",
    ascending=False
)

opponent_df
Out[52]:
Opponent Matches Wins Losses Win %
8 Punjab Kings 7 6 1 85.71
7 Chennai Super Kings 5 4 1 80.00
3 Gujarat Giants 4 3 1 75.00
1 Gujarat Titans 7 5 2 71.43
0 Mumbai Indians 8 5 3 62.50
5 Delhi Capitals 10 6 4 60.00
9 Lucknow Super Giants 4 2 2 50.00
10 UP Warriorz 4 2 2 50.00
4 Kolkata Knight Riders 4 2 2 50.00
2 Rajasthan Royals 5 2 3 40.00
6 Sunrisers Hyderabad 5 2 3 40.00
In [53]:
best = opponent_df.iloc[0]

print("Best Opponent for RCB")
print("Team :", best["Opponent"])
print("Win %:", best["Win %"])
Best Opponent for RCB
Team : Punjab Kings
Win %: 85.71
In [54]:
worst = opponent_df.iloc[-1]

print("Toughest Opponent for RCB")
print("Team :", worst["Opponent"])
print("Win %:", worst["Win %"])
Toughest Opponent for RCB
Team : Sunrisers Hyderabad
Win %: 40.0
In [55]:
worst = opponent_df.iloc[-1]

print("Toughest Opponent for RCB")
print("Team :", worst["Opponent"])
print("Win %:", worst["Win %"])
Toughest Opponent for RCB
Team : Sunrisers Hyderabad
Win %: 40.0
In [56]:
import matplotlib.pyplot as plt

plt.figure(figsize=(12,6))

plt.barh(
    opponent_df["Opponent"],
    opponent_df["Win %"]
)

plt.xlabel("Win Percentage")
plt.ylabel("Opponent")
plt.title("RCB Opponent-wise Win Percentage")

plt.gca().invert_yaxis()

plt.tight_layout()
plt.show()
No description has been provided for this image
In [57]:
display(opponent_df.head(5))
Opponent Matches Wins Losses Win %
8 Punjab Kings 7 6 1 85.71
7 Chennai Super Kings 5 4 1 80.00
3 Gujarat Giants 4 3 1 75.00
1 Gujarat Titans 7 5 2 71.43
0 Mumbai Indians 8 5 3 62.50
In [58]:
# Kohli deliveries
kohli = deliveries_df[
    deliveries_df["batter"] == "V Kohli"
]

# Balls faced by Kohli in each match
kohli_match_stats = (
    kohli.groupby("match_id")
    .agg(
        Balls=("batter", "count"),
        Runs=("runs_batter", "sum")
    )
    .reset_index()
)

# Matches where Kohli faced 30+ balls
kohli_30 = kohli_match_stats[
    kohli_match_stats["Balls"] >= 30
]

# Merge with match results
kohli_30 = kohli_30.merge(
    matches_df[["match_id","winner"]],
    on="match_id"
)

rcb_names = [
    "Royal Challengers Bengaluru",
    "Royal Challengers Bangalore"
]

# Count wins
wins = kohli_30[
    kohli_30["winner"].isin(rcb_names)
].shape[0]

matches = len(kohli_30)

win_pct = round(
    (wins / matches) * 100,
    2
)

print("Matches where Kohli faced 30+ balls:", matches)
print("RCB Wins:", wins)
print("RCB Win %:", win_pct)
Matches where Kohli faced 30+ balls: 24
RCB Wins: 22
RCB Win %: 91.67
In [59]:
kohli_match_stats = kohli_match_stats.merge(
    matches_df[["match_id","winner"]],
    on="match_id"
)

kohli_match_stats["Category"] = np.where(
    kohli_match_stats["Balls"] >= 30,
    "30+ Balls",
    "<30 Balls"
)

summary = []

for cat in kohli_match_stats["Category"].unique():

    temp = kohli_match_stats[
        kohli_match_stats["Category"] == cat
    ]

    wins = temp[
        temp["winner"].isin(rcb_names)
    ].shape[0]

    total = len(temp)

    summary.append([
        cat,
        total,
        wins,
        round(wins/total*100,2)
    ])

comparison = pd.DataFrame(
    summary,
    columns=[
        "Category",
        "Matches",
        "Wins",
        "Win %"
    ]
)

comparison
Out[59]:
Category Matches Wins Win %
0 <30 Balls 22 7 31.82
1 30+ Balls 24 22 91.67
In [60]:
import matplotlib.pyplot as plt

plt.figure(figsize=(6,4))

plt.bar(
    comparison["Category"],
    comparison["Win %"]
)

plt.ylabel("Win Percentage")
plt.title("RCB Win % vs Kohli Balls Faced")

plt.show()
No description has been provided for this image
In [61]:
# Tim David match-wise runs

tim_match_runs = (
    deliveries_df[
        deliveries_df["batter"] == "TH David"
    ]
    .groupby("match_id")
    .agg(
        Runs=("runs_batter","sum")
    )
    .reset_index()
)

# Matches where Tim David scored 25+

tim_25 = tim_match_runs[
    tim_match_runs["Runs"] > 25
]

# Merge with match results

tim_25 = tim_25.merge(
    matches_df[["match_id","winner"]],
    on="match_id"
)

rcb_names = [
    "Royal Challengers Bengaluru",
    "Royal Challengers Bangalore"
]

matches = len(tim_25)

wins = tim_25[
    tim_25["winner"].isin(rcb_names)
].shape[0]

losses = matches - wins

win_pct = round(
    (wins / matches) * 100,
    2
)

print("Matches where Tim David scored >25 :", matches)
print("Wins :", wins)
print("Losses :", losses)
print("Win % :", win_pct)
Matches where Tim David scored >25 : 8
Wins : 3
Losses : 5
Win % : 37.5
In [62]:
tim_match_runs = tim_match_runs.merge(
    matches_df[["match_id","winner"]],
    on="match_id"
)

tim_match_runs["Category"] = np.where(
    tim_match_runs["Runs"] > 25,
    ">25 Runs",
    "≤25 Runs"
)

comparison = []

for cat in tim_match_runs["Category"].unique():

    temp = tim_match_runs[
        tim_match_runs["Category"] == cat
    ]

    wins = temp[
        temp["winner"].isin(rcb_names)
    ].shape[0]

    total = len(temp)

    comparison.append([
        cat,
        total,
        wins,
        round((wins/total)*100,2)
    ])

comparison_df = pd.DataFrame(
    comparison,
    columns=[
        "Category",
        "Matches",
        "Wins",
        "Win %"
    ]
)

comparison_df
Out[62]:
Category Matches Wins Win %
0 ≤25 Runs 16 12 75.0
1 >25 Runs 8 3 37.5
In [63]:
import matplotlib.pyplot as plt

plt.figure(figsize=(6,4))

plt.bar(
    comparison_df["Category"],
    comparison_df["Win %"]
)

plt.ylabel("Win Percentage")
plt.title("RCB Win % vs Tim David Runs")

plt.show()
No description has been provided for this image
In [64]:
# Bhuvneshwar Kumar deliveries
bhuvi = deliveries_df[
    deliveries_df["bowler"] == "Bhuvneshwar Kumar"
]

# Runs conceded
runs_conceded = bhuvi["total_runs"].sum()

# Legal deliveries (exclude wides and no-balls if you have those columns)
balls_bowled = len(bhuvi)

overs = balls_bowled / 6

economy = round(runs_conceded / overs, 2)

print("Runs Conceded:", runs_conceded)
print("Balls Bowled:", balls_bowled)
print("Overs:", round(overs,1))
print("Economy:", economy)
Runs Conceded: 0
Balls Bowled: 0
Overs: 0.0
Economy: nan
/tmp/ipykernel_2675/1617914298.py:14: RuntimeWarning: invalid value encountered in divide
  economy = round(runs_conceded / overs, 2)
In [65]:
legal_balls = bhuvi[
    ~bhuvi["actual_delivery"].duplicated()
]

runs_conceded = bhuvi["total_runs"].sum()

overs = len(legal_balls) / 6

economy = round(runs_conceded / overs, 2)

print("Economy:", economy)
Economy: nan
/tmp/ipykernel_2675/441802929.py:9: RuntimeWarning: invalid value encountered in divide
  economy = round(runs_conceded / overs, 2)
In [67]:
# Search for Bhuvi in bowler names
sorted(
    deliveries_df["bowler"]
    .dropna()
    .unique()
)
Out[67]:
['A Badoni',
 'A Gardner',
 'A Kamboj',
 'A Reddy',
 'A Sutherland',
 'AB Kaur',
 'AC Kerr',
 'AD Russell',
 'AM Ghazanfar',
 'AR Patel',
 'AS Joseph',
 'AS Roy',
 'Abhinandan Singh',
 'Abhishek Sharma',
 'Akash Deep',
 'Akash Madhwal',
 'Akash Singh',
 'Arshad Khan',
 'Arshdeep Singh',
 'Auqib Nabi',
 'Avesh Khan',
 'Azmatullah Omarzai',
 'B Kumar',
 'Brijesh Sharma',
 'C Bosch',
 'C Green',
 'CA Henry',
 'CE Dean',
 'CL Tryon',
 'CV Varun',
 'D Hemalatha',
 'DA Payne',
 'DB Sharma',
 'DJS Dottin',
 'DL Chahar',
 'DS Rathi',
 'E Bisht',
 'E Malinga',
 'EA Perry',
 'Fazalhaq Farooqi',
 'G Coetzee',
 'G Voll',
 'G Wareham',
 'GF Linde',
 'GJ Maxwell',
 'GM Harris',
 'H Graham',
 'H Sharma',
 'HH Pandya',
 'HK Matthews',
 'HV Patel',
 'Happy Kumari',
 'Harpreet Brar',
 'Harsh Dubey',
 'Harshit Rana',
 'I Sharma',
 'J Little',
 'J Overton',
 'JA Duffy',
 'JC Archer',
 'JD Unadkat',
 'JJ Bumrah',
 'JL Jonassen',
 'JO Holder',
 'JR Hazlewood',
 'K Anjali Sarvani',
 'K Goud',
 'K Kartikeya',
 'K Khejroliya',
 'K Rabada',
 'KA Jamieson',
 'KH Pandya',
 'KJ Garth',
 'KK Ahmed',
 'KS Ahuja',
 'KS Gautam',
 'KV Sharma',
 'Kartik Tyagi',
 'Kuldeep Yadav',
 'L Ngidi',
 'LCN Smith',
 'LH Ferguson',
 'LK Bell',
 'LK Hamilton',
 'LS Livingstone',
 'M Jansen',
 'M Kapp',
 'M Mani',
 'M Markande',
 'M Pathirana',
 'M Prasidh Krishna',
 'M Siddharth',
 'M Theekshana',
 'MA Starc',
 'MJ Henry',
 'MJ Santner',
 'MJ Suthar',
 'MK Lomror',
 'MM Sharma',
 'MP Stoinis',
 'MP Yadav',
 'Mayank Dagar',
 'Meghna Singh',
 'Mohammad Nabi',
 'Mohammed Shami',
 'Mohammed Siraj',
 'Mukesh Kumar',
 'Musheer Khan',
 'Mustafizur Rahman',
 'N Burger',
 'N Shree Charani',
 'N Thushara',
 'N Wadhera',
 'N de Klerk',
 'NJ Carey',
 'NR Sciver-Brunt',
 'NSS Sharma',
 'Naveen-ul-Haq',
 'Nithish Kumar Reddy',
 'Noor Ahmad',
 'P Rawat',
 'PJ Cummins',
 'PS Sisodia',
 'PVD Chameera',
 'PWH de Silva',
 'Prince Yadav',
 'Priya Mishra',
 'R Ashwin',
 'R Parag',
 'R Sai Kishore',
 'R Shepherd',
 'RA Bawa',
 'RA Jadeja',
 'RD Chahar',
 'RJW Topley',
 'RP Yadav',
 'Raghvi Bist',
 'Rashid Khan',
 'Rasikh Salam',
 'Ravi Bishnoi',
 'Renuka Singh',
 'S Asha',
 'S Dube',
 'S Ecclestone',
 'S Gopal',
 'S Gupta',
 'S Ishaque',
 'S Ismail',
 'S Pandey',
 'S Rana',
 'S Sandeep Warrier',
 'SFM Devine',
 'SG Satghare',
 'SH Johnson',
 'SM Curran',
 'SN Thakur',
 'SP Narine',
 'SR Dubey',
 'SR Patil',
 'SZ Thakor',
 'Sakib Hussain',
 'Sandeep Sharma',
 'Shafali Verma',
 'Shahbaz Ahmed',
 'Shashank Singh',
 'Shivang Kumar',
 'Simarjeet Singh',
 'Suyash Sharma',
 'Swapnil Singh',
 'T Natarajan',
 'TA Boult',
 'TH David',
 'TM Head',
 'TM McGrath',
 'TP Kanwar',
 'TU Deshpande',
 'V Kaverappa',
 'V Nigam',
 'V Puthur',
 'VG Arora',
 'VJ Joshitha',
 'Vaishnavi Sharma',
 'Vijaykumar Vyshak',
 "W O'Rourke",
 'WG Jacks',
 'XC Bartlett',
 'YS Chahal',
 'Yash Dayal',
 'Yash Thakur']
In [68]:
[x for x in deliveries_df["bowler"].dropna().unique()
 if "Kumar" in x]
Out[68]:
['B Kumar',
 'Mukesh Kumar',
 'Shivang Kumar',
 'Nithish Kumar Reddy',
 'Happy Kumari']
In [70]:
bhuvi = deliveries_df[
    deliveries_df["bowler"] == "B Kumar"
]
In [71]:
runs_conceded = bhuvi["total_runs"].sum()

balls_bowled = len(bhuvi)

economy = round(
    runs_conceded / (balls_bowled/6),
    2
)

print("Runs Conceded:", runs_conceded)
print("Balls Bowled:", balls_bowled)
print("Economy:", economy)
Runs Conceded: 1068
Balls Bowled: 743
Economy: 8.62
In [73]:
# Krunal match-wise runs
krunal_runs = (
    deliveries_df[
        deliveries_df["batter"] == "KH Pandya"
    ]
    .groupby("match_id")
    .agg(Runs=("runs_batter", "sum"))
    .reset_index()
)

# RCB team runs per match
rcb_runs = (
    rcb_df.groupby("match_id")
    .agg(Team_Runs=("runs_batter", "sum"))
    .reset_index()
)

# Merge
krunal_contribution = krunal_runs.merge(
    rcb_runs,
    on="match_id"
)

krunal_contribution["Contribution_%"] = round(
    (krunal_contribution["Runs"] /
     krunal_contribution["Team_Runs"]) * 100,
    2
)

krunal_contribution.head()
Out[73]:
match_id Runs Team_Runs Contribution_%
0 1473445 0 185 0.00
1 1473451 5 162 3.09
2 1473461 18 148 12.16
3 1473471 1 94 1.06
4 1473483 73 161 45.34
In [74]:
print(
    "Average Contribution:",
    round(
        krunal_contribution["Contribution_%"].mean(),
        2
    ),
    "%"
)
Average Contribution: 12.03 %
In [75]:
krunal_contribution = krunal_contribution.merge(
    matches_df[["match_id", "winner"]],
    on="match_id"
)

rcb_names = [
    "Royal Challengers Bengaluru",
    "Royal Challengers Bangalore"
]

wins = krunal_contribution[
    krunal_contribution["winner"].isin(rcb_names)
]

print(
    "Average Contribution in Wins:",
    round(wins["Contribution_%"].mean(), 2),
    "%"
)
Average Contribution in Wins: 17.87 %
In [76]:
krunal_bowling = deliveries_df[
    deliveries_df["bowler"] == "KH Pandya"
]

wickets = krunal_bowling[
    (krunal_bowling["wicket"] == 1) &
    (krunal_bowling["dismissal_type"] != "run out")
].shape[0]

runs_conceded = krunal_bowling["total_runs"].sum()

overs = len(krunal_bowling) / 6

economy = round(runs_conceded / overs, 2)

print("Wickets:", wickets)
print("Economy:", economy)
Wickets: 31
Economy: 8.14
In [77]:
krunal_20 = krunal_runs[
    krunal_runs["Runs"] >= 20
]

krunal_20 = krunal_20.merge(
    matches_df[["match_id","winner"]],
    on="match_id"
)

wins = krunal_20[
    krunal_20["winner"].isin(rcb_names)
].shape[0]

total = len(krunal_20)

print(
    "RCB Win % when Krunal scores 20+ runs:",
    round(wins/total*100,2)
)
RCB Win % when Krunal scores 20+ runs: 66.67
In [78]:
krunal_20 = krunal_runs[
    krunal_runs["Runs"] >= 20
]

krunal_20 = krunal_20.merge(
    matches_df[["match_id","winner"]],
    on="match_id"
)

wins = krunal_20[
    krunal_20["winner"].isin(rcb_names)
].shape[0]

total = len(krunal_20)

print(
    "RCB Win % when Krunal scores 20+ runs:",
    round(wins/total*100,2)
)
RCB Win % when Krunal scores 20+ runs: 66.67