With access to decades worth of data including player statistics like school attended, salaries, teams played for, height and weight, and more. using advanced SQL querying techniques to track how player statistics have changed over time and across different teams in the league.
- In each decade, how many schools were there that produced MLB players?
- What are the names of the top 5 schools that produced the most players?
- For each decade, what were the names of the top 3 schools that produced the most players?

SELECT * FROM [dbo].[schools];
SELECT * FROM [dbo].[school_details];
-- how many player in each decade
SELECT ROUND (yearID , -1) AS Decade , COUNT(DISTINCT schoolID ) AS NUM_SCHOOLS
FROM schools
GROUP BY ROUND (yearID , -1)
ORDER BY Decade;
-- top 5 schools that produced most player
SELECT TOP 5 SD.name_full, COUNT(DISTINCT S.playerID ) AS NUM_PLAYERS
FROM schools S LEFT JOIN school_details SD
ON S.schoolID=SD.schoolID
GROUP BY SD.name_full
ORDER BY NUM_PLAYERS DESC ;
-- for each decade name of the top 3 schools that produced most players
WITH SD AS (SELECT ROUND (S.yearID , -1)AS Decade , SD.name_full, COUNT(DISTINCT S.playerID ) AS NUM_PLAYERS
FROM schools S LEFT JOIN school_details SD
ON S.schoolID=SD.schoolID
GROUP BY ROUND (S.yearID , -1) , SD.name_full
),
RN AS (SELECT Decade,name_full,NUM_PLAYERS,
ROW_NUMBER() OVER(PARTITION BY Decade ORDER BY NUM_PLAYERS DESC) AS ROW_NUM
FROM SD)
SELECT Decade,name_full,NUM_PLAYERS
FROM RN
WHERE ROW_NUM <= 3
ORDER BY Decade DESC , ROW_NUM;
- Return the top 20% of teams in terms of average annual spending
- For each team, show the cumulative sum of spending over the years
- Return the first year that each team’s cumulative spending surpassed 1 billion

SELECT * FROM [dbo].[salaries];
-- return the top 20% of teams in term of average annual spending
WITH TS AS (SELECT teamID , yearID , SUM(CAST (salary AS bigint)) AS TOTAL_SPEND
FROM [dbo].[salaries]
GROUP BY teamID , yearID
),
SP AS (SELECT teamID , AVG(TOTAL_SPEND) AS AVG_SPEND ,
NTILE(5) OVER (ORDER BY AVG(TOTAL_SPEND) DESC ) AS SPEND_PCT
FROM TS
GROUP BY teamID)
SELECT teamID , ROUND(AVG_SPEND / 1000000 , 1 ) AS AVG_SPEND_MILLIONS FROM SP
WHERE SPEND_PCT=1;
-- for each team calculate the cumulative sum of spendinig over the years
WITH TS AS (SELECT teamID, yearID, SUM(CAST (salary AS BIGINT)) AS TOTAL_SPEND
FROM salaries
GROUP BY teamID , yearID)
SELECT teamID , yearID , TOTAL_SPEND/1000000 AS TOTAL_SPEND,
ROUND(SUM(TOTAL_SPEND) OVER( PARTITION BY teamID ORDER BY yearID) / 1000000,1 ) AS CUMULATIVE_SUM_MILIONS
FROM TS ;
-- return the first year that each team cumulative spending surpassed 1 bilion
WITH TS AS (SELECT teamID, yearID, SUM(CAST (salary AS BIGINT)) AS TOTAL_SPEND
FROM salaries
GROUP BY teamID , yearID),
CS AS (SELECT teamID , yearID ,
SUM(TOTAL_SPEND) OVER( PARTITION BY teamID ORDER BY yearID) AS CUMULATIVE_SUM
FROM TS ),
RN AS (SELECT teamID,yearID,CUMULATIVE_SUM ,
ROW_NUMBER() OVER(PARTITION BY teamID ORDER BY CUMULATIVE_SUM ) AS RN
FROM CS
WHERE CUMULATIVE_SUM > 1000000000)
SELECT teamID , yearID , CUMULATIVE_SUM
FROM RN
WHERE RN=1;
- For each player, calculate their age at their first (debut) game, their last game, and their career length (all in years). Sort from longest career to shortest career.
- What team did each player play on for their starting and ending years?
- How many players started and ended on the same team and also played for over a decade?

-- view player table and number of player
SELECT * FROM players ;
SELECT COUNT(playerID) FROM players;
-- for each player calculate their age at their first game and their last game
SELECT nameGiven,debut,finalGame,
TRY_CAST(CONCAT(birthYear,'-',
RIGHT('00'+ CAST(birthMonth AS VARCHAR),2),'-',
RIGHT('00'+ CAST(birthDay AS VARCHAR),2))AS DATE ) AS BIRTHDATE,
DATEDIFF(YEAR,TRY_CAST(CONCAT(birthYear,'-',birthMonth,'-',birthDay) AS DATE), debut ) AS STARTAGE ,
DATEDIFF(YEAR,TRY_CAST(CONCAT(birthYear,'-',birthMonth,'-',birthDay) AS DATE), finalGame ) AS ENDAGE ,
DATEDIFF(YEAR , debut , finalGame ) AS CAREER_LENGHT
FROM players
ORDER BY CAREER_LENGHT DESC ;
-- what team did each player play on for their starting and ending years ?
SELECT P.nameGiven,
S.yearID AS STARTING_YEAR ,S.teamID AS STARTING_TEAM,
E.yearID AS ENDING_YEAR ,E.teamID AS ENDING_TEAM
FROM players P INNER JOIN salaries S
ON P.playerID=S.playerID AND
YEAR(P.debut) =S.yearID
INNER JOIN salaries E
ON P.playerID=E.playerID AND
YEAR(P.finalGame) =E.yearID
WHERE S.teamID = E.teamID AND E.yearID - S.yearID > 10 ;
- Which players have the same birthday?
- Create a summary table that shows for each team, what percent of players bat right, left and both.
- How have average height and weight at debut game changed over the years, and what’s the decade-over-decade difference?

-- which player have the same birthday
WITH BN AS (SELECT TRY_CAST(CONCAT(birthYear,'-',birthMonth,'-',birthDay) AS DATE) AS BIRTHDATE,
nameGiven
FROM players)
SELECT BIRTHDATE , STRING_AGG(nameGiven , '- ') AS PLAYERS , COUNT(nameGiven)
FROM
BN
WHERE BIRTHDATE IS NOT NULL AND YEAR(BIRTHDATE) BETWEEN 1980 AND 1990
GROUP BY BIRTHDATE
HAVING COUNT(nameGiven) > 2
ORDER BY BIRTHDATE
;
-- SUMMARY TABLE
SELECT s.teamID,COUNT(s.playerID) AS NUM_PLAYERS,
ROUND(CAST(SUM(CASE WHEN P.bats='R' THEN 1 ELSE 0 END) AS FLOAT)/ COUNT(s.playerID) *100 ,1)AS BATS_RIGHT,
ROUND(CAST(SUM(CASE WHEN P.bats= 'L' THEN 1 ELSE 0 END) AS FLOAT)/ COUNT(s.playerID) *100 , 1) AS BATS_LEFT,
ROUND(CAST(SUM(CASE WHEN P.bats= 'B' THEN 1 ELSE 0 END) AS FLOAT) / COUNT(s.playerID) *100 , 1) AS BATS_BOTH
FROM salaries S LEFT JOIN players P
ON S.playerID = p.playerID
GROUP BY s.teamID ;
-- average height and weight at debut game changed over the years
WITH HW AS (SELECT ROUND(YEAR(debut),-1) AS DECADE ,AVG(height) AS AVG_HEIGHT , AVG(weight) AS AVG_WEIGHT
FROM players
GROUP BY ROUND(YEAR(debut),-1) )
SELECT DECADE,
AVG_HEIGHT - LAG(AVG_HEIGHT) OVER(ORDER BY DECADE) AS HEIGHT_DIFF,
AVG_WEIGHT - LAG(AVG_WEIGHT) OVER(ORDER BY DECADE) AS WEIGHT_DIFF
FROM HW
WHERE DECADE IS NOT NULL
;