Zomato SQL Analysis Project
Zomato SQL Analysis Project
Q1 Write a query to find the top 5 most frequently ordered dishes by the customer
named "Arjun Mehta" in the last 2 years
-- Scratch query for Q1: returns the calendar date two years before today,
-- i.e. the lower bound of a "last 2 years" window.
-- NOTE(review): the alias says CurrentDate although the value is today minus
-- two years; the alias is kept so the result column name does not change.
-- NOTE(review): no dish-ranking query for Q1 follows in this section.
-- The statement is terminated explicitly because T-SQL requires the statement
-- that precedes a WITH (common table expression) to end with a semicolon.
SELECT DATEADD(YEAR, -2, CAST(GETDATE() AS date)) AS CurrentDate;
-- Order volume per two-hour time slot, busiest slot first.
-- (No question heading precedes this query in the file.)
WITH Popular_time_slot AS
(
    SELECT
        -- Integer division by 2 maps hours 0-1 to bucket 0, 2-3 to bucket 1,
        -- ... and 22-23 to bucket 11. A NULL order_time yields a NULL slot.
        CASE DATEPART(HOUR, o.order_time) / 2
            WHEN 0  THEN '00:00 - 02:00'
            WHEN 1  THEN '02:00 - 04:00'
            WHEN 2  THEN '04:00 - 06:00'
            WHEN 3  THEN '06:00 - 08:00'
            WHEN 4  THEN '08:00 - 10:00'
            WHEN 5  THEN '10:00 - 12:00'
            WHEN 6  THEN '12:00 - 14:00'
            WHEN 7  THEN '14:00 - 16:00'
            WHEN 8  THEN '16:00 - 18:00'
            WHEN 9  THEN '18:00 - 20:00'
            WHEN 10 THEN '20:00 - 22:00'
            WHEN 11 THEN '22:00 - 00:00'
        END AS Time_Slot
    FROM Orders AS o
)
SELECT
    Time_Slot,
    COUNT(*) AS Total_Orders
FROM Popular_time_slot
GROUP BY Time_Slot
-- Sort by the same aggregate that is displayed; Time_Slot breaks ties so the
-- output order is deterministic.
ORDER BY
    Total_Orders DESC,
    Time_Slot;
Q3 Order Value Analysis: Find the average order value (AOV) for each customer who
has placed more than 750 orders.
Return customer_name and AOV (Average Order Value).
-- Q3: average order value (AOV) for every customer with more than 750 orders,
-- highest AOV first.
SELECT
    c.customer_id,
    c.customer_name,
    -- The alias is referenced as "o" exactly as declared; "O" would fail on a
    -- case-sensitive database collation.
    CAST(AVG(o.total_amount) AS decimal(10,2)) AS Avg_Order_Value
FROM Customers AS c
INNER JOIN Orders AS o
    ON o.customer_id = c.customer_id
GROUP BY
    c.customer_id,
    c.customer_name
HAVING COUNT(o.order_id) > 750
-- Sorted on the unrounded average so rounding cannot reorder close values.
ORDER BY AVG(o.total_amount) DESC;
Q4 High Value Customers: List the customers who have spent more than 100K in
total on food orders.
Return customer_name and customer_id.
-- Q4: customers whose summed order amount exceeds 100000, biggest spender first.
SELECT
    spend.customer_id,
    spend.customer_name,
    spend.Total_Spent
FROM
(
    -- One row per customer with the sum of all of their order amounts.
    SELECT
        cust.customer_id,
        cust.customer_name,
        SUM(ord.total_amount) AS Total_Spent
    FROM Customers AS cust
    INNER JOIN Orders AS ord
        ON ord.customer_id = cust.customer_id
    GROUP BY
        cust.customer_id,
        cust.customer_name
) AS spend
WHERE spend.Total_Spent > 100000
ORDER BY spend.Total_Spent DESC;
Q5 Orders without Delivery: Write a query to find orders that were placed but not
delivered.
- Return each restaurant name, city, and number of not-delivered orders.
Here we have to include both cases: where the order was not fulfilled and where
the delivery status is "Not Delivered".
-- Q5: number of undelivered orders per restaurant name and city, largest first.
-- An order is counted when its delivery status is 'Not Delivered' or when the
-- status is NULL, which is what the LEFT JOIN produces for an order that has no
-- Deliveries row at all.
SELECT
    rest.restaurant_name,
    rest.city,
    COUNT(ord.order_id) AS Total_Not_Delivered_Orders
FROM Orders AS ord
LEFT JOIN Restaurants AS rest
    ON rest.restaurant_id = ord.restaurant_id
LEFT JOIN Deliveries AS dlv
    ON dlv.order_id = ord.order_id
WHERE dlv.delivery_status IS NULL
   OR dlv.delivery_status = 'Not Delivered'
GROUP BY
    rest.restaurant_name,
    rest.city
ORDER BY COUNT(ord.order_id) DESC;
Q6 Restaurant Revenue Ranking: Rank restaurants by their total revenue from the
last year, including their name, total revenue, and rank within their city
-- Q6: revenue per restaurant for calendar year 2023 (the year this file treats
-- as "last year"), ranked within each city.
-- DENSE_RANK gives tied revenues the same rank and leaves no gaps.
SELECT
    r.city,
    r.restaurant_name,
    SUM(o.total_amount) AS Revenue,
    DENSE_RANK() OVER (
        PARTITION BY r.city
        ORDER BY SUM(o.total_amount) DESC
    ) AS Rank_of_Restaurant
FROM Orders AS o
LEFT JOIN Restaurants AS r
    ON r.restaurant_id = o.restaurant_id
-- Half-open range selects exactly 2023; a plain "year < 2024" test would also
-- include every earlier year. Comparing the bare column keeps the predicate
-- index-friendly.
WHERE o.order_date >= '20230101'
  AND o.order_date <  '20240101'
GROUP BY
    r.city,
    r.restaurant_name;
Top 3 Restaurants in Each City Based on Their Highest Revenue
-- Top 3 restaurants per city by revenue for calendar year 2023 (the year this
-- file treats as "last year"). Because DENSE_RANK is used, ties share a rank,
-- so a city can return more than three rows.
WITH Rankin_Table AS
(
    SELECT
        r.city AS City,
        r.restaurant_name AS Restaurant,
        SUM(o.total_amount) AS Revenue,
        DENSE_RANK() OVER (
            PARTITION BY r.city
            ORDER BY SUM(o.total_amount) DESC
        ) AS Rank_of_Restaurant
    FROM Orders AS o
    LEFT JOIN Restaurants AS r
        ON r.restaurant_id = o.restaurant_id
    -- Half-open range selects exactly 2023; a plain "year < 2024" test would
    -- also include every earlier year.
    WHERE o.order_date >= '20230101'
      AND o.order_date <  '20240101'
    GROUP BY
        r.city,
        r.restaurant_name
)
SELECT
    City,
    Restaurant,
    Revenue,
    Rank_of_Restaurant
FROM Rankin_Table
WHERE Rank_of_Restaurant <= 3;
Q8 Customer Churn
Find Customers who haven't placed an order in 2024 but did in 2023
-- Q8: churned customers - at least one order in 2023 and no order in 2024.
-- Driving from Customers with EXISTS returns each customer once, so no
-- DISTINCT is needed.
-- c.* is kept because the Customers column list is not visible in this file.
SELECT c.*
FROM Customers AS c
WHERE EXISTS
      (
          SELECT 1
          FROM Orders AS o23
          WHERE o23.customer_id = c.customer_id
            AND o23.order_date >= '20230101'
            AND o23.order_date <  '20240101'
      )
  -- NOT EXISTS instead of NOT IN: a single NULL customer_id among the 2024
  -- orders would make NOT IN evaluate to UNKNOWN for every row and return
  -- nothing.
  AND NOT EXISTS
      (
          SELECT 1
          FROM Orders AS o24
          WHERE o24.customer_id = c.customer_id
            AND o24.order_date >= '20240101'
            AND o24.order_date <  '20250101'
      )
ORDER BY c.customer_id;
-- Cancellation rate per restaurant for orders placed in 2024, restaurants with
-- the most orders first.
-- An order is treated as cancelled when it has no matching row in Deliveries
-- (d.delivery_id is NULL after the LEFT JOIN).
-- (No question heading precedes this query in the file.)
WITH CANCELLED_RATE AS
(
    SELECT
        o.restaurant_id,
        COUNT(o.order_id) AS Total_Orders,
        COUNT(CASE WHEN d.delivery_id IS NULL THEN 1 END) AS Nr_of_Cancelled_Orders
    FROM Orders AS o
    LEFT JOIN Deliveries AS d
        ON d.order_id = o.order_id
    WHERE o.order_date >= '20240101'
      AND o.order_date <  '20250101'
    GROUP BY o.restaurant_id
)
SELECT
    restaurant_id,
    Total_Orders,
    Nr_of_Cancelled_Orders,
    -- Multiplying by 100.0 first forces decimal arithmetic (no integer
    -- division). Total_Orders is never 0 because a group only exists when it
    -- has at least one order row.
    CAST(100.0 * Nr_of_Cancelled_Orders / Total_Orders AS decimal(10,2)) AS Cancel_rate
FROM CANCELLED_RATE
ORDER BY Total_Orders DESC;
-- Cancellation percentage per restaurant for 2023 and 2024, side by side.
-- Only restaurants that have orders in both years are returned (inner join).
-- An order is treated as cancelled when it has no matching row in Deliveries.
WITH Yearly_Cancellations AS
(
    -- Order and cancellation counts per restaurant and calendar year.
    SELECT
        o.restaurant_id,
        YEAR(o.order_date) AS Order_Year,
        COUNT(o.order_id) AS Total_Orders,
        COUNT(CASE WHEN d.delivery_id IS NULL THEN 1 END) AS Nr_of_Cancelled_Orders
    FROM Orders AS o
    LEFT JOIN Deliveries AS d
        ON d.order_id = o.order_id
    WHERE o.order_date >= '20230101'
      AND o.order_date <  '20250101'
    GROUP BY
        o.restaurant_id,
        YEAR(o.order_date)
),
Yearly_Cancel_Percent AS
(
    -- Cancelled orders as a percentage of all orders, two decimals.
    -- Total_Orders is never 0 because a group only exists when it has rows.
    SELECT
        restaurant_id,
        Order_Year,
        CAST(100.0 * Nr_of_Cancelled_Orders / Total_Orders AS decimal(10,2)) AS Cancel_Percent
    FROM Yearly_Cancellations
)
SELECT
    cy.restaurant_id,
    ly.Cancel_Percent AS Cancellation_Percent_of_2023,
    cy.Cancel_Percent AS Cancel_Percent_of_2024
FROM Yearly_Cancel_Percent AS cy
INNER JOIN Yearly_Cancel_Percent AS ly
    ON ly.restaurant_id = cy.restaurant_id
WHERE cy.Order_Year = 2024   -- current year
  AND ly.Order_Year = 2023   -- last year
ORDER BY cy.restaurant_id;
Q10 Rider Average Delivery Time
Determine each rider's average delivery time
-- Q10: average time, in minutes, each rider takes to deliver an order.
-- Only deliveries with status 'Delivered' are considered.
WITH Riders_Avg_Delivery_Time AS
(
    SELECT
        r.rider_id,
        r.rider_name,
        CAST(
            CASE
                -- 1440 = minutes in a day. A delivery_time earlier than
                -- order_time is treated as a delivery that crossed midnight.
                -- NOTE(review): assumes both columns hold time-of-day values
                -- - confirm against the table definitions.
                WHEN d.delivery_time < o.order_time
                    THEN 1440 - ABS(DATEDIFF(MINUTE, o.order_time, d.delivery_time))
                ELSE ABS(DATEDIFF(MINUTE, o.order_time, d.delivery_time))
            END AS decimal(10,2)
        ) AS Time_Taken_to_deliver
    FROM Orders AS o
    -- The status filter below discards unmatched orders, so this is an inner
    -- join in effect.
    INNER JOIN Deliveries AS d
        ON d.order_id = o.order_id
    LEFT JOIN Riders AS r
        ON r.rider_id = d.rider_id
    WHERE d.delivery_status = 'Delivered'
)
SELECT
    rider_id,
    rider_name,
    CAST(ROUND(AVG(Time_Taken_to_deliver), 2) AS decimal(10,2)) AS Avg_Time_By_Riders_in_MINs
FROM Riders_Avg_Delivery_Time
GROUP BY
    rider_id,
    rider_name
ORDER BY rider_id;
-- Month-over-month growth (in percent) of delivered orders per restaurant.
-- The previous value comes from the preceding month that has delivered orders
-- for the same restaurant; the first month of each restaurant has no previous
-- value, so its growth rate is NULL.
WITH monthly_delivered AS
(
    -- Delivered-order count per restaurant and calendar month.
    SELECT
        ord.restaurant_id,
        YEAR(ord.order_date) AS Order_year,
        MONTH(ord.order_date) AS Order_Month,
        FORMAT(ord.order_date, 'MMM yyyy') AS Month_year,
        COUNT(dlv.delivery_id) AS delivered_orders
    FROM Orders AS ord
    INNER JOIN Deliveries AS dlv
        ON dlv.order_id = ord.order_id
    WHERE dlv.delivery_status = 'Delivered'
    GROUP BY
        ord.restaurant_id,
        YEAR(ord.order_date),
        MONTH(ord.order_date),
        FORMAT(ord.order_date, 'MMM yyyy')
),
monthly_with_previous AS
(
    -- Pair every month with the preceding month's count; counts are cast to
    -- decimal so the growth division below is not an integer division.
    SELECT
        restaurant_id,
        Order_year,
        Order_Month,
        Month_year,
        CAST(delivered_orders AS decimal(10,2)) AS Current_Month_Orders_Delivered,
        CAST(
            LAG(delivered_orders) OVER (
                PARTITION BY restaurant_id
                ORDER BY Order_year, Order_Month
            ) AS decimal(10,2)
        ) AS Prev_Month_Orders_delivered
    FROM monthly_delivered
)
SELECT
    restaurant_id,
    Month_year,
    Current_Month_Orders_Delivered,
    Prev_Month_Orders_delivered,
    CAST(
        (Current_Month_Orders_Delivered - Prev_Month_Orders_delivered)
            / Prev_Month_Orders_delivered * 100
        AS decimal(10,2)
    ) AS Grow_Rate_in_Orders_Delivered
FROM monthly_with_previous
ORDER BY
    restaurant_id,
    Order_year,
    Order_Month;
-- Customer segmentation: total revenue and order count per customer category.
-- A customer is 'Gold' when their total spend exceeds the average amount of a
-- single order across all orders, otherwise 'Silver'.
-- (No question heading precedes this query in the file.)
SELECT
    seg.Customer_Category,
    SUM(seg.Total_Spend) AS Total_Revenue,
    SUM(seg.Nr_of_Orders) AS Total_Orders
FROM
(
    -- One row per customer with order count, total spend and category.
    SELECT
        c.customer_id,
        COUNT(o.order_id) AS Nr_of_Orders,
        SUM(o.total_amount) AS Total_Spend,
        CASE
            WHEN SUM(o.total_amount) > (SELECT AVG(total_amount) FROM Orders)
                THEN 'Gold'
            ELSE 'Silver'
        END AS Customer_Category
    FROM Orders AS o
    INNER JOIN Customers AS c
        ON c.customer_id = o.customer_id
    GROUP BY
        c.customer_id,
        c.customer_name
) AS seg
GROUP BY seg.Customer_Category;
-- Monthly earnings per rider, computed as 8% of the total amount of the orders
-- they delivered (status 'Delivered') in that month.
-- (No question heading precedes this query in the file.)
WITH Riders_Monthly_Earning AS
(
    SELECT
        rd.rider_id,
        rd.rider_name,
        YEAR(o.order_date) AS Order_year,
        MONTH(o.order_date) AS Order_Month,
        FORMAT(o.order_date, 'MMMM yyyy') AS Month_year,
        CAST(SUM(o.total_amount) * 0.08 AS decimal(10,2)) AS Total_Earning_of_Rider
    FROM Orders AS o
    -- The status filter below discards unmatched orders, so this is an inner
    -- join in effect.
    INNER JOIN Deliveries AS d
        ON d.order_id = o.order_id
    LEFT JOIN Riders AS rd
        ON rd.rider_id = d.rider_id
    WHERE d.delivery_status = 'Delivered'
    GROUP BY
        rd.rider_id,
        rd.rider_name,
        YEAR(o.order_date),
        MONTH(o.order_date),
        FORMAT(o.order_date, 'MMMM yyyy')
)
SELECT
    rider_id,
    rider_name,
    Month_year,
    Total_Earning_of_Rider
FROM Riders_Monthly_Earning
-- Chronological order per rider; the numeric year and month are used because
-- Month_year is text and would sort alphabetically.
ORDER BY
    rider_id,
    Order_year,
    Order_Month;
-- Rider ratings: number of deliveries per rider in each star bucket, based on
-- the minutes between order_time and delivery_time for 'Delivered' deliveries.
--   up to 15 minutes      -> '5 star'
--   over 15, up to 20     -> '4 Star'
--   anything else         -> '3 Star'
-- (No question heading precedes this query in the file.)
WITH Main_cte AS
(
    SELECT
        d.rider_id AS Rider_id,
        CASE
            -- 1440 = minutes in a day. A delivery_time earlier than order_time
            -- is treated as a delivery that crossed midnight.
            -- NOTE(review): assumes both columns hold time-of-day values -
            -- confirm against the table definitions.
            WHEN d.delivery_time < o.order_time
                THEN 1440 - ABS(DATEDIFF(MINUTE, o.order_time, d.delivery_time))
            ELSE ABS(DATEDIFF(MINUTE, o.order_time, d.delivery_time))
        END AS Time_taken_to_Deliver
    FROM Orders AS o
    INNER JOIN Deliveries AS d
        ON o.order_id = d.order_id
    WHERE d.delivery_status = 'Delivered'
),
Final AS
(
    SELECT
        Rider_id,
        -- NOTE(review): '5 star' is lower-case while the other labels use
        -- 'Star'; the labels are left exactly as they were because they are
        -- output values.
        CASE
            WHEN Time_taken_to_Deliver <= 15 THEN '5 star'
            WHEN Time_taken_to_Deliver <= 20 THEN '4 Star'
            ELSE '3 Star'
        END AS STARS
    FROM Main_cte
)
SELECT
    Rider_id,
    STARS,
    COUNT(STARS) AS Total_Stars
FROM Final
GROUP BY
    Rider_id,
    STARS
ORDER BY
    Rider_id,
    COUNT(STARS) DESC;
-- Busiest weekday per restaurant: the weekday (or weekdays, when tied) with
-- the most orders, restaurants with the highest peak first.
-- (No question heading precedes this query in the file.)
WITH Peak_day_for_Restaurant AS
(
    SELECT
        o.restaurant_id,
        r.restaurant_name AS Restaurant,
        DATENAME(WEEKDAY, o.order_date) AS Weekday_name,
        COUNT(o.order_id) AS Nr_of_Orders,
        -- Rank 1 = weekday with the most orders for that restaurant; ties
        -- share the rank.
        DENSE_RANK() OVER (
            PARTITION BY o.restaurant_id
            ORDER BY COUNT(o.order_id) DESC
        ) AS Rank_of_Week_Day
    FROM Orders AS o
    LEFT JOIN Restaurants AS r
        ON r.restaurant_id = o.restaurant_id
    GROUP BY
        o.restaurant_id,
        r.restaurant_name,
        DATENAME(WEEKDAY, o.order_date)
)
SELECT
    Restaurant,
    Weekday_name,
    Nr_of_Orders,
    Rank_of_Week_Day
FROM Peak_day_for_Restaurant
WHERE Rank_of_Week_Day = 1
ORDER BY Nr_of_Orders DESC;
-- NOTE(review): a dangling "Year, Month_Number" line followed this query. It
-- looks like the tail of an ORDER BY from a query that is not present here; it
-- is preserved only in this comment so it no longer breaks the statement above.
Q18 Rider Efficiency
Evaluate rider efficiency by determining average delivery times and identifying
those with the lowest and highest average delivery time
-- Q18: the lowest and the highest per-rider average delivery time, in minutes,
-- over deliveries with status 'Delivered'.
-- NOTE(review): the heading asks to identify the riders, but this query
-- returns only the two extreme values, not which rider they belong to.
SELECT
    MIN(rider_avg.Avg_Time_Taken_to_Deliver) AS Min_Avg_Time_taken_to_deliver,
    MAX(rider_avg.Avg_Time_Taken_to_Deliver) AS Max_Avg_Time_taken_to_deliver
FROM
(
    -- Average delivery time per rider, rounded to two decimals.
    SELECT
        r.rider_id AS Rider_Id,
        r.rider_name AS Rider_Name,
        CAST(
            ROUND(
                AVG(
                    CAST(
                        CASE
                            -- 1440 = minutes in a day. A delivery_time earlier
                            -- than order_time is treated as a delivery that
                            -- crossed midnight.
                            -- NOTE(review): assumes time-of-day columns -
                            -- confirm against the table definitions.
                            WHEN d.delivery_time < o.order_time
                                THEN 1440 - ABS(DATEDIFF(MINUTE, o.order_time, d.delivery_time))
                            ELSE ABS(DATEDIFF(MINUTE, o.order_time, d.delivery_time))
                        END AS decimal(10,2)
                    )
                ),
                2
            ) AS decimal(10,2)
        ) AS Avg_Time_Taken_to_Deliver
    FROM Orders AS o
    -- The status filter below discards unmatched orders, so this is an inner
    -- join in effect.
    INNER JOIN Deliveries AS d
        ON d.order_id = o.order_id
    LEFT JOIN Riders AS r
        ON r.rider_id = d.rider_id
    WHERE d.delivery_status = 'Delivered'
    GROUP BY
        r.rider_id,
        r.rider_name
) AS rider_avg;
Q19 Order Item Popularity:
Track the popularity of specific order items over time and identify seasonal
demand spikes