数据科学家 算法工程师 面试准备 全套-github.com/LongxingTan/Machine-learning-interview
- https://dbeaver.io/download/
- primary key: unique & not null
- foreign key: refer to primary key in another table
>>> Basic
1050. 合作过至少三次的演员和导演
-- Actor/director pairs that appear in at least three ActorDirector rows.
SELECT
    actor_id,
    director_id
FROM ActorDirector
GROUP BY
    actor_id,
    director_id
HAVING COUNT(*) >= 3;
1076. Project Employees II
-- Project(s) with the highest count of non-NULL employee_id values.
-- WITH TIES (T-SQL) keeps every project tied for first place.
SELECT TOP (1) WITH TIES
    project_id
FROM Project
GROUP BY project_id
ORDER BY COUNT(employee_id) DESC;
1082. Sales Analysis I
-- Seller(s) with the largest total price.
-- WITH TIES (T-SQL) keeps every seller tied for first place.
SELECT TOP (1) WITH TIES
    seller_id
FROM Sales
GROUP BY seller_id
ORDER BY SUM(price) DESC;
1141. 查询近30天活跃用户数
-- Distinct users per activity_date for 2019-06-28 through 2019-07-27
-- (both bounds inclusive, i.e. a 30-day window).
SELECT
    activity_date AS day,
    COUNT(DISTINCT user_id) AS active_users
FROM Activity
WHERE activity_date >= '2019-06-28'
  AND activity_date <= '2019-07-27'
GROUP BY activity_date;
1148. 文章浏览 I
-- Ids that appear as both author and viewer on the same Views row,
-- de-duplicated and sorted ascending.
SELECT DISTINCT
    author_id AS id
FROM Views
WHERE viewer_id = author_id
ORDER BY id ASC;
1149. Article Views II
-- Viewers with more than one distinct article_id on a single view_date.
-- DISTINCT collapses viewers that qualify on several dates.
SELECT DISTINCT
    viewer_id AS id
FROM Views
GROUP BY
    viewer_id,
    view_date
HAVING COUNT(DISTINCT article_id) >= 2
ORDER BY id ASC;
182. 查找重复的电子邮箱
聚合函数(如 COUNT)通常需要与 GROUP BY 子句一起使用,并且过滤条件应该放在 HAVING 子句中。直接在 WHERE 子句中使用聚合函数会导致语法错误
-- Emails that occur in more than one Person row.
-- COUNT(email) only counts non-NULL values, so a NULL group never qualifies.
SELECT
    email
FROM Person
GROUP BY email
HAVING COUNT(email) > 1;
511. 游戏玩法分析 I
处理聚合查询时,MIN 是一个更通用的解决方案,适用于所有 SQL 数据库。TOP 1 则更适合用于非聚合查询中选择排序后的第一行记录
-- Earliest event_date per player_id.
SELECT
    player_id,
    MIN(event_date) AS first_login
FROM Activity
GROUP BY player_id;
578. Get Highest Answer Rate Question
-- Question with the highest answer rate, where a row counts as answered when
-- its answer_id is non-NULL.
-- The rate is answered rows / all rows of the question. Dividing by COUNT(*)
-- (never zero inside a group) rather than by the number of unanswered rows
-- avoids a divide-by-zero when every row of a question has an answer_id,
-- while ranking questions in the same order.
-- question_id is the tie-breaker so TOP 1 is deterministic when rates tie.
SELECT TOP (1)
    question_id AS survey_log
FROM survey_log
GROUP BY question_id
ORDER BY
    COUNT(answer_id) * 1.0 / COUNT(*) DESC,
    question_id ASC;
584. 寻找用户推荐人
-- Names of customers whose referee_id is not 2, including rows where it is NULL.
-- The explicit IS NULL test is required: `referee_id <> 2` is not true for NULL.
SELECT
    name
FROM Customer
WHERE referee_id IS NULL
   OR referee_id <> 2;
586. 订单最多的客户
-- customer_number with the most rows in orders.
-- LIMIT 1 returns a single row even if several customers tie.
SELECT
    customer_number
FROM
    orders
GROUP BY
    customer_number
ORDER BY
    COUNT(*) DESC
LIMIT 1;
595. 大的国家
-- Rows of World with area >= 3,000,000 or population >= 25,000,000.
SELECT
    name,
    population,
    area
FROM World
WHERE population >= 25000000
   OR area >= 3000000;
596. 超过5名学生的课
-- Classes that have five or more rows in Courses.
SELECT
    class
FROM
    Courses
GROUP BY
    class
HAVING
    COUNT(*) >= 5;
619. 只出现一次的最大数字
多一层为了空表格时输出null
-- Largest num that occurs exactly once in MyNumbers.
-- The inner query is used as a scalar subquery so that the statement still
-- returns one row (with NULL) when no num qualifies.
SELECT
    (
        SELECT num
        FROM MyNumbers
        GROUP BY num
        HAVING COUNT(*) = 1
        ORDER BY num DESC
        LIMIT 1
    ) AS num;
620. 有趣的电影
-- Rows of cinema with an odd id and a description other than 'boring',
-- highest rating first.
SELECT *
FROM cinema
WHERE id % 2 = 1
  AND description <> 'boring'
ORDER BY rating DESC;
>>> Case-when
610. 判断三角形
-- Adds a triangle column: 'Yes' when every pair of sides sums to more than
-- the third side, otherwise 'No'.
SELECT
    *,
    CASE
        WHEN x + y > z
         AND x + z > y
         AND y + z > x
        THEN 'Yes'
        ELSE 'No'
    END AS triangle
FROM Triangle;
627. 变更性别
-- Read-only variant, kept for reference (returns the swapped value without
-- modifying the table):
-- SELECT id, name,
-- (CASE WHEN sex = 'f' THEN 'm' ELSE 'f' END) AS sex, salary
-- FROM salary;

-- In-place swap: 'm' becomes 'f', every other value becomes 'm'.
-- There is no WHERE clause, so every row of salary is updated.
UPDATE salary
SET sex = CASE
              WHEN sex = 'm' THEN 'f'
              ELSE 'm'
          END;
1126. 查询活跃业务
-- Businesses that have more than one Events row whose occurances value is
-- above the average for that row's event_type.
-- NOTE(review): the column is spelled `occurances` here; confirm the spelling
-- against the actual Events schema.
WITH events_with_avg AS (
    -- Attach the per-event_type average to every row.
    -- The * 1.0 is presumably there to avoid integer averaging on engines
    -- where AVG over an integer column is truncated.
    SELECT
        *,
        AVG(occurances * 1.0) OVER (PARTITION BY event_type) AS avg_oc
    FROM Events
)
SELECT
    business_id
FROM events_with_avg
GROUP BY business_id
HAVING SUM(CASE WHEN occurances > avg_oc THEN 1 ELSE 0 END) > 1;
1142_User_Activity_for_the_Past_30_Days_II
1158. 市场分析 I
-- Per user: join_date and the number of joined Orders rows dated in 2019.
-- LEFT JOIN keeps users without any order; their CASE falls through to
-- ELSE 0, so they report 0 rather than NULL.
SELECT
    u.user_id AS buyer_id,
    u.join_date AS join_date,
    SUM(CASE WHEN YEAR(o.order_date) = 2019 THEN 1 ELSE 0 END) AS orders_in_2019
FROM Users AS u
LEFT JOIN Orders AS o
    ON o.buyer_id = u.user_id
GROUP BY
    u.user_id,
    u.join_date;
1159_Market_Analysis_II
1173_Immediate_Food_Delivery_I
1174. 即时食物配送 II
>>> JOIN
简单回顾下pandas中的merge
- 默认的 how='inner';left_on、right_on 的行为并不符合我之前的预期
- https://pandas.pydata.org/docs/reference/api/pandas.merge.html
175. 组合两个表
-- Every Person row with the city/state of its matching Address row.
-- LEFT JOIN keeps persons without an address; city and state are NULL for them.
SELECT
    p.firstName,
    p.lastName,
    a.city,
    a.state
FROM Person AS p
LEFT JOIN Address AS a
    ON a.personID = p.personId;
181. 超过经理收入的员工
self join需要给自身两个不同的alias
-- Employees whose salary is strictly greater than their manager's salary.
-- Employee is joined to itself, so each copy needs its own alias
-- (emp = the employee, mgr = the row referenced by emp.managerId).
-- INNER JOIN states the real semantics: an employee without a matching manager
-- row can never satisfy the salary comparison (comparison with NULL is not
-- true), so an outer join here would be filtered back to an inner join anyway.
SELECT
    emp.name AS Employee
FROM Employee AS emp
INNER JOIN Employee AS mgr
    ON emp.managerId = mgr.id
WHERE emp.salary > mgr.salary;
183. 从不订购的客户
-- Customers with no matching row in Orders.
-- Anti-join pattern: the LEFT JOIN keeps every customer, and o.id is NULL
-- exactly for customers where no order matched.
-- Assumes Orders.id is never NULL on real rows - TODO confirm against schema.
SELECT
    c.name AS Customers
FROM Customers AS c
LEFT JOIN Orders AS o
    ON c.id = o.customerId
WHERE o.id IS NULL;
577. 员工奖金
-- Employees whose bonus is below 1000 or NULL.
-- LEFT JOIN leaves b.bonus NULL for employees without a Bonus row, so the
-- IS NULL test keeps them in the result.
SELECT
    e.name,
    b.bonus
FROM Employee AS e
LEFT JOIN Bonus AS b
    ON b.empId = e.empId
WHERE b.bonus IS NULL
   OR b.bonus < 1000;
613. 直线上的最近距离
>>> WINDOW FUNCTION
603. 连续空余座位