Top 50 SQL for Mastery

Recyclable and Low Fat Products

def find_products(products: pd.DataFrame) -> pd.DataFrame:
    # return products[(products["low_fats"] == "Y") & (products["recyclable"] =="Y")].loc[:,["product_id"]]
    return products[(products["low_fats"] == "Y") & (products["recyclable"] =="Y")][["product_id"]]

select product_id from Products where low_fats = "Y" and recyclable = "Y"

Note: can use df.iloc[:, [“column_name”]] or df[[“column_name]] df[“column_name] returns series.

Find Customer Refree

def find_customer_referee(customer: pd.DataFrame) -> pd.DataFrame:
    return customer[(customer["referee_id"] != 2) | (customer["referee_id"].isnull())][["name"]]

def find_customer_referee(customer: pd.DataFrame) -> pd.DataFrame:
    customer.fillna(0, inplace=True)
    return customer.loc[customer["referee_id"] !=2 , ["name"]]

Note: can insert the logic in the loc

select name from Customer where referee_id != 2 or referee_id is Null

Big countries

def big_countries(world: pd.DataFrame) -> pd.DataFrame:
    return world[(world["area"] >= 3000000) | (world["population"] > 25000000)][["name", "population", "area"]]

select name, population, area from world where population >= 25000000 or area >= 3000000

Article View

def article_views(views: pd.DataFrame) -> pd.DataFrame:
    return views[views["viewer_id"] == views["author_id"]][["author_id"]].drop_duplicates()
                .sort_values("author_id").rename(columns={"author_id": "id"})

select distinct(author_id) as id from Views where author_id = viewer_id order by id

invalid tweets

select tweet_id from Tweets where length(content) >15

def invalid_tweets(tweets: pd.DataFrame) -> pd.DataFrame:22
    return tweets.query(f"content.str.len() >15")[["tweet_id"]]

    return tweets.loc[tweets.content.str.len() >15, ["tweet_id"]]

Replace Employee id

def replace_employee_id(employees: pd.DataFrame, employee_uni: pd.DataFrame) -> pd.DataFrame:
    return pd.merge(employees, employee_uni, how="left", on=["id", "id"])[["unique_id", "name"]]

select unique_id, name from Employees left join EmployeeUNI on Employees.id = EmployeeUNI.id

Product Sales Analysis

SELECT product_name, year, price from Sales left join Product on Sales.product_id = Product.product_id

customer-who-visited-but-did-not-make-any-transactions

SELECT 
 customer_id,
 COUNT(*) AS count_no_trans

FROM Visits LEFT JOIN Transactions ON Visits.visit_id = Transactions.visit_id WHERE transaction_id IS NULL GROUP BY customer_id; ``` 9. Rising Temperature

import pandas as pd

def rising_temperature(weather: pd.DataFrame) -> pd.DataFrame:
    if weather.size:
        weather.sort_values(by=['recordDate'], inplace=True)
        weather.reset_index(inplace=True)
        weather.recordDate= pd.to_datetime(weather.recordDate, format="-%Y-%m-%d")
        new_dates = pd.date_range(start=weather['recordDate'][0],
                                  end=weather['recordDate'][len(weather)-1],freq='D')

        weather = weather.set_index("recordDate").reindex(new_dates).reset_index()
        weather.rename(columns={"index": "recordDate"}, inplace=True)
        weather["change"] = weather.temperature - weather.temperature.shift(1, fill_value=weather.temperature[0])
        return weather[weather["change"] > 0.0][["id"]].rename(columns={"id": "Id"})
    else:
        return weather[["id"]].rename(columns={"id":"ID"})

WITH PreviousWeatherData AS
(
    SELECT 
        id,
        recordDate,
        temperature, 
        LAG(temperature, 1) OVER (ORDER BY recordDate) AS PreviousTemperature,
        LAG(recordDate, 1) OVER (ORDER BY recordDate) AS PreviousRecordDate
    FROM 
        Weather
)

SELECT 
    id 
FROM 
    PreviousWeatherData
WHERE 
    temperature > PreviousTemperature
AND 
    recordDate = DATE_ADD(PreviousRecordDate, INTERVAL 1 DAY);

This might be slower, as seen in leetcode.

SELECT 
    w1.id
FROM 
    Weather w1
WHERE 
    w1.temperature > (
        SELECT 
            w2.temperature
        FROM 
            Weather w2
        WHERE 
            w2.recordDate = DATE_SUB(w1.recordDate, INTERVAL 1 DAY)
    );

SELECT 
    w1.id
FROM 
    Weather w1
JOIN 
    Weather w2
ON 
    DATEDIFF(w1.recordDate, w2.recordDate) = 1
WHERE 
    w1.temperature > w2.temperature;

Bonus:

select EmpLastName, EmpFirstName, ProjDesc, BldName, DeptName
from Employee, Building, Department, Project
where DeptBuilding = BldID
    and empDeptID = DeptID
    and EmpID = ProjManager
order by EmpLastName, DeptName, BldName

Multiple Tables could be used to join without using the word. Make sure to add where clause otherwise, data are added with each new row.

from Department
inner join employee
inner join Building
on empDeptID = DeptID
and DeptBuilding = BldId
order by DeptName, EmpSalary desc

Average Time of Process per machine

select a1.machine_id, round(avg(a2.timestamp- a1.timestamp), 3) as processing_time
from Activity a1 
join Activity a2
on a1.machine_id = a2.machine_id and a1.process_id = a2.process_id and a1.timestamp < a2.timestamp 
group by a1.machine_id

Employee Bonus

select name, bonus
from Employee
left join Bonus
on Employee.empId = Bonus.empId
where Bonus.bonus < 1000 or Bonus.bonus is null

Students and Examinations

select S.student_id, S.student_name, SU.subject_name, COUNT(E.student_id) as attended_exams
from Students S
CROSS JOIN Subjects SU
left join Examinations E
on S.student_id = E.student_id 
AND SU.subject_name = E.subject_name
Group By S.student_id, SU.subject_name
ORDER BY s.student_id

Managers with at least 5 direct reports

select b.name
from Employee a
Join Employee b on a.ManagerId = b.id
group by a.ManagerId
having count(*) >= 5

Confirmation rate

select s.user_id, round(avg(if(c.action ="confirmed", 1, 0)), 2 ) as confirmation_rate
from SignUps as s
left join Confirmations as c
on s.user_id = c.user_id
group by user_id

Not Boring Movie

select * 
from Cinema
where description != "boring" and id%2 !=0
order by rating desc

Average Selling Price

select P.product_id, ifnull(round(sum(US.units * P.price)/ sum(US.units), 2), 0) as average_price
from Prices as P
left join UnitsSold as US
on US.product_id = P.product_id and US.purchase_date >= P.start_date
    and US.purchase_date <= P.end_date
group by P.product_id

Project employees

select project_id, ROUND(AVG(experience_years),2) as average_years
from Project P
left join Employee E
on P.employee_id = E.employee_id
group by P.project_id

Percentage of Users attended a contest

select contest_id, Round(Count(user_id)* 100/ (select count(*) from Users), 2) as percentage
from Register
group by contest_id
order by percentage desc, contest_id asc

Queries quality and Percentage

select query_name, ROUND(AVG(Q.rating/ Q.position), 2) AS quality,
    Round(Sum(Q.rating < 3) *100 / Count(Q.rating), 2) as poor_query_percentage
from Queries Q
group by query_name

Monthly Transactions I

SELECT DATE_FORMAT(trans_date, "%Y-%m") as month, 
    country, 
    Count(*) as trans_count, 
    SUM(state = 'approved') as approved_count,
    SUM(amount) as trans_total_amount,
    SUM(Case when state = 'approved' then amount else 0 end) as approved_total_amount
FROM Transactions
GROUP BY month, country

Immediate Food Delivery

Game Play Analysis