1. Recyclable and Low Fat Products
1
2
3
def find_products(products: pd.DataFrame) -> pd.DataFrame:
    # return products[(products["low_fats"] == "Y") & (products["recyclable"] =="Y")].loc[:,["product_id"]]
    return products[(products["low_fats"] == "Y") & (products["recyclable"] =="Y")][["product_id"]]
1
select product_id from Products where low_fats = "Y" and recyclable = "Y"

Note: can use df.iloc[:, [“column_name”]] or df[[“column_name]] df[“column_name] returns series.

  1. Find Customer Refree
1
2
def find_customer_referee(customer: pd.DataFrame) -> pd.DataFrame:
    return customer[(customer["referee_id"] != 2) | (customer["referee_id"].isnull())][["name"]]

or

1
2
3
def find_customer_referee(customer: pd.DataFrame) -> pd.DataFrame:
    customer.fillna(0, inplace=True)
    return customer.loc[customer["referee_id"] !=2 , ["name"]]

Note: can insert the logic in the loc

1
select name from Customer where referee_id != 2 or referee_id is Null
  1. Big countries
1
2
def big_countries(world: pd.DataFrame) -> pd.DataFrame:
    return world[(world["area"] >= 3000000) | (world["population"] > 25000000)][["name", "population", "area"]]
1
select name, population, area from world where population >= 25000000 or area >= 3000000
  1. Article View
1
2
3
def article_views(views: pd.DataFrame) -> pd.DataFrame:
    return views[views["viewer_id"] == views["author_id"]][["author_id"]].drop_duplicates()
                .sort_values("author_id").rename(columns={"author_id": "id"})
1
select distinct(author_id) as id from Views where author_id = viewer_id order by id
  1. invalid tweets
1
select tweet_id from Tweets where length(content) >15 
1
2
def invalid_tweets(tweets: pd.DataFrame) -> pd.DataFrame:22
    return tweets.query(f"content.str.len() >15")[["tweet_id"]]

or

1
    return tweets.loc[tweets.content.str.len() >15, ["tweet_id"]]
  1. Replace Employee id
1
2
def replace_employee_id(employees: pd.DataFrame, employee_uni: pd.DataFrame) -> pd.DataFrame:
    return pd.merge(employees, employee_uni, how="left", on=["id", "id"])[["unique_id", "name"]]
1
select unique_id, name from Employees left join EmployeeUNI on Employees.id = EmployeeUNI.id
  1. Product Sales Analysis
1
SELECT product_name, year, price from Sales left join Product on Sales.product_id = Product.product_id
  1. customer-who-visited-but-did-not-make-any-transactions
    1
    2
    3
    
    SELECT 
     customer_id,
     COUNT(*) AS count_no_trans 
    

FROM Visits LEFT JOIN Transactions ON Visits.visit_id = Transactions.visit_id WHERE transaction_id IS NULL GROUP BY customer_id; ``` 9. Rising Temperature

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
import pandas as pd

def rising_temperature(weather: pd.DataFrame) -> pd.DataFrame:
    if weather.size:
        weather.sort_values(by=['recordDate'], inplace=True)
        weather.reset_index(inplace=True)
        weather.recordDate= pd.to_datetime(weather.recordDate, format="-%Y-%m-%d")
        new_dates = pd.date_range(start=weather['recordDate'][0],
                                  end=weather['recordDate'][len(weather)-1],freq='D')

        weather = weather.set_index("recordDate").reindex(new_dates).reset_index()
        weather.rename(columns={"index": "recordDate"}, inplace=True)
        weather["change"] = weather.temperature - weather.temperature.shift(1, fill_value=weather.temperature[0])
        return weather[weather["change"] > 0.0][["id"]].rename(columns={"id": "Id"})
    else:
        return weather[["id"]].rename(columns={"id":"ID"})
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
WITH PreviousWeatherData AS
(
    SELECT 
        id,
        recordDate,
        temperature, 
        LAG(temperature, 1) OVER (ORDER BY recordDate) AS PreviousTemperature,
        LAG(recordDate, 1) OVER (ORDER BY recordDate) AS PreviousRecordDate
    FROM 
        Weather
)

SELECT 
    id 
FROM 
    PreviousWeatherData
WHERE 
    temperature > PreviousTemperature
AND 
    recordDate = DATE_ADD(PreviousRecordDate, INTERVAL 1 DAY);

This might be slower, as seen in leetcode.

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
SELECT 
    w1.id
FROM 
    Weather w1
WHERE 
    w1.temperature > (
        SELECT 
            w2.temperature
        FROM 
            Weather w2
        WHERE 
            w2.recordDate = DATE_SUB(w1.recordDate, INTERVAL 1 DAY)
    );
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
SELECT 
    w1.id
FROM 
    Weather w1
JOIN 
    Weather w2
ON 
    DATEDIFF(w1.recordDate, w2.recordDate) = 1
WHERE 
    w1.temperature > w2.temperature;

Bonus:

1
2
3
4
5
6
select EmpLastName, EmpFirstName, ProjDesc, BldName, DeptName
from Employee, Building, Department, Project
where DeptBuilding = BldID
    and empDeptID = DeptID
    and EmpID = ProjManager
order by EmpLastName, DeptName, BldName

Multiple Tables could be used to join without using the word. Make sure to add where clause otherwise, data are added with each new row.

1
2
3
4
5
6
from Department
inner join employee
inner join Building
on empDeptID = DeptID
and DeptBuilding = BldId
order by DeptName, EmpSalary desc
  1. Average Time of Process per machine
1
2
3
4
5
select a1.machine_id, round(avg(a2.timestamp- a1.timestamp), 3) as processing_time
from Activity a1 
join Activity a2
on a1.machine_id = a2.machine_id and a1.process_id = a2.process_id and a1.timestamp < a2.timestamp 
group by a1.machine_id
  1. Employee Bonus
1
2
3
4
5
select name, bonus
from Employee
left join Bonus
on Employee.empId = Bonus.empId
where Bonus.bonus < 1000 or Bonus.bonus is null
  1. Students and Examinations
1
2
3
4
5
6
7
8
select S.student_id, S.student_name, SU.subject_name, COUNT(E.student_id) as attended_exams
from Students S
CROSS JOIN Subjects SU
left join Examinations E
on S.student_id = E.student_id 
AND SU.subject_name = E.subject_name
Group By S.student_id, SU.subject_name
ORDER BY s.student_id
  1. Managers with at least 5 direct reports
1
2
3
4
5
select b.name
from Employee a
Join Employee b on a.ManagerId = b.id
group by a.ManagerId
having count(*) >= 5
  1. Confirmation rate
1
2
3
4
5
select s.user_id, round(avg(if(c.action ="confirmed", 1, 0)), 2 ) as confirmation_rate
from SignUps as s
left join Confirmations as c
on s.user_id = c.user_id
group by user_id
  1. Not Boring Movie
1
2
3
4
select * 
from Cinema
where description != "boring" and id%2 !=0
order by rating desc
  1. Average Selling Price
1
2
3
4
5
6
select P.product_id, ifnull(round(sum(US.units * P.price)/ sum(US.units), 2), 0) as average_price
from Prices as P
left join UnitsSold as US
on US.product_id = P.product_id and US.purchase_date >= P.start_date
    and US.purchase_date <= P.end_date
group by P.product_id
  1. Project employees
  1. Percentage of Users attended a contest
  1. Queries quality and Percentage
  1. Monthly Transactions I
  1. Immediate Food Delivery
  1. Game Play Analysis