- Recyclable and Low Fat Products
1
2
3
| def find_products(products: pd.DataFrame) -> pd.DataFrame:
# return products[(products["low_fats"] == "Y") & (products["recyclable"] =="Y")].loc[:,["product_id"]]
return products[(products["low_fats"] == "Y") & (products["recyclable"] =="Y")][["product_id"]]
|
1
| select product_id from Products where low_fats = "Y" and recyclable = "Y"
|
Note: can use df.iloc[:, [“column_name”]] or df[[“column_name]]
df[“column_name] returns series.
- Find Customer Refree
1
2
| def find_customer_referee(customer: pd.DataFrame) -> pd.DataFrame:
return customer[(customer["referee_id"] != 2) | (customer["referee_id"].isnull())][["name"]]
|
or
1
2
3
| def find_customer_referee(customer: pd.DataFrame) -> pd.DataFrame:
customer.fillna(0, inplace=True)
return customer.loc[customer["referee_id"] !=2 , ["name"]]
|
Note: can insert the logic in the loc
1
| select name from Customer where referee_id != 2 or referee_id is Null
|
- Big countries
1
2
| def big_countries(world: pd.DataFrame) -> pd.DataFrame:
return world[(world["area"] >= 3000000) | (world["population"] > 25000000)][["name", "population", "area"]]
|
1
| select name, population, area from world where population >= 25000000 or area >= 3000000
|
- Article View
1
2
3
| def article_views(views: pd.DataFrame) -> pd.DataFrame:
return views[views["viewer_id"] == views["author_id"]][["author_id"]].drop_duplicates()
.sort_values("author_id").rename(columns={"author_id": "id"})
|
1
| select distinct(author_id) as id from Views where author_id = viewer_id order by id
|
- invalid tweets
1
| select tweet_id from Tweets where length(content) >15
|
1
2
| def invalid_tweets(tweets: pd.DataFrame) -> pd.DataFrame:22
return tweets.query(f"content.str.len() >15")[["tweet_id"]]
|
or
1
| return tweets.loc[tweets.content.str.len() >15, ["tweet_id"]]
|
- Replace Employee id
1
2
| def replace_employee_id(employees: pd.DataFrame, employee_uni: pd.DataFrame) -> pd.DataFrame:
return pd.merge(employees, employee_uni, how="left", on=["id", "id"])[["unique_id", "name"]]
|
1
| select unique_id, name from Employees left join EmployeeUNI on Employees.id = EmployeeUNI.id
|
- Product Sales Analysis
1
| SELECT product_name, year, price from Sales left join Product on Sales.product_id = Product.product_id
|
- customer-who-visited-but-did-not-make-any-transactions
1
2
3
| SELECT
customer_id,
COUNT(*) AS count_no_trans
|
FROM
Visits
LEFT JOIN
Transactions
ON
Visits.visit_id = Transactions.visit_id
WHERE
transaction_id IS NULL
GROUP BY
customer_id;
```
9. Rising Temperature
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
| import pandas as pd
def rising_temperature(weather: pd.DataFrame) -> pd.DataFrame:
if weather.size:
weather.sort_values(by=['recordDate'], inplace=True)
weather.reset_index(inplace=True)
weather.recordDate= pd.to_datetime(weather.recordDate, format="-%Y-%m-%d")
new_dates = pd.date_range(start=weather['recordDate'][0],
end=weather['recordDate'][len(weather)-1],freq='D')
weather = weather.set_index("recordDate").reindex(new_dates).reset_index()
weather.rename(columns={"index": "recordDate"}, inplace=True)
weather["change"] = weather.temperature - weather.temperature.shift(1, fill_value=weather.temperature[0])
return weather[weather["change"] > 0.0][["id"]].rename(columns={"id": "Id"})
else:
return weather[["id"]].rename(columns={"id":"ID"})
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
| WITH PreviousWeatherData AS
(
SELECT
id,
recordDate,
temperature,
LAG(temperature, 1) OVER (ORDER BY recordDate) AS PreviousTemperature,
LAG(recordDate, 1) OVER (ORDER BY recordDate) AS PreviousRecordDate
FROM
Weather
)
SELECT
id
FROM
PreviousWeatherData
WHERE
temperature > PreviousTemperature
AND
recordDate = DATE_ADD(PreviousRecordDate, INTERVAL 1 DAY);
|
This might be slower, as seen in leetcode.
1
2
3
4
5
6
7
8
9
10
11
12
13
| SELECT
w1.id
FROM
Weather w1
WHERE
w1.temperature > (
SELECT
w2.temperature
FROM
Weather w2
WHERE
w2.recordDate = DATE_SUB(w1.recordDate, INTERVAL 1 DAY)
);
|
1
2
3
4
5
6
7
8
9
10
| SELECT
w1.id
FROM
Weather w1
JOIN
Weather w2
ON
DATEDIFF(w1.recordDate, w2.recordDate) = 1
WHERE
w1.temperature > w2.temperature;
|
Bonus:
1
2
3
4
5
6
| select EmpLastName, EmpFirstName, ProjDesc, BldName, DeptName
from Employee, Building, Department, Project
where DeptBuilding = BldID
and empDeptID = DeptID
and EmpID = ProjManager
order by EmpLastName, DeptName, BldName
|
Multiple Tables could be used to join without using the word.
Make sure to add where clause otherwise, data are added with each new row.
1
2
3
4
5
6
| from Department
inner join employee
inner join Building
on empDeptID = DeptID
and DeptBuilding = BldId
order by DeptName, EmpSalary desc
|
- Average Time of Process per machine
1
2
3
4
5
| select a1.machine_id, round(avg(a2.timestamp- a1.timestamp), 3) as processing_time
from Activity a1
join Activity a2
on a1.machine_id = a2.machine_id and a1.process_id = a2.process_id and a1.timestamp < a2.timestamp
group by a1.machine_id
|
- Employee Bonus
1
2
3
4
5
| select name, bonus
from Employee
left join Bonus
on Employee.empId = Bonus.empId
where Bonus.bonus < 1000 or Bonus.bonus is null
|
- Students and Examinations
1
2
3
4
5
6
7
8
| select S.student_id, S.student_name, SU.subject_name, COUNT(E.student_id) as attended_exams
from Students S
CROSS JOIN Subjects SU
left join Examinations E
on S.student_id = E.student_id
AND SU.subject_name = E.subject_name
Group By S.student_id, SU.subject_name
ORDER BY s.student_id
|
- Managers with at least 5 direct reports
1
2
3
4
5
| select b.name
from Employee a
Join Employee b on a.ManagerId = b.id
group by a.ManagerId
having count(*) >= 5
|
- Confirmation rate
1
2
3
4
5
| select s.user_id, round(avg(if(c.action ="confirmed", 1, 0)), 2 ) as confirmation_rate
from SignUps as s
left join Confirmations as c
on s.user_id = c.user_id
group by user_id
|
- Not Boring Movie
1
2
3
4
| select *
from Cinema
where description != "boring" and id%2 !=0
order by rating desc
|
- Average Selling Price
1
2
3
4
5
6
| select P.product_id, ifnull(round(sum(US.units * P.price)/ sum(US.units), 2), 0) as average_price
from Prices as P
left join UnitsSold as US
on US.product_id = P.product_id and US.purchase_date >= P.start_date
and US.purchase_date <= P.end_date
group by P.product_id
|
- Project employees
- Percentage of Users attended a contest
- Queries quality and Percentage
- Monthly Transactions I
- Immediate Food Delivery
- Game Play Analysis