Recyclable and Low Fat Products 1
2
3
def find_products ( products : pd . DataFrame ) -> pd . DataFrame :
# return products[(products["low_fats"] == "Y") & (products["recyclable"] =="Y")].loc[:,["product_id"]]
return products [( products [ "low_fats" ] == "Y" ) & ( products [ "recyclable" ] == "Y" )][[ "product_id" ]]
copy
1
select product_id from Products where low_fats = "Y" and recyclable = "Y"
copy
Note: can use df.iloc[:, [“column_name”]] or df[[“column_name]]
df[“column_name] returns series.
Find Customer Refree 1
2
def find_customer_referee ( customer : pd . DataFrame ) -> pd . DataFrame :
return customer [( customer [ "referee_id" ] != 2 ) | ( customer [ "referee_id" ] . isnull ())][[ "name" ]]
copy
or
1
2
3
def find_customer_referee ( customer : pd . DataFrame ) -> pd . DataFrame :
customer . fillna ( 0 , inplace = True )
return customer . loc [ customer [ "referee_id" ] != 2 , [ "name" ]]
copy
Note: can insert the logic in the loc
1
select name from Customer where referee_id != 2 or referee_id is Null
copy
Big countries 1
2
def big_countries ( world : pd . DataFrame ) -> pd . DataFrame :
return world [( world [ "area" ] >= 3000000 ) | ( world [ "population" ] > 25000000 )][[ "name" , "population" , "area" ]]
copy
1
select name , population , area from world where population >= 25000000 or area >= 3000000
copy
Article View 1
2
3
def article_views ( views : pd . DataFrame ) -> pd . DataFrame :
return views [ views [ "viewer_id" ] == views [ "author_id" ]][[ "author_id" ]] . drop_duplicates ()
. sort_values ( "author_id" ) . rename ( columns = { "author_id" : "id" })
copy
1
select distinct ( author_id ) as id from Views where author_id = viewer_id order by id
copy
invalid tweets 1
select tweet_id from Tweets where length ( content ) > 15
copy
1
2
def invalid_tweets ( tweets : pd . DataFrame ) -> pd . DataFrame : 22
return tweets . query ( f "content.str.len() >15" )[[ "tweet_id" ]]
copy
or
1
return tweets . loc [ tweets . content . str . len () > 15 , [ "tweet_id" ]]
copy
Replace Employee id 1
2
def replace_employee_id ( employees : pd . DataFrame , employee_uni : pd . DataFrame ) -> pd . DataFrame :
return pd . merge ( employees , employee_uni , how = "left" , on = [ "id" , "id" ])[[ "unique_id" , "name" ]]
copy
1
select unique_id , name from Employees left join EmployeeUNI on Employees . id = EmployeeUNI . id
copy
Product Sales Analysis 1
SELECT product_name , year , price from Sales left join Product on Sales . product_id = Product . product_id
copy
customer-who-visited-but-did-not-make-any-transactions 1
2
3
SELECT
customer_id ,
COUNT ( * ) AS count_no_trans
copy
FROM
Visits
LEFT JOIN
Transactions
ON
Visits.visit_id = Transactions.visit_id
WHERE
transaction_id IS NULL
GROUP BY
customer_id;
```
9. Rising Temperature
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
import pandas as pd
def rising_temperature ( weather : pd . DataFrame ) -> pd . DataFrame :
if weather . size :
weather . sort_values ( by = [ 'recordDate' ], inplace = True )
weather . reset_index ( inplace = True )
weather . recordDate = pd . to_datetime ( weather . recordDate , format = "-%Y-%m- %d " )
new_dates = pd . date_range ( start = weather [ 'recordDate' ][ 0 ],
end = weather [ 'recordDate' ][ len ( weather ) - 1 ], freq = 'D' )
weather = weather . set_index ( "recordDate" ) . reindex ( new_dates ) . reset_index ()
weather . rename ( columns = { "index" : "recordDate" }, inplace = True )
weather [ "change" ] = weather . temperature - weather . temperature . shift ( 1 , fill_value = weather . temperature [ 0 ])
return weather [ weather [ "change" ] > 0.0 ][[ "id" ]] . rename ( columns = { "id" : "Id" })
else :
return weather [[ "id" ]] . rename ( columns = { "id" : "ID" })
copy
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
WITH PreviousWeatherData AS
(
SELECT
id ,
recordDate ,
temperature ,
LAG ( temperature , 1 ) OVER ( ORDER BY recordDate ) AS PreviousTemperature ,
LAG ( recordDate , 1 ) OVER ( ORDER BY recordDate ) AS PreviousRecordDate
FROM
Weather
)
SELECT
id
FROM
PreviousWeatherData
WHERE
temperature > PreviousTemperature
AND
recordDate = DATE_ADD ( PreviousRecordDate , INTERVAL 1 DAY );
copy
This might be slower, as seen in leetcode.
1
2
3
4
5
6
7
8
9
10
11
12
13
SELECT
w1 . id
FROM
Weather w1
WHERE
w1 . temperature > (
SELECT
w2 . temperature
FROM
Weather w2
WHERE
w2 . recordDate = DATE_SUB ( w1 . recordDate , INTERVAL 1 DAY )
);
copy
1
2
3
4
5
6
7
8
9
10
SELECT
w1 . id
FROM
Weather w1
JOIN
Weather w2
ON
DATEDIFF ( w1 . recordDate , w2 . recordDate ) = 1
WHERE
w1 . temperature > w2 . temperature ;
copy
Bonus:
1
2
3
4
5
6
select EmpLastName , EmpFirstName , ProjDesc , BldName , DeptName
from Employee , Building , Department , Project
where DeptBuilding = BldID
and empDeptID = DeptID
and EmpID = ProjManager
order by EmpLastName , DeptName , BldName
copy
Multiple Tables could be used to join without using the word.
Make sure to add where clause otherwise, data are added with each new row.
1
2
3
4
5
6
from Department
inner join employee
inner join Building
on empDeptID = DeptID
and DeptBuilding = BldId
order by DeptName, EmpSalary desc
copy
Average Time of Process per machine 1
2
3
4
5
select a1 . machine_id , round ( avg ( a2 . timestamp - a1 . timestamp ), 3 ) as processing_time
from Activity a1
join Activity a2
on a1 . machine_id = a2 . machine_id and a1 . process_id = a2 . process_id and a1 . timestamp < a2 . timestamp
group by a1 . machine_id
copy
Employee Bonus 1
2
3
4
5
select name , bonus
from Employee
left join Bonus
on Employee . empId = Bonus . empId
where Bonus . bonus < 1000 or Bonus . bonus is null
copy
Students and Examinations 1
2
3
4
5
6
7
8
select S . student_id , S . student_name , SU . subject_name , COUNT ( E . student_id ) as attended_exams
from Students S
CROSS JOIN Subjects SU
left join Examinations E
on S . student_id = E . student_id
AND SU . subject_name = E . subject_name
Group By S . student_id , SU . subject_name
ORDER BY s . student_id
copy
Managers with at least 5 direct reports 1
2
3
4
5
select b . name
from Employee a
Join Employee b on a . ManagerId = b . id
group by a . ManagerId
having count ( * ) >= 5
copy
Confirmation rate 1
2
3
4
5
select s . user_id , round ( avg ( if ( c . action = "confirmed" , 1 , 0 )), 2 ) as confirmation_rate
from SignUps as s
left join Confirmations as c
on s . user_id = c . user_id
group by user_id
copy
Not Boring Movie 1
2
3
4
select *
from Cinema
where description != "boring" and id % 2 != 0
order by rating desc
copy
Average Selling Price 1
2
3
4
5
6
select P . product_id , ifnull ( round ( sum ( US . units * P . price ) / sum ( US . units ), 2 ), 0 ) as average_price
from Prices as P
left join UnitsSold as US
on US . product_id = P . product_id and US . purchase_date >= P . start_date
and US . purchase_date <= P . end_date
group by P . product_id
copy
Project employees 1
2
3
4
5
select project_id , ROUND ( AVG ( experience_years ), 2 ) as average_years
from Project P
left join Employee E
on P . employee_id = E . employee_id
group by P . project_id
copy
Percentage of Users attended a contest 1
2
3
4
select contest_id , Round ( Count ( user_id ) * 100 / ( select count ( * ) from Users ), 2 ) as percentage
from Register
group by contest_id
order by percentage desc , contest_id asc
copy
Queries quality and Percentage 1
2
3
4
select query_name , ROUND ( AVG ( Q . rating / Q . position ), 2 ) AS quality ,
Round ( Sum ( Q . rating < 3 ) * 100 / Count ( Q . rating ), 2 ) as poor_query_percentage
from Queries Q
group by query_name
copy
Monthly Transactions I 1
2
3
4
5
6
7
8
SELECT DATE_FORMAT ( trans_date , "%Y-%m" ) as month ,
country ,
Count ( * ) as trans_count ,
SUM ( state = 'approved' ) as approved_count ,
SUM ( amount ) as trans_total_amount ,
SUM ( Case when state = 'approved' then amount else 0 end ) as approved_total_amount
FROM Transactions
GROUP BY month , country
copy
Immediate Food Delivery Game Play Analysis