def find_products(products: pd.DataFrame) -> pd.DataFrame:
# return products[(products["low_fats"] == "Y") & (products["recyclable"] =="Y")].loc[:,["product_id"]]
return products[(products["low_fats"] == "Y") & (products["recyclable"] =="Y")][["product_id"]]
select product_id from Products where low_fats = "Y" and recyclable = "Y"
Note: can use df.iloc[:, [“column_name”]] or df[[“column_name]] df[“column_name] returns series.
def find_customer_referee(customer: pd.DataFrame) -> pd.DataFrame:
return customer[(customer["referee_id"] != 2) | (customer["referee_id"].isnull())][["name"]]
or
def find_customer_referee(customer: pd.DataFrame) -> pd.DataFrame:
customer.fillna(0, inplace=True)
return customer.loc[customer["referee_id"] !=2 , ["name"]]
Note: can insert the logic in the loc
select name from Customer where referee_id != 2 or referee_id is Null
def big_countries(world: pd.DataFrame) -> pd.DataFrame:
return world[(world["area"] >= 3000000) | (world["population"] > 25000000)][["name", "population", "area"]]
select name, population, area from world where population >= 25000000 or area >= 3000000
def article_views(views: pd.DataFrame) -> pd.DataFrame:
return views[views["viewer_id"] == views["author_id"]][["author_id"]].drop_duplicates()
.sort_values("author_id").rename(columns={"author_id": "id"})
select distinct(author_id) as id from Views where author_id = viewer_id order by id
select tweet_id from Tweets where length(content) >15
def invalid_tweets(tweets: pd.DataFrame) -> pd.DataFrame:22
return tweets.query(f"content.str.len() >15")[["tweet_id"]]
or
return tweets.loc[tweets.content.str.len() >15, ["tweet_id"]]
def replace_employee_id(employees: pd.DataFrame, employee_uni: pd.DataFrame) -> pd.DataFrame:
return pd.merge(employees, employee_uni, how="left", on=["id", "id"])[["unique_id", "name"]]
select unique_id, name from Employees left join EmployeeUNI on Employees.id = EmployeeUNI.id
SELECT product_name, year, price from Sales left join Product on Sales.product_id = Product.product_id
-
customer-who-visited-but-did-not-make-any-transactions
SELECT customer_id, COUNT(*) AS count_no_trans
FROM Visits LEFT JOIN Transactions ON Visits.visit_id = Transactions.visit_id WHERE transaction_id IS NULL GROUP BY customer_id; ``` 9. Rising Temperature
import pandas as pd
def rising_temperature(weather: pd.DataFrame) -> pd.DataFrame:
if weather.size:
weather.sort_values(by=['recordDate'], inplace=True)
weather.reset_index(inplace=True)
weather.recordDate= pd.to_datetime(weather.recordDate, format="-%Y-%m-%d")
new_dates = pd.date_range(start=weather['recordDate'][0],
end=weather['recordDate'][len(weather)-1],freq='D')
weather = weather.set_index("recordDate").reindex(new_dates).reset_index()
weather.rename(columns={"index": "recordDate"}, inplace=True)
weather["change"] = weather.temperature - weather.temperature.shift(1, fill_value=weather.temperature[0])
return weather[weather["change"] > 0.0][["id"]].rename(columns={"id": "Id"})
else:
return weather[["id"]].rename(columns={"id":"ID"})
WITH PreviousWeatherData AS
(
SELECT
id,
recordDate,
temperature,
LAG(temperature, 1) OVER (ORDER BY recordDate) AS PreviousTemperature,
LAG(recordDate, 1) OVER (ORDER BY recordDate) AS PreviousRecordDate
FROM
Weather
)
SELECT
id
FROM
PreviousWeatherData
WHERE
temperature > PreviousTemperature
AND
recordDate = DATE_ADD(PreviousRecordDate, INTERVAL 1 DAY);
This might be slower, as seen in leetcode.
SELECT
w1.id
FROM
Weather w1
WHERE
w1.temperature > (
SELECT
w2.temperature
FROM
Weather w2
WHERE
w2.recordDate = DATE_SUB(w1.recordDate, INTERVAL 1 DAY)
);
SELECT
w1.id
FROM
Weather w1
JOIN
Weather w2
ON
DATEDIFF(w1.recordDate, w2.recordDate) = 1
WHERE
w1.temperature > w2.temperature;
Bonus:
select EmpLastName, EmpFirstName, ProjDesc, BldName, DeptName
from Employee, Building, Department, Project
where DeptBuilding = BldID
and empDeptID = DeptID
and EmpID = ProjManager
order by EmpLastName, DeptName, BldName
Multiple Tables could be used to join without using the word. Make sure to add where clause otherwise, data are added with each new row.
from Department
inner join employee
inner join Building
on empDeptID = DeptID
and DeptBuilding = BldId
order by DeptName, EmpSalary desc
select a1.machine_id, round(avg(a2.timestamp- a1.timestamp), 3) as processing_time
from Activity a1
join Activity a2
on a1.machine_id = a2.machine_id and a1.process_id = a2.process_id and a1.timestamp < a2.timestamp
group by a1.machine_id
select name, bonus
from Employee
left join Bonus
on Employee.empId = Bonus.empId
where Bonus.bonus < 1000 or Bonus.bonus is null
select S.student_id, S.student_name, SU.subject_name, COUNT(E.student_id) as attended_exams
from Students S
CROSS JOIN Subjects SU
left join Examinations E
on S.student_id = E.student_id
AND SU.subject_name = E.subject_name
Group By S.student_id, SU.subject_name
ORDER BY s.student_id
select b.name
from Employee a
Join Employee b on a.ManagerId = b.id
group by a.ManagerId
having count(*) >= 5
select s.user_id, round(avg(if(c.action ="confirmed", 1, 0)), 2 ) as confirmation_rate
from SignUps as s
left join Confirmations as c
on s.user_id = c.user_id
group by user_id
select *
from Cinema
where description != "boring" and id%2 !=0
order by rating desc
select P.product_id, ifnull(round(sum(US.units * P.price)/ sum(US.units), 2), 0) as average_price
from Prices as P
left join UnitsSold as US
on US.product_id = P.product_id and US.purchase_date >= P.start_date
and US.purchase_date <= P.end_date
group by P.product_id
select project_id, ROUND(AVG(experience_years),2) as average_years
from Project P
left join Employee E
on P.employee_id = E.employee_id
group by P.project_id
select contest_id, Round(Count(user_id)* 100/ (select count(*) from Users), 2) as percentage
from Register
group by contest_id
order by percentage desc, contest_id asc
select query_name, ROUND(AVG(Q.rating/ Q.position), 2) AS quality,
Round(Sum(Q.rating < 3) *100 / Count(Q.rating), 2) as poor_query_percentage
from Queries Q
group by query_name
SELECT DATE_FORMAT(trans_date, "%Y-%m") as month,
country,
Count(*) as trans_count,
SUM(state = 'approved') as approved_count,
SUM(amount) as trans_total_amount,
SUM(Case when state = 'approved' then amount else 0 end) as approved_total_amount
FROM Transactions
GROUP BY month, country