Top 50 SQL for Mastery
Recyclable and Low Fat Products 1 2 3 def find_products(products: pd.DataFrame) -> pd.DataFrame: # return products[(products["low_fats"] == "Y") & (products["recyclable"] =="Y")].loc[:,["product_id"]] return products[(products["low_fats"] == "Y") & (products["recyclable"] =="Y")][["product_id"]] 1 select product_id from Products where low_fats = "Y" and recyclable = "Y" Note: can use df.iloc[:, [“column_name”]] or df[[“column_name]] df[“column_name] returns series. Find Customer Refree 1 2 def find_customer_referee(customer: pd.DataFrame) -> pd.DataFrame: return customer[(customer["referee_id"] != 2) | (customer["referee_id"].isnull())][["name"]] or 1 2 3 def find_customer_referee(customer: pd.DataFrame) -> pd.DataFrame: customer.fillna(0, inplace=True) return customer.loc[customer["referee_id"] !=2 , ["name"]] Note: can insert the logic in the loc 1 select name from Customer where referee_id != 2 or referee_id is Null Big countries 1 2 def big_countries(world: pd.DataFrame) -> pd.DataFrame: return world[(world["area"] >= 3000000) | (world["population"] > 25000000)][["name", "population", "area"]] 1 select name, population, area from world where population >= 25000000 or area >= 3000000 Article View 1 2 3 def article_views(views: pd.DataFrame) -> pd.DataFrame: return views[views["viewer_id"] == views["author_id"]][["author_id"]].drop_duplicates() .sort_values("author_id").rename(columns={"author_id": "id"}) 1 select distinct(author_id) as id from Views where author_id = viewer_id order by id invalid tweets 1 select tweet_id from Tweets where length(content) >15 1 2 def invalid_tweets(tweets: pd.DataFrame) -> pd.DataFrame:22 return tweets.query(f"content.str.len() >15")[["tweet_id"]] or ...