Open In Colab

(Almost) All SQL You Need Exercise: Personalized Recommendation

In this exercise, we will use a subset of the Taobao User Behavior (5M rows) to build and evaluate a rule-based recommendation system for users based on their past behaviors. We will โ€œtrainโ€ based on data before 2017-12 and test on data of 2017-12, optimizing for top-10 hit rate based on purchases. We will ask you to write an appropriate query for each task required. Give it your best shot but reveal the answer keys if needed.

To complete the exercise, first run each cell individually (Run all unfortunately does not work) then fill in the query and click Execute query. Use Reveal hint and Reveal answer as needed. Refer to the blog post for SQL techniques you need to complete the exercise and duckdb documentation for the specific quirks of duckdb SQL.

# @title Run this cell to download files and setup environment.

import duckdb
import pandas as pd
pd.set_option('display.max_rows', 1000)
import numpy as np
import random
from tqdm.auto import tqdm
import os
import timeit
import ipywidgets as widgets
from IPython.display import display


#download parquet files for `transaction_tbl`
!wget https://github.com/cstorm125/cstorm125.github.io/releases/download/transaction_tbl/transaction_tbl.zip > /dev/null 2>&1
!unzip transaction_tbl.zip > /dev/null 2>&1

#create table in duckdb
con = duckdb.connect()

#short-hand function to execute query with duckdb easily
def execute_query(query): return con.execute(query).fetchdf()

query = f"""
CREATE OR REPLACE TABLE transaction_tbl AS
SELECT *
FROM read_parquet('transaction_tbl/*/*/*.parquet', hive_partitioning=true);
"""

#load saved, partitioned table as transaction_tbl
execute_query(query)

Sanity Checks

After downloading and creating transaction_tbl in our database, let us first do some sanity checks.

# @title Query to randomly see 100 rows

answer_key = f"""
select * from transaction_tbl limit 100;
"""
hint = 'select * and limit'

#input
text_area = widgets.Textarea(
    value="Write your query here.",
    rows=10,  # Initial number of visible rows
    description="Query:",
    layout={'width': '730px'} # Adjust width as needed
)

#output
query_result = query_result = widgets.Output(
    layout=widgets.Layout(
        border='1px solid lightgray', # Add a border to make the scrollable area visible
        height='300px',              # Fixed height for the output area
        overflow_y='scroll'          # Enable vertical scrolling
    )
)

#button
execute_button = widgets.Button(
    description='Execute query',
    disabled=False,
    button_style='', # 'success', 'info', 'warning', 'danger' or ''
    icon='check' # (FontAwesome icons available)
)
def process_query(b):
    query = text_area.value
    with query_result:
        query_result.clear_output()
        display(execute_query(query))
execute_button.on_click(process_query)

answer_button = widgets.Button(
    description='Reveal answer key',
    disabled=False,
    button_style='', # 'success', 'info', 'warning', 'danger' or ''
    icon='check' # (FontAwesome icons available)
)
def show_answer_key(b):
    query = text_area.value
    with query_result:
        query_result.clear_output()
        print(f'Answer key is: \n{answer_key}')
answer_button.on_click(show_answer_key)

hint_button = widgets.Button(
    description='Reveal hint',
    disabled=False,
    button_style='', # 'success', 'info', 'warning', 'danger' or ''
    icon='check' # (FontAwesome icons available)
)
def show_hint(b):
    with query_result:
        query_result.clear_output()
        print(f'Hint: \n{hint}')
hint_button.on_click(show_hint)

display(text_area, execute_button, hint_button, answer_button, query_result)
# @title Count how many events, how many unique customers, how many unique items, how many unique event types.

answer_key = f"""
select
 count(*) as nb_event
 ,count(distinct user_id) as nb_user
 ,count(distinct item_id) as nb_item
 ,count(distinct behavior_type) as nb_event_type
from transaction_tbl;
"""
hint = 'count and count distinct'

#input
text_area = widgets.Textarea(
    value="Write your query here.",
    rows=10,  # Initial number of visible rows
    description="Query:",
    layout={'width': '730px'} # Adjust width as needed
)

#output
query_result = query_result = widgets.Output(
    layout=widgets.Layout(
        border='1px solid lightgray', # Add a border to make the scrollable area visible
        height='300px',              # Fixed height for the output area
        overflow_y='scroll'          # Enable vertical scrolling
    )
)

#button
execute_button = widgets.Button(
    description='Execute query',
    disabled=False,
    button_style='', # 'success', 'info', 'warning', 'danger' or ''
    icon='check' # (FontAwesome icons available)
)
def process_query(b):
    query = text_area.value
    with query_result:
        query_result.clear_output()
        display(execute_query(query))
execute_button.on_click(process_query)

answer_button = widgets.Button(
    description='Reveal answer key',
    disabled=False,
    button_style='', # 'success', 'info', 'warning', 'danger' or ''
    icon='check' # (FontAwesome icons available)
)
def show_answer_key(b):
    query = text_area.value
    with query_result:
        query_result.clear_output()
        print(f'Answer key is: \n{answer_key}')
answer_button.on_click(show_answer_key)

hint_button = widgets.Button(
    description='Reveal hint',
    disabled=False,
    button_style='', # 'success', 'info', 'warning', 'danger' or ''
    icon='check' # (FontAwesome icons available)
)
def show_hint(b):
    with query_result:
        query_result.clear_output()
        print(f'Hint: \n{hint}')
hint_button.on_click(show_hint)

display(text_area, execute_button, hint_button, answer_button, query_result)
# @title How many events and unique users per event type?

answer_key = f"""
select
 behavior_type
 ,count(*) as nb_event
 ,count(distinct user_id) as nb_user
from transaction_tbl
group by 1;
"""
hint = 'group by'

#input
text_area = widgets.Textarea(
    value="Write your query here.",
    rows=10,  # Initial number of visible rows
    description="Query:",
    layout={'width': '730px'} # Adjust width as needed
)

#output
query_result = query_result = widgets.Output(
    layout=widgets.Layout(
        border='1px solid lightgray', # Add a border to make the scrollable area visible
        height='300px',              # Fixed height for the output area
        overflow_y='scroll'          # Enable vertical scrolling
    )
)

#button
execute_button = widgets.Button(
    description='Execute query',
    disabled=False,
    button_style='', # 'success', 'info', 'warning', 'danger' or ''
    icon='check' # (FontAwesome icons available)
)
def process_query(b):
    query = text_area.value
    with query_result:
        query_result.clear_output()
        display(execute_query(query))
execute_button.on_click(process_query)

answer_button = widgets.Button(
    description='Reveal answer key',
    disabled=False,
    button_style='', # 'success', 'info', 'warning', 'danger' or ''
    icon='check' # (FontAwesome icons available)
)
def show_answer_key(b):
    query = text_area.value
    with query_result:
        query_result.clear_output()
        print(f'Answer key is: \n{answer_key}')
answer_button.on_click(show_answer_key)

hint_button = widgets.Button(
    description='Reveal hint',
    disabled=False,
    button_style='', # 'success', 'info', 'warning', 'danger' or ''
    icon='check' # (FontAwesome icons available)
)
def show_hint(b):
    with query_result:
        query_result.clear_output()
        print(f'Hint: \n{hint}')
hint_button.on_click(show_hint)

display(text_area, execute_button, hint_button, answer_button, query_result)
# @title Convert integer timestamp into human-readable timestamp `event_timestamp` and date `event_date`, then save as `transaction_tbl_x` view.

answer_key = f"""
create or replace view transaction_tbl_x as (
select
 a.*
 ,to_timestamp(a.timestamp) as event_timestamp
 ,strftime(to_timestamp(a.timestamp), '%Y-%m-%d') as event_date
 ,substring(cast(to_timestamp(a.timestamp)as varchar),1,7) as year_month --optional
from transaction_tbl a
);

select * from transaction_tbl_x limit 10;
"""
hint = 'to_timestamp and date_trunc'

#input
text_area = widgets.Textarea(
    value="Write your query here.",
    rows=10,  # Initial number of visible rows
    description="Query:",
    layout={'width': '730px'} # Adjust width as needed
)

#output
query_result = query_result = widgets.Output(
    layout=widgets.Layout(
        border='1px solid lightgray', # Add a border to make the scrollable area visible
        height='300px',              # Fixed height for the output area
        overflow_y='scroll'          # Enable vertical scrolling
    )
)

#button
execute_button = widgets.Button(
    description='Execute query',
    disabled=False,
    button_style='', # 'success', 'info', 'warning', 'danger' or ''
    icon='check' # (FontAwesome icons available)
)
def process_query(b):
    query = text_area.value
    with query_result:
        query_result.clear_output()
        display(execute_query(query))
execute_button.on_click(process_query)

answer_button = widgets.Button(
    description='Reveal answer key',
    disabled=False,
    button_style='', # 'success', 'info', 'warning', 'danger' or ''
    icon='check' # (FontAwesome icons available)
)
def show_answer_key(b):
    query = text_area.value
    with query_result:
        query_result.clear_output()
        print(f'Answer key is: \n{answer_key}')
answer_button.on_click(show_answer_key)

hint_button = widgets.Button(
    description='Reveal hint',
    disabled=False,
    button_style='', # 'success', 'info', 'warning', 'danger' or ''
    icon='check' # (FontAwesome icons available)
)
def show_hint(b):
    with query_result:
        query_result.clear_output()
        print(f'Hint: \n{hint}')
hint_button.on_click(show_hint)

display(text_area, execute_button, hint_button, answer_button, query_result)
# @title Check distribution of `event_date`. Is there anything weird?

answer_key = f"""
select
 event_date
 ,count(*) as nb_event
from transaction_tbl_x
group by 1
order by 1;
"""
hint = 'Do not forget we are using `transaction_tbl_x` now. Order by `event_date` to see if there is anything odd.'

#input
text_area = widgets.Textarea(
    value="Write your query here.",
    rows=10,  # Initial number of visible rows
    description="Query:",
    layout={'width': '730px'} # Adjust width as needed
)

#output
query_result = widgets.Output(
    layout=widgets.Layout(
        border='1px solid lightgray', # Add a border to make the scrollable area visible
        height='300px',              # Fixed height for the output area
        overflow_y='scroll'          # Enable vertical scrolling
    )
)

#button
execute_button = widgets.Button(
    description='Execute query',
    disabled=False,
    button_style='', # 'success', 'info', 'warning', 'danger' or ''
    icon='check' # (FontAwesome icons available)
)
def process_query(b):
    query = text_area.value
    with query_result:
        query_result.clear_output()
        display(execute_query(query))
execute_button.on_click(process_query)

answer_button = widgets.Button(
    description='Reveal answer key',
    disabled=False,
    button_style='', # 'success', 'info', 'warning', 'danger' or ''
    icon='check' # (FontAwesome icons available)
)
def show_answer_key(b):
    query = text_area.value
    with query_result:
        query_result.clear_output()
        print(f'Answer key is: \n{answer_key}')
answer_button.on_click(show_answer_key)

hint_button = widgets.Button(
    description='Reveal hint',
    disabled=False,
    button_style='', # 'success', 'info', 'warning', 'danger' or ''
    icon='check' # (FontAwesome icons available)
)
def show_hint(b):
    with query_result:
        query_result.clear_output()
        print(f'Hint: \n{hint}')
hint_button.on_click(show_hint)

display(text_area, execute_button, hint_button, answer_button, query_result)

It is quite clear from exploration that most of the events come from 2017-11-25 to 2017-12-03, so let us use 2017-11-25 to 2017-11-30 as training set and 2017-12-01 to 2017-12-03 as test set. If this were your production database, having events from the 1920s mixed in there should raise a flag but we will ignore them in this educational setting.

# @title Create `train_tbl` with events from `2017-11-25` to `2017-11-30` and `test_tbl` with events from `2017-12-01` to `2017-12-03`

answer_key = f"""
create or replace view train_tbl as (
select
 *
from transaction_tbl_x
where event_date between '2017-11-25' and '2017-11-30'
);

create or replace view test_tbl as (
select
 *
from transaction_tbl_x
where event_date between '2017-12-01' and '2017-12-03'
);

select 'train' split, count(*) nb_event, count(distinct user_id) nb_user from train_tbl
union all
select 'test' split, count(*) nb_event, count(distinct user_id) nb_user from test_tbl;
"""
hint = 'where-clause and `between`'

#input
text_area = widgets.Textarea(
    value="Write your query here.",
    rows=10,  # Initial number of visible rows
    description="Query:",
    layout={'width': '730px'} # Adjust width as needed
)

#output
query_result = query_result = widgets.Output(
    layout=widgets.Layout(
        border='1px solid lightgray', # Add a border to make the scrollable area visible
        height='300px',              # Fixed height for the output area
        overflow_y='scroll'          # Enable vertical scrolling
    )
)

#button
execute_button = widgets.Button(
    description='Execute query',
    disabled=False,
    button_style='', # 'success', 'info', 'warning', 'danger' or ''
    icon='check' # (FontAwesome icons available)
)
def process_query(b):
    query = text_area.value
    with query_result:
        query_result.clear_output()
        display(execute_query(query))
execute_button.on_click(process_query)

answer_button = widgets.Button(
    description='Reveal answer key',
    disabled=False,
    button_style='', # 'success', 'info', 'warning', 'danger' or ''
    icon='check' # (FontAwesome icons available)
)
def show_answer_key(b):
    query = text_area.value
    with query_result:
        query_result.clear_output()
        print(f'Answer key is: \n{answer_key}')
answer_button.on_click(show_answer_key)

hint_button = widgets.Button(
    description='Reveal hint',
    disabled=False,
    button_style='', # 'success', 'info', 'warning', 'danger' or ''
    icon='check' # (FontAwesome icons available)
)
def show_hint(b):
    with query_result:
        query_result.clear_output()
        print(f'Hint: \n{hint}')
hint_button.on_click(show_hint)

display(text_area, execute_button, hint_button, answer_button, query_result)

Success Measurement: Metric and Baseline

Our task is to recommend 10 items that each user will most likely purchase during the test set period. The only information we have is what they viewed (pv), added to cart (cart) or favorited (fav) during the training set period.

Metric

Our metric is top-10 hit rate based on purchases. This means that for each customer, we will recommend 10 items and if the user purchased any of those items during the test set period, the user is marked as 1 else as 0. We then take an average across all users to get our score.

Example: top-3 hit rate calculation

user_id pred is_purchased_in_test_tbl
A X 0
A Y 0
A Z 1
B W 0
B T 0
B R 0
C Q 0
C T 1
C X 1
user_id hit hit_flag
A 1 1
B 0 0
C 2 1

top-3 hit rate = 1+0+1 / 3 = 66%

Baseline

The most simple baseline we can compare our recommendation system with is recommending top-10 best-selling items to all users.

Let us try to measure the performance of this simple baseline.

# @title Create view `eval_tbl` containing `user_id`, `item_id` only for items the users in `test_tbl` purchased (`buy`)

answer_key = f"""
create or replace view eval_tbl as (
select
 user_id
 ,item_id
from test_tbl
where behavior_type = 'buy'
group by 1,2
);

select * from eval_tbl;
"""
hint = 'group by and where'

#input
text_area = widgets.Textarea(
    value="Write your query here.",
    rows=10,  # Initial number of visible rows
    description="Query:",
    layout={'width': '730px'} # Adjust width as needed
)

#output
query_result = query_result = widgets.Output(
    layout=widgets.Layout(
        border='1px solid lightgray', # Add a border to make the scrollable area visible
        height='300px',              # Fixed height for the output area
        overflow_y='scroll'          # Enable vertical scrolling
    )
)

#button
execute_button = widgets.Button(
    description='Execute query',
    disabled=False,
    button_style='', # 'success', 'info', 'warning', 'danger' or ''
    icon='check' # (FontAwesome icons available)
)
def process_query(b):
    query = text_area.value
    with query_result:
        query_result.clear_output()
        display(execute_query(query))
execute_button.on_click(process_query)

answer_button = widgets.Button(
    description='Reveal answer key',
    disabled=False,
    button_style='', # 'success', 'info', 'warning', 'danger' or ''
    icon='check' # (FontAwesome icons available)
)
def show_answer_key(b):
    query = text_area.value
    with query_result:
        query_result.clear_output()
        print(f'Answer key is: \n{answer_key}')
answer_button.on_click(show_answer_key)

hint_button = widgets.Button(
    description='Reveal hint',
    disabled=False,
    button_style='', # 'success', 'info', 'warning', 'danger' or ''
    icon='check' # (FontAwesome icons available)
)
def show_hint(b):
    with query_result:
        query_result.clear_output()
        print(f'Hint: \n{hint}')
hint_button.on_click(show_hint)

display(text_area, execute_button, hint_button, answer_button, query_result)
# @title Get top 10 most-interacted items in training set

answer_key = f"""
select
 item_id
 ,count(*) as nb_event
from train_tbl
group by 1
order by 2 desc
limit 10;
"""
hint = 'group by and limit'

#input
text_area = widgets.Textarea(
    value="Write your query here.",
    rows=10,  # Initial number of visible rows
    description="Query:",
    layout={'width': '730px'} # Adjust width as needed
)

#output
query_result = query_result = widgets.Output(
    layout=widgets.Layout(
        border='1px solid lightgray', # Add a border to make the scrollable area visible
        height='300px',              # Fixed height for the output area
        overflow_y='scroll'          # Enable vertical scrolling
    )
)

#button
execute_button = widgets.Button(
    description='Execute query',
    disabled=False,
    button_style='', # 'success', 'info', 'warning', 'danger' or ''
    icon='check' # (FontAwesome icons available)
)
def process_query(b):
    query = text_area.value
    with query_result:
        query_result.clear_output()
        display(execute_query(query))
execute_button.on_click(process_query)

answer_button = widgets.Button(
    description='Reveal answer key',
    disabled=False,
    button_style='', # 'success', 'info', 'warning', 'danger' or ''
    icon='check' # (FontAwesome icons available)
)
def show_answer_key(b):
    query = text_area.value
    with query_result:
        query_result.clear_output()
        print(f'Answer key is: \n{answer_key}')
answer_button.on_click(show_answer_key)

hint_button = widgets.Button(
    description='Reveal hint',
    disabled=False,
    button_style='', # 'success', 'info', 'warning', 'danger' or ''
    icon='check' # (FontAwesome icons available)
)
def show_hint(b):
    with query_result:
        query_result.clear_output()
        print(f'Hint: \n{hint}')
hint_button.on_click(show_hint)

display(text_area, execute_button, hint_button, answer_button, query_result)
# @title Create view `pred_top10_tbl` containing all `user_id` from `test_tbl`. Each `user_id` should have 10 rows each row with the 10 recommendations, in this case top-10 most interacted items (same for all `user_id`).
answer_key = f"""
create or replace view pred_top10_tbl as (

with test_user_tbl as (
select
 user_id
 ,max(1) as joining_column
from test_tbl
group by 1),

top10_tbl as (
select
 item_id
 ,count(*) as nb_buy
 ,max(1) as joining_column
from train_tbl
group by 1
order by 2 desc
limit 10)

select
 a.user_id
 ,b.item_id
from test_user_tbl a
inner join top10_tbl b
on a.joining_column = b.joining_column
);

select count(*) nb_pred, count(distinct user_id) nb_user from pred_top10_tbl;
"""
hint = '''
You need to create a new `joining_column` that is all the same values max(1) to `inner join` on.
Or use `cross join`: https://www.geeksforgeeks.org/sql/sql-cross-join/
count(distinct user_id) and count(*) again to make sure that each user has 10 recommendations.
'''

#input
text_area = widgets.Textarea(
    value="Write your query here. This is a tough one so you might want to use the hint.",
    rows=10,  # Initial number of visible rows
    description="Query:",
    layout={'width': '730px'} # Adjust width as needed
)

#output
query_result = query_result = widgets.Output(
    layout=widgets.Layout(
        border='1px solid lightgray', # Add a border to make the scrollable area visible
        height='300px',              # Fixed height for the output area
        overflow_y='scroll'          # Enable vertical scrolling
    )
)

#button
execute_button = widgets.Button(
    description='Execute query',
    disabled=False,
    button_style='', # 'success', 'info', 'warning', 'danger' or ''
    icon='check' # (FontAwesome icons available)
)
def process_query(b):
    query = text_area.value
    with query_result:
        query_result.clear_output()
        display(execute_query(query))
execute_button.on_click(process_query)

answer_button = widgets.Button(
    description='Reveal answer key',
    disabled=False,
    button_style='', # 'success', 'info', 'warning', 'danger' or ''
    icon='check' # (FontAwesome icons available)
)
def show_answer_key(b):
    query = text_area.value
    with query_result:
        query_result.clear_output()
        print(f'Answer key is: \n{answer_key}')
answer_button.on_click(show_answer_key)

hint_button = widgets.Button(
    description='Reveal hint',
    disabled=False,
    button_style='', # 'success', 'info', 'warning', 'danger' or ''
    icon='check' # (FontAwesome icons available)
)
def show_hint(b):
    with query_result:
        query_result.clear_output()
        print(f'Hint: \n{hint}')
hint_button.on_click(show_hint)

display(text_area, execute_button, hint_button, answer_button, query_result)
# @title With `pred_top10_tbl` and `eval_tbl`, calculate top-10 hit rate

answer_key = f"""
select
 avg(case when nb_hit > 0 then 1 else 0 end) as top_10_hit_rate
from
(select
 a.user_id
 ,sum(case when b.item_id is not null then 1 else 0 end) as nb_hit
from eval_tbl a
left join pred_top10_tbl b
on a.user_id = b.user_id
 and a.item_id = b.item_id
group by 1) c;
"""
hint = f"""
Remember that we average 1/0s across users, not items.
"""

#input
text_area = widgets.Textarea(
    value="Write your query here.",
    rows=10,  # Initial number of visible rows
    description="Query:",
    layout={'width': '730px'} # Adjust width as needed
)

#output
query_result = query_result = widgets.Output(
    layout=widgets.Layout(
        border='1px solid lightgray', # Add a border to make the scrollable area visible
        height='300px',              # Fixed height for the output area
        overflow_y='scroll'          # Enable vertical scrolling
    )
)

#button
execute_button = widgets.Button(
    description='Execute query',
    disabled=False,
    button_style='', # 'success', 'info', 'warning', 'danger' or ''
    icon='check' # (FontAwesome icons available)
)
def process_query(b):
    query = text_area.value
    with query_result:
        query_result.clear_output()
        display(execute_query(query))
execute_button.on_click(process_query)

answer_button = widgets.Button(
    description='Reveal answer key',
    disabled=False,
    button_style='', # 'success', 'info', 'warning', 'danger' or ''
    icon='check' # (FontAwesome icons available)
)
def show_answer_key(b):
    query = text_area.value
    with query_result:
        query_result.clear_output()
        print(f'Answer key is: \n{answer_key}')
answer_button.on_click(show_answer_key)

hint_button = widgets.Button(
    description='Reveal hint',
    disabled=False,
    button_style='', # 'success', 'info', 'warning', 'danger' or ''
    icon='check' # (FontAwesome icons available)
)
def show_hint(b):
    with query_result:
        query_result.clear_output()
        print(f'Hint: \n{hint}')
hint_button.on_click(show_hint)

display(text_area, execute_button, hint_button, answer_button, query_result)

Considering we have 1,253,465 items in total, it is not surprising that our top-10 hit rate is only 0.0923% of all users in the test set. Let us see if we can do better.

Recommend what they have interacted with but have yet bought

One of the most powerful recommendation algorithm is to recommend what users have interacted with the most. In fact, this is what you would see in worldโ€™s best online retail sites, especially those dealing with groceries, beauty, and health and personal care. We add a small twist by only ordering the top-10 recommendation for each customer by all interactions (pv, cart, fav) EXCEPT buy. This is because if a user has bought the item before, it might not be likely for them to buy again in such as short period of time between training set and test set.

# @title Create view `pred_mfp_tbl` containing all `user_id` from `train_tbl`. Each user should have 10 recommendations based on the top-10 items, ordered by the number of times they have interacted (`pv`, `cart`, `fav`) with, but have yet bought (`buy`).

answer_key = f"""
create or replace view pred_mfp_tbl as (
select * from
(select
 user_id
 ,item_id
 ,row_number() over (partition by user_id order by nb_nonbuy_event desc) as rnk
from
(select
 user_id
 ,item_id
 ,count(*) as nb_nonbuy_event
from train_tbl
where behavior_type <> 'buy'
group by 1,2) a
) b
where rnk <=10);

select
 nb_pred
 ,count(*) as nb_user
from
(select
 user_id
 ,count(*) as nb_pred
from pred_mfp_tbl
group by 1) a
group by 1 order by 2 desc;
"""
hint = f"""
We need window function to make sure we have at most 10 items per customer.
"""

#input
text_area = widgets.Textarea(
    value="Write your query here.",
    rows=10,  # Initial number of visible rows
    description="Query:",
    layout={'width': '730px'} # Adjust width as needed
)

#output
query_result = query_result = widgets.Output(
    layout=widgets.Layout(
        border='1px solid lightgray', # Add a border to make the scrollable area visible
        height='300px',              # Fixed height for the output area
        overflow_y='scroll'          # Enable vertical scrolling
    )
)

#button
execute_button = widgets.Button(
    description='Execute query',
    disabled=False,
    button_style='', # 'success', 'info', 'warning', 'danger' or ''
    icon='check' # (FontAwesome icons available)
)
def process_query(b):
    query = text_area.value
    with query_result:
        query_result.clear_output()
        display(execute_query(query))
execute_button.on_click(process_query)

answer_button = widgets.Button(
    description='Reveal answer key',
    disabled=False,
    button_style='', # 'success', 'info', 'warning', 'danger' or ''
    icon='check' # (FontAwesome icons available)
)
def show_answer_key(b):
    query = text_area.value
    with query_result:
        query_result.clear_output()
        print(f'Answer key is: \n{answer_key}')
answer_button.on_click(show_answer_key)

hint_button = widgets.Button(
    description='Reveal hint',
    disabled=False,
    button_style='', # 'success', 'info', 'warning', 'danger' or ''
    icon='check' # (FontAwesome icons available)
)
def show_hint(b):
    with query_result:
        query_result.clear_output()
        print(f'Hint: \n{hint}')
hint_button.on_click(show_hint)

display(text_area, execute_button, hint_button, answer_button, query_result)
# @title Check if we have recommendation in `pred_mfp_tbl` for every user in `test_tbl`?

answer_key = f"""
select
 avg(case when b.user_id is not null then 1 else 0 end) as has_pred
from (select distinct user_id from test_tbl) a
left join (select distinct user_id from pred_mfp_tbl) b
on a.user_id = b.user_id;
"""
hint = f"""
left join then count the nulls
"""

#input
text_area = widgets.Textarea(
    value="Write your query here.",
    rows=10,  # Initial number of visible rows
    description="Query:",
    layout={'width': '730px'} # Adjust width as needed
)

#output
query_result = query_result = widgets.Output(
    layout=widgets.Layout(
        border='1px solid lightgray', # Add a border to make the scrollable area visible
        height='300px',              # Fixed height for the output area
        overflow_y='scroll'          # Enable vertical scrolling
    )
)

#button
execute_button = widgets.Button(
    description='Execute query',
    disabled=False,
    button_style='', # 'success', 'info', 'warning', 'danger' or ''
    icon='check' # (FontAwesome icons available)
)
def process_query(b):
    query = text_area.value
    with query_result:
        query_result.clear_output()
        display(execute_query(query))
execute_button.on_click(process_query)

answer_button = widgets.Button(
    description='Reveal answer key',
    disabled=False,
    button_style='', # 'success', 'info', 'warning', 'danger' or ''
    icon='check' # (FontAwesome icons available)
)
def show_answer_key(b):
    query = text_area.value
    with query_result:
        query_result.clear_output()
        print(f'Answer key is: \n{answer_key}')
answer_button.on_click(show_answer_key)

hint_button = widgets.Button(
    description='Reveal hint',
    disabled=False,
    button_style='', # 'success', 'info', 'warning', 'danger' or ''
    icon='check' # (FontAwesome icons available)
)
def show_hint(b):
    with query_result:
        query_result.clear_output()
        print(f'Hint: \n{hint}')
hint_button.on_click(show_hint)

display(text_area, execute_button, hint_button, answer_button, query_result)
# @title For users who we do not have recommendations for in `pred_mfp_tbl`, give them recommendations from `pred_top10_tbl`. Create a new view for this called `pred_final_tbl`.

answer_key = f"""
create or replace view pred_top10_tbl_missing_only as (
select
 a.user_id
 ,a.item_id
from pred_top10_tbl a
left join pred_mfp_tbl b
on a.user_id = b.user_id
where b.user_id is null);

create or replace view pred_final_tbl as (
select user_id,item_id from pred_mfp_tbl
union all
select user_id,item_id from pred_top10_tbl_missing_only);

select 'pred' split, count(*) nb_pred, count(distinct user_id) nb_user from pred_final_tbl
union all
select 'test' split, count(*) nb_pred, count(distinct user_id) nb_user from test_tbl;
"""
hint = f"""
One way you can do this is:

1) Find out who did not get recommendation in `pred_mfp_tbl`
2) Filter `pred_top10_tbl` to have only those users
3) Concatenate `pred_top10_tbl` containing the missing users and `pred_mfp_tbl` to create view `pred_final_tbl`. Make sure columns are the same when you concatenate.
"""

#input
text_area = widgets.Textarea(
    value="Write your query here.",
    rows=10,  # Initial number of visible rows
    description="Query:",
    layout={'width': '730px'} # Adjust width as needed
)

#output
query_result = query_result = widgets.Output(
    layout=widgets.Layout(
        border='1px solid lightgray', # Add a border to make the scrollable area visible
        height='300px',              # Fixed height for the output area
        overflow_y='scroll'          # Enable vertical scrolling
    )
)

#button
execute_button = widgets.Button(
    description='Execute query',
    disabled=False,
    button_style='', # 'success', 'info', 'warning', 'danger' or ''
    icon='check' # (FontAwesome icons available)
)
def process_query(b):
    query = text_area.value
    with query_result:
        query_result.clear_output()
        display(execute_query(query))
execute_button.on_click(process_query)

answer_button = widgets.Button(
    description='Reveal answer key',
    disabled=False,
    button_style='', # 'success', 'info', 'warning', 'danger' or ''
    icon='check' # (FontAwesome icons available)
)
def show_answer_key(b):
    query = text_area.value
    with query_result:
        query_result.clear_output()
        print(f'Answer key is: \n{answer_key}')
answer_button.on_click(show_answer_key)

hint_button = widgets.Button(
    description='Reveal hint',
    disabled=False,
    button_style='', # 'success', 'info', 'warning', 'danger' or ''
    icon='check' # (FontAwesome icons available)
)
def show_hint(b):
    with query_result:
        query_result.clear_output()
        print(f'Hint: \n{hint}')
hint_button.on_click(show_hint)

display(text_area, execute_button, hint_button, answer_button, query_result)
# @title Calculate top-10 hit rate for `pred_final_tbl` and see if it is better than `pred_top10_tbl`

answer_key = f"""
select
 'pred_top10_tbl' model
 ,avg(case when nb_hit > 0 then 1 else 0 end) as top_10_hit_rate
from
(select
 a.user_id
 ,sum(case when b.item_id is not null then 1 else 0 end) as nb_hit
from eval_tbl a
left join pred_top10_tbl b
on a.user_id = b.user_id
 and a.item_id = b.item_id
group by 1) c

union all

select
 'pred_final_tbl' model
 ,avg(case when nb_hit > 0 then 1 else 0 end) as top_10_hit_rate
from
(select
 a.user_id
 ,sum(case when b.item_id is not null then 1 else 0 end) as nb_hit
from eval_tbl a
left join pred_final_tbl b
on a.user_id = b.user_id
 and a.item_id = b.item_id
group by 1) c;
"""
hint = f"""
Do the same thing as what you did for `pred_top10_tbl` then concatenate the result together.
"""

#input
text_area = widgets.Textarea(
    value="Write your query here.",
    rows=10,  # Initial number of visible rows
    description="Query:",
    layout={'width': '730px'} # Adjust width as needed
)

#output
query_result = query_result = widgets.Output(
    layout=widgets.Layout(
        border='1px solid lightgray', # Add a border to make the scrollable area visible
        height='300px',              # Fixed height for the output area
        overflow_y='scroll'          # Enable vertical scrolling
    )
)

#button
execute_button = widgets.Button(
    description='Execute query',
    disabled=False,
    button_style='', # 'success', 'info', 'warning', 'danger' or ''
    icon='check' # (FontAwesome icons available)
)
def process_query(b):
    query = text_area.value
    with query_result:
        query_result.clear_output()
        display(execute_query(query))
execute_button.on_click(process_query)

answer_button = widgets.Button(
    description='Reveal answer key',
    disabled=False,
    button_style='', # 'success', 'info', 'warning', 'danger' or ''
    icon='check' # (FontAwesome icons available)
)
def show_answer_key(b):
    query = text_area.value
    with query_result:
        query_result.clear_output()
        print(f'Answer key is: \n{answer_key}')
answer_button.on_click(show_answer_key)

hint_button = widgets.Button(
    description='Reveal hint',
    disabled=False,
    button_style='', # 'success', 'info', 'warning', 'danger' or ''
    icon='check' # (FontAwesome icons available)
)
def show_hint(b):
    with query_result:
        query_result.clear_output()
        print(f'Hint: \n{hint}')
hint_button.on_click(show_hint)

display(text_area, execute_button, hint_button, answer_button, query_result)

๐ŸŽŠ Congratulations! You have made 21x improvement on the baseline to get almost 2% top-10 hit rate. This marks the end of this exercise but if you want to further improve our algorithm, feel free to do so with the console below.

# @title Run anything

answer_key = f"""
There is no correcrt answer in life.
"""
hint = f"""
You just need to mess around and find out.
"""

#input
text_area = widgets.Textarea(
    value="Write your query here.",
    rows=10,  # Initial number of visible rows
    description="Query:",
    layout={'width': '730px'} # Adjust width as needed
)

#output
query_result = query_result = widgets.Output(
    layout=widgets.Layout(
        border='1px solid lightgray', # Add a border to make the scrollable area visible
        height='300px',              # Fixed height for the output area
        overflow_y='scroll'          # Enable vertical scrolling
    )
)

#button
execute_button = widgets.Button(
    description='Execute query',
    disabled=False,
    button_style='', # 'success', 'info', 'warning', 'danger' or ''
    icon='check' # (FontAwesome icons available)
)
def process_query(b):
    query = text_area.value
    with query_result:
        query_result.clear_output()
        display(execute_query(query))
execute_button.on_click(process_query)

answer_button = widgets.Button(
    description='Reveal answer key',
    disabled=False,
    button_style='', # 'success', 'info', 'warning', 'danger' or ''
    icon='check' # (FontAwesome icons available)
)
def show_answer_key(b):
    query = text_area.value
    with query_result:
        query_result.clear_output()
        print(f'Answer key is: \n{answer_key}')
answer_button.on_click(show_answer_key)

hint_button = widgets.Button(
    description='Reveal hint',
    disabled=False,
    button_style='', # 'success', 'info', 'warning', 'danger' or ''
    icon='check' # (FontAwesome icons available)
)
def show_hint(b):
    with query_result:
        query_result.clear_output()
        print(f'Hint: \n{hint}')
hint_button.on_click(show_hint)

display(text_area, execute_button, hint_button, answer_button, query_result)