- Published on
SaaS Cohorts data prep with Postgres
Prepare your SaaS data for Cohort Analysis with Postgres.
If your business deals with monthly & yearly subscriptions and you need a simple and easy way to prepare your data for cohort analysis to calculate the customer retention & churn as well as MRR retention and churn then this tutorial will help you get you from data prep to visualisation.
The dataset
Let's take a sample product table that looks like this.
Product
id | name | price |
---|---|---|
1 | monthly | 125 |
2 | annual | 1200 |
Users who pay monthly would pay $125
& users who pay annualy pay $1200
once So, the annual plan is 25%
cheaper.
Customer subscriptions
id | customer_id | product_id | start date | end date |
---|---|---|---|---|
1 | C2448 | prd_1 | 2017-01-01 | NULL |
2 | C2449 | prd_1 | 2017-01-01 | 2021-09-0 |
3 | C2450 | prd_1 | 2017-01-01 | 2019-01-1 |
4 | C2451 | prd_2 | 2017-01-01 | NULL |
The table of customer subscriptions has the following fields:
customer_id
: unique id of the customerproduct_id
: id of the product that matches the id in the product tablestart_date
: date when the subscription startedend_date
: date when the subscription ends,NULL
shows an active subscription
Now, to prepare a dataset for cohorts we need a record for every month a user was active along with the revenue generated by that user in that month
CREATE TABLE customers_subscriptions.activity_cohort
AS
(
WITH RECURSIVE
user_months
AS
( SELECT customer_id AS user_id
, DATE_TRUNC('month', signup_date)::date AS signup_month
, DATE_TRUNC('month', signup_date)::date AS active_month
, DATE_TRUNC('month', cancel_date)::date AS end_month
, CASE
WHEN product = 'prd_1'
THEN 100
WHEN product = 'prd_2'
THEN 125
END AS revenue
FROM customers_subscriptions
WHERE signup_date <= cancel_date
UNION ALL
SELECT user_id
, signup_month
, active_month + INTERVAL '1 month'
, end_month
, revenue
FROM user_months
WHERE active_month + INTERVAL '1 month' <= end_month
)
SELECT DISTINCT user_id AS customer_id
, signup_month
, DATE_TRUNC('month', active_month)::date AS active_month
, revenue
FROM user_months
ORDER BY user_id
, active_month
);
Now, Let's prepare the data model
CREATE TABLE customers_subscriptions.retention_cohorts AS (
WITH
signups
AS
( SELECT signup_month
, signup_month AS active_month
, 0 AS month
, COUNT (customer_id) AS users
, SUM (revenue) AS revenue
, COUNT (customer_id) /
COUNT (customer_id) AS users_percent
, SUM (revenue) /
SUM (revenue) AS revenue_percent
, 0 AS churned_users
, 0 AS user_churn
, 0 AS revenue_churn
, 0 AS churned_revenue
, 0 AS churned_users_percent
, 0 AS churned_revenue_percent
, 0 AS last_month_users
, 0 AS churned_users_percent_last_month
, 0 AS last_month_revenue
, 0 AS churned_revenue_percent_last_month
FROM ( SELECT DISTINCT customer_id
, revenue
, signup_month
FROM customers_subscriptions.activity_cohort
)
AS l
GROUP BY signup_month
)
,
actives
AS
( SELECT signup_month
, active_month
, RANK() OVER ( PARTITION BY signup_month
ORDER BY active_month ASC
) AS month
, COUNT (customer_id) AS users
, SUM (revenue) AS revenue
FROM customers_subscriptions.activity_cohort
GROUP BY signup_month
, active_month
)
,
retention
AS
( SELECT a.signup_month
, a.active_month
, a.month
, a.users
, s.users-a.users AS churned_users
, a.users /
s.users::numeric AS users_percent
, (s.users-a.users) /
s.users::numeric AS churned_users_percent
, a.revenue
, s.revenue-a.revenue AS churned_revenue
, a.revenue /
s.revenue::numeric AS revenue_percent
, (s.revenue-a.revenue) /
s.revenue::numeric AS churned_revenue_percent
FROM actives a
JOIN signups s
ON a.signup_month = s.signup_month
)
,
churn
AS
( SELECT *
, COALESCE ( LAG ( users, 1)
OVER ( PARTITION BY signup_month
ORDER BY active_month ASC
) , users ) AS last_month_users
, COALESCE ( LAG (revenue, 1)
OVER ( PARTITION BY signup_month
ORDER BY active_month ASC
) , revenue ) AS last_month_revenue
FROM retention
)
,
churn_rates
AS
( SELECT signup_month
, active_month
, month
, users
, churned_users
, last_month_users
, last_month_users-users as user_churn
, users_percent
, churned_users_percent
, ( (last_month_users-users) /
NULLIF(last_month_users::numeric, 0)
) AS churned_users_percent_last_month
, revenue
, churned_revenue
, last_month_revenue
, last_month_revenue-revenue AS revenue_churn
, revenue_percent
, churned_revenue_percent
, ( (last_month_revenue-revenue) /
NULLIF(last_month_revenue::numeric, 0)
) AS churned_revenue_percent_last_month
FROM churn
)
SELECT signup_month
, active_month
, month
, users
, last_month_users
, user_churn
, churned_users
, users_percent
, churned_users_percent
, churned_users_percent_last_month
, revenue
, churned_revenue
, last_month_revenue
, revenue_churn
, revenue_percent
, churned_revenue_percent
, churned_revenue_percent_last_month
FROM signups
UNION ALL
SELECT signup_month
, active_month
, month
, users
, last_month_users
, user_churn
, churned_users
, users_percent
, churned_users_percent
, churned_users_percent_last_month
, revenue
, churned_revenue
, last_month_revenue
, revenue_churn
, revenue_percent
, churned_revenue_percent
, churned_revenue_percent_last_month
FROM churn_rates
);
Metabase
Here's the sample data visualised on Metabase