Published on

SaaS Cohorts data prep with Postgres

Prepare your SaaS data for Cohort Analysis with Postgres.

If your business deals with monthly & yearly subscriptions and you need a simple and easy way to prepare your data for cohort analysis to calculate the customer retention & churn as well as MRR retention and churn then this tutorial will help you get you from data prep to visualisation.

The dataset

Let's take a sample product table that looks like this.

Product

idnameprice
1monthly125
2annual1200

Users who pay monthly would pay $125 & users who pay annualy pay $1200 once So, the annual plan is 25% cheaper.

Customer subscriptions

idcustomer_idproduct_idstart dateend date
1C2448prd_12017-01-01NULL
2C2449prd_12017-01-012021-09-0
3C2450prd_12017-01-012019-01-1
4C2451prd_22017-01-01NULL

The table of customer subscriptions has the following fields:

  • customer_id: unique id of the customer
  • product_id: id of the product that matches the id in the product table
  • start_date: date when the subscription started
  • end_date: date when the subscription ends, NULL shows an active subscription

Now, to prepare a dataset for cohorts we need a record for every month a user was active along with the revenue generated by that user in that month

CREATE  TABLE customers_subscriptions.activity_cohort
AS
(
WITH RECURSIVE
  user_months
    AS
      ( SELECT  customer_id                                 AS  user_id
            ,   DATE_TRUNC('month', signup_date)::date      AS  signup_month
            ,   DATE_TRUNC('month', signup_date)::date      AS  active_month
            ,   DATE_TRUNC('month', cancel_date)::date      AS  end_month
            ,   CASE
                    WHEN  product = 'prd_1'
                    THEN  100 
                    WHEN  product = 'prd_2'
                    THEN  125
                END                                         AS  revenue
        FROM    customers_subscriptions
        WHERE   signup_date <= cancel_date

  UNION ALL

  SELECT  user_id
      ,   signup_month
      ,   active_month + INTERVAL '1 month'
      ,   end_month
      ,   revenue
  FROM    user_months
  WHERE   active_month + INTERVAL '1 month' <= end_month
)
SELECT  DISTINCT user_id                                    AS  customer_id
    ,   signup_month
    ,   DATE_TRUNC('month', active_month)::date             AS active_month
    ,   revenue
FROM    user_months
ORDER BY  user_id
    ,     active_month
);

Now, Let's prepare the data model

CREATE TABLE  customers_subscriptions.retention_cohorts AS  (

WITH
  signups
    AS
      ( SELECT  signup_month
            ,   signup_month              AS  active_month
            ,   0                         AS  month
            ,   COUNT (customer_id)       AS  users        
            ,   SUM   (revenue)           AS  revenue
            ,   COUNT (customer_id) /
                COUNT (customer_id)       AS  users_percent    
            ,   SUM   (revenue) /
                SUM   (revenue)           AS  revenue_percent 
            ,   0                         AS  churned_users
            ,   0                         AS  user_churn
            ,   0                         AS  revenue_churn    
            ,   0                         AS  churned_revenue
            ,   0                         AS  churned_users_percent
            ,   0                         AS  churned_revenue_percent
            ,   0                         AS  last_month_users
            ,   0                         AS  churned_users_percent_last_month
            ,   0                         AS  last_month_revenue
            ,   0                         AS  churned_revenue_percent_last_month
    
        FROM    ( SELECT  DISTINCT  customer_id
                      ,   revenue
                      ,   signup_month 
                  FROM    customers_subscriptions.activity_cohort
                ) 
        AS  l 
        GROUP BY signup_month
      )
      ,
  actives
    AS
      ( SELECT  signup_month
            ,   active_month
            , 	RANK() OVER ( PARTITION BY  signup_month
                              ORDER BY      active_month ASC
                            )                               AS  month
            ,   COUNT (customer_id)                         AS  users
            ,   SUM   (revenue)                             AS  revenue
        FROM    customers_subscriptions.activity_cohort
        GROUP BY  signup_month
              ,   active_month
      )
      ,
retention
  AS 
    ( SELECT  a.signup_month
          ,   a.active_month
          ,   a.month
          ,   a.users    
          ,   s.users-a.users           AS  churned_users    
          ,   a.users /
              s.users::numeric          AS  users_percent    
          ,   (s.users-a.users) /
              s.users::numeric          AS  churned_users_percent        
          ,   a.revenue   
          ,   s.revenue-a.revenue       AS  churned_revenue
          ,   a.revenue /
              s.revenue::numeric        AS  revenue_percent
          ,   (s.revenue-a.revenue) /
              s.revenue::numeric        AS  churned_revenue_percent    
      FROM    actives   a
      JOIN    signups   s
      ON      a.signup_month  = s.signup_month

    )
    ,
churn
  AS
    ( SELECT  *
          ,   COALESCE  ( LAG ( users, 1)
              OVER      ( PARTITION BY  signup_month 
                          ORDER BY      active_month ASC
                        ) , users )     AS  last_month_users
          ,   COALESCE  ( LAG (revenue, 1) 
              OVER      ( PARTITION BY  signup_month
                          ORDER BY      active_month ASC
                        ) , revenue )   AS  last_month_revenue
    FROM    retention
    )
    ,
churn_rates
  AS
    ( SELECT  signup_month
          ,   active_month
          ,   month
          ,   users
          ,   churned_users 
          ,   last_month_users
          ,   last_month_users-users as user_churn    
          ,   users_percent
          ,   churned_users_percent    
          ,   ( (last_month_users-users) /
                NULLIF(last_month_users::numeric, 0)
              )               AS  churned_users_percent_last_month   
          ,   revenue
          ,   churned_revenue
          ,   last_month_revenue
          ,   last_month_revenue-revenue AS revenue_churn    
          ,   revenue_percent
          ,   churned_revenue_percent
          ,   ( (last_month_revenue-revenue)  /
                NULLIF(last_month_revenue::numeric, 0)
              )               AS  churned_revenue_percent_last_month    
        FROM  churn
    )
SELECT  signup_month
    ,   active_month
    ,   month
    ,   users      
    ,   last_month_users    
    ,   user_churn    
    ,   churned_users    
    ,   users_percent
    ,   churned_users_percent
    ,   churned_users_percent_last_month 
    ,   revenue
    ,   churned_revenue
    ,   last_month_revenue
    ,   revenue_churn
    ,   revenue_percent
    ,   churned_revenue_percent
    ,   churned_revenue_percent_last_month
FROM    signups
UNION ALL
SELECT  signup_month
    ,   active_month
    ,   month
    ,   users
    ,   last_month_users    
    ,   user_churn    
    ,   churned_users  
    ,   users_percent
    ,   churned_users_percent
    ,   churned_users_percent_last_month    
    ,   revenue
    ,   churned_revenue
    ,   last_month_revenue
    ,   revenue_churn    
    ,   revenue_percent
    ,   churned_revenue_percent
    ,   churned_revenue_percent_last_month 
FROM    churn_rates
);

Metabase

Here's the sample data visualised on Metabase

Tableau

Link to Tableau public visulisation