Follow

Keep Up to Date with the Most Important News

By pressing the Subscribe button, you confirm that you have read and are agreeing to our Privacy Policy and Terms of Use
Contact

Add column count of occurrences

I have table in SQL Server called df found here:

-- Parameters
DECLARE @Year INT = 2020; --, @Country varchar(50)= 'Brazil';

WITH ModeData AS (
    SELECT country, 
           a.Mode
    FROM df
    CROSS APPLY (
        SELECT TOP 1 Mode, COUNT(*) AS cnt
        FROM (VALUES (val1), (val2), (val3)) AS t(Mode)
        GROUP BY Mode
        ORDER BY COUNT(*) DESC
    ) a
  where year=@year --and  country=@country 
)

-- Calculate proportions and map modes to labels
, Proportions AS (
    SELECT country, 
           CASE 
               WHEN Mode = 1 THEN 'Very Dissatisfied'
               WHEN Mode = 2 THEN 'Dissatisfied'
               WHEN Mode = 3 THEN 'Neutral'
               WHEN Mode = 4 THEN 'Satisfied'
               WHEN Mode = 5 THEN 'Very Satisfied'
           END AS SatisfactionLevel,
           COUNT(*) * 1.0 / SUM(COUNT(*)) OVER (PARTITION BY country) AS Proportion
    FROM ModeData
    GROUP BY country, Mode
)

-- Pivot the results to get each satisfaction level as a column
SELECT country, 
       [Very Dissatisfied], 
       [Dissatisfied], 
       [Neutral], 
       [Satisfied], 
       [Very Satisfied]
FROM Proportions
PIVOT (
    MAX(Proportion)
    FOR SatisfactionLevel IN ([Very Dissatisfied], [Dissatisfied], [Neutral], [Satisfied], [Very Satisfied])
) AS p
ORDER BY country;

The resulted table is :

Country Very Dissatisfied Dissatisfied Neutral Satisfied Very Satisfied
Brazil 0.285714285714 0.142857142857 0.142857142857 0.142857142857 0.285714285714
Canada 0.111111111111 0.111111111111 0.333333333333 0.222222222222 0.222222222222
France 0.250000000000 0.125000000000 0.250000000000 0.250000000000 0.125000000000
Italy 0.166666666666 0.166666666666 0.166666666666 0.166666666666 0.333333333333
USA 0.222222222222 0.111111111111 0.111111111111 0.333333333333 0.222222222222

I want to calculate the count of each country.How many rows has each country in the table df and add this count as an extra column in the resulted table. Ideally and based on the toy example data the result I want to look like:

MEDevel.com: Open-source for Healthcare and Education

Collecting and validating open-source software for healthcare, education, enterprise, development, medical imaging, medical records, and digital pathology.

Visit Medevel

Country Very Dissatisfied Dissatisfied Neutral Satisfied Very Satisfied Count
Brazil 0.285714285714 0.142857142857 0.142857142857 0.142857142857 0.285714285714 7
Canada 0.111111111111 0.111111111111 0.333333333333 0.222222222222 0.222222222222 9
France 0.250000000000 0.125000000000 0.250000000000 0.250000000000 0.125000000000 8
Italy 0.166666666666 0.166666666666 0.166666666666 0.166666666666 0.333333333333 6
USA 0.222222222222 0.111111111111 0.111111111111 0.333333333333 0.222222222222 9

>Solution :

Adding an extra CTE to resolve the country counts, and then including it in the final results.

Sample Table

-- Create the table
CREATE TABLE df (
    country VARCHAR(50),
    year INT,
    val1 INT,
    val2 INT,
    val3 INT
);

-- Insert 10 rows of data
INSERT INTO df (country, year, val1, val2, val3) VALUES
('USA', 2020, 4, 4, 5),
  ('USA', 2020, 4, 4, 5),
  ('USA', 2020, 5, 5, 5),
  ('USA', 2020, 5, 5, 5),
  ('USA', 2020, 1, 1, 5),
  ('USA', 2020, 3, 3, 5),
  ('USA', 2020, 4, 2, 5),
  ('USA', 2020, 1, 1, 5),
  ('USA', 2020, 2, 2, 5),
  ('Canada', 2020, 1, 1, 3),
  ('Canada', 2020, 2, 2, 3),
  ('Canada', 2020, 4, 4, 3),
  ('Canada', 2020, 3, 4, 3),
  ('Canada', 2020, 3, 4, 3),
  ('Canada', 2020, 3, 4, 3),
  ('Canada', 2020, 5, 4, 3),
  ('Canada', 2020, 5, 4, 5),
  ('Canada', 2020, 5, 4, 5),
('Germany', 2022, 5, 5, 4),
('France', 2020, 1,1, 2),
  ('France', 2020, 1,1, 2),
  ('France', 2020, 3, 2, 2),
  ('France', 2020, 3, 4, 2),
  ('France', 2020, 3, 5, 5),
  ('France', 2020, 3, 4, 4),
  ('France', 2020, 3, 4, 4),
  ('France', 2020, 3, 4, 3),
('UK', 2021, 4, 2, 3),
('Australia', 2022, 3, 3, 4),
('Italy', 2020, 5, 5, 5),
  ('Italy', 2020, 5, 5, 5),
  ('Italy', 2020, 5, 1,1),
   ('Italy', 2020, 4, 4,1),
   ('Italy', 2020, 2, 1,2),
  ('Italy', 2020, 3, 5, 3),
('Spain', 2021, 1, 2, 3),
('Mexico', 2022, 4, 4, 4),
    ('Brazil', 2020, 4, 1, 1),
  ('Brazil', 2020, 4, 1, 1),
  ('Brazil', 2020, 4, 3, 4),
  ('Brazil', 2020, 5, 3, 5),
    ('Brazil', 2020, 5, 3, 5),
  ('Brazil', 2020, 3, 3, 1),
('Brazil', 2020, 2, 3, 1);
-- Add the mode column to the table
select * from df;

Query

-- Parameters
DECLARE @Year INT = 2020; --, @Country varchar(50)= 'Brazil';

WITH ModeData AS (
    SELECT country, 
           a.Mode
    FROM df
    CROSS APPLY (
        SELECT TOP 1 Mode, COUNT(*) AS cnt
        FROM (VALUES (val1), (val2), (val3)) AS t(Mode)
        GROUP BY Mode
        ORDER BY COUNT(*) DESC
    ) a
  where year=@year --and  country=@country 
)

-- Calculate proportions and map modes to labels
, Proportions AS (
    SELECT country, 
           CASE 
               WHEN Mode = 1 THEN 'Very Dissatisfied'
               WHEN Mode = 2 THEN 'Dissatisfied'
               WHEN Mode = 3 THEN 'Neutral'
               WHEN Mode = 4 THEN 'Satisfied'
               WHEN Mode = 5 THEN 'Very Satisfied'
           END AS SatisfactionLevel,
           COUNT(*) * 1.0 / SUM(COUNT(*)) OVER (PARTITION BY country) AS Proportion
    FROM ModeData
    GROUP BY country, Mode
)

, Pivoted AS (
-- Pivot the results to get each satisfaction level as a column
SELECT Country, 
       [Very Dissatisfied], 
       [Dissatisfied], 
       [Neutral], 
       [Satisfied], 
       [Very Satisfied]
FROM Proportions
PIVOT (
    MAX(Proportion)
    FOR SatisfactionLevel IN ([Very Dissatisfied], [Dissatisfied], [Neutral], [Satisfied], [Very Satisfied])
) AS p
),

CountryCounts AS (
-- Count of countries from original df table
SELECT Country,
       COUNT(Country) AS Total
FROM df
GROUP BY Country)

SELECT Pivoted.Country,
    [Very Dissatisfied],
    Dissatisfied,
    Neutral,
    Satisfied,
    [Very Satisfied],
    Total
FROM Pivoted
INNER JOIN CountryCounts
ON Pivoted.Country = CountryCounts.Country
ORDER BY Pivoted.Country;
Add a comment

Leave a Reply

Keep Up to Date with the Most Important News

By pressing the Subscribe button, you confirm that you have read and are agreeing to our Privacy Policy and Terms of Use

Discover more from Dev solutions

Subscribe now to keep reading and get access to the full archive.

Continue reading