I have table in SQL Server called df found here:
-- Parameters
DECLARE @Year INT = 2020; --, @Country varchar(50)= 'Brazil';
WITH ModeData AS (
SELECT country,
a.Mode
FROM df
CROSS APPLY (
SELECT TOP 1 Mode, COUNT(*) AS cnt
FROM (VALUES (val1), (val2), (val3)) AS t(Mode)
GROUP BY Mode
ORDER BY COUNT(*) DESC
) a
where year=@year --and country=@country
)
-- Calculate proportions and map modes to labels
, Proportions AS (
SELECT country,
CASE
WHEN Mode = 1 THEN 'Very Dissatisfied'
WHEN Mode = 2 THEN 'Dissatisfied'
WHEN Mode = 3 THEN 'Neutral'
WHEN Mode = 4 THEN 'Satisfied'
WHEN Mode = 5 THEN 'Very Satisfied'
END AS SatisfactionLevel,
COUNT(*) * 1.0 / SUM(COUNT(*)) OVER (PARTITION BY country) AS Proportion
FROM ModeData
GROUP BY country, Mode
)
-- Pivot the results to get each satisfaction level as a column
SELECT country,
[Very Dissatisfied],
[Dissatisfied],
[Neutral],
[Satisfied],
[Very Satisfied]
FROM Proportions
PIVOT (
MAX(Proportion)
FOR SatisfactionLevel IN ([Very Dissatisfied], [Dissatisfied], [Neutral], [Satisfied], [Very Satisfied])
) AS p
ORDER BY country;
The resulted table is :
| Country | Very Dissatisfied | Dissatisfied | Neutral | Satisfied | Very Satisfied |
|---|---|---|---|---|---|
| Brazil | 0.285714285714 | 0.142857142857 | 0.142857142857 | 0.142857142857 | 0.285714285714 |
| Canada | 0.111111111111 | 0.111111111111 | 0.333333333333 | 0.222222222222 | 0.222222222222 |
| France | 0.250000000000 | 0.125000000000 | 0.250000000000 | 0.250000000000 | 0.125000000000 |
| Italy | 0.166666666666 | 0.166666666666 | 0.166666666666 | 0.166666666666 | 0.333333333333 |
| USA | 0.222222222222 | 0.111111111111 | 0.111111111111 | 0.333333333333 | 0.222222222222 |
I want to calculate the count of each country.How many rows has each country in the table df and add this count as an extra column in the resulted table. Ideally and based on the toy example data the result I want to look like:
| Country | Very Dissatisfied | Dissatisfied | Neutral | Satisfied | Very Satisfied | Count |
|---|---|---|---|---|---|---|
| Brazil | 0.285714285714 | 0.142857142857 | 0.142857142857 | 0.142857142857 | 0.285714285714 | 7 |
| Canada | 0.111111111111 | 0.111111111111 | 0.333333333333 | 0.222222222222 | 0.222222222222 | 9 |
| France | 0.250000000000 | 0.125000000000 | 0.250000000000 | 0.250000000000 | 0.125000000000 | 8 |
| Italy | 0.166666666666 | 0.166666666666 | 0.166666666666 | 0.166666666666 | 0.333333333333 | 6 |
| USA | 0.222222222222 | 0.111111111111 | 0.111111111111 | 0.333333333333 | 0.222222222222 | 9 |
>Solution :
Adding an extra CTE to resolve the country counts, and then including it in the final results.
Sample Table
-- Create the table
CREATE TABLE df (
country VARCHAR(50),
year INT,
val1 INT,
val2 INT,
val3 INT
);
-- Insert 10 rows of data
INSERT INTO df (country, year, val1, val2, val3) VALUES
('USA', 2020, 4, 4, 5),
('USA', 2020, 4, 4, 5),
('USA', 2020, 5, 5, 5),
('USA', 2020, 5, 5, 5),
('USA', 2020, 1, 1, 5),
('USA', 2020, 3, 3, 5),
('USA', 2020, 4, 2, 5),
('USA', 2020, 1, 1, 5),
('USA', 2020, 2, 2, 5),
('Canada', 2020, 1, 1, 3),
('Canada', 2020, 2, 2, 3),
('Canada', 2020, 4, 4, 3),
('Canada', 2020, 3, 4, 3),
('Canada', 2020, 3, 4, 3),
('Canada', 2020, 3, 4, 3),
('Canada', 2020, 5, 4, 3),
('Canada', 2020, 5, 4, 5),
('Canada', 2020, 5, 4, 5),
('Germany', 2022, 5, 5, 4),
('France', 2020, 1,1, 2),
('France', 2020, 1,1, 2),
('France', 2020, 3, 2, 2),
('France', 2020, 3, 4, 2),
('France', 2020, 3, 5, 5),
('France', 2020, 3, 4, 4),
('France', 2020, 3, 4, 4),
('France', 2020, 3, 4, 3),
('UK', 2021, 4, 2, 3),
('Australia', 2022, 3, 3, 4),
('Italy', 2020, 5, 5, 5),
('Italy', 2020, 5, 5, 5),
('Italy', 2020, 5, 1,1),
('Italy', 2020, 4, 4,1),
('Italy', 2020, 2, 1,2),
('Italy', 2020, 3, 5, 3),
('Spain', 2021, 1, 2, 3),
('Mexico', 2022, 4, 4, 4),
('Brazil', 2020, 4, 1, 1),
('Brazil', 2020, 4, 1, 1),
('Brazil', 2020, 4, 3, 4),
('Brazil', 2020, 5, 3, 5),
('Brazil', 2020, 5, 3, 5),
('Brazil', 2020, 3, 3, 1),
('Brazil', 2020, 2, 3, 1);
-- Add the mode column to the table
select * from df;
Query
-- Parameters
DECLARE @Year INT = 2020; --, @Country varchar(50)= 'Brazil';
WITH ModeData AS (
SELECT country,
a.Mode
FROM df
CROSS APPLY (
SELECT TOP 1 Mode, COUNT(*) AS cnt
FROM (VALUES (val1), (val2), (val3)) AS t(Mode)
GROUP BY Mode
ORDER BY COUNT(*) DESC
) a
where year=@year --and country=@country
)
-- Calculate proportions and map modes to labels
, Proportions AS (
SELECT country,
CASE
WHEN Mode = 1 THEN 'Very Dissatisfied'
WHEN Mode = 2 THEN 'Dissatisfied'
WHEN Mode = 3 THEN 'Neutral'
WHEN Mode = 4 THEN 'Satisfied'
WHEN Mode = 5 THEN 'Very Satisfied'
END AS SatisfactionLevel,
COUNT(*) * 1.0 / SUM(COUNT(*)) OVER (PARTITION BY country) AS Proportion
FROM ModeData
GROUP BY country, Mode
)
, Pivoted AS (
-- Pivot the results to get each satisfaction level as a column
SELECT Country,
[Very Dissatisfied],
[Dissatisfied],
[Neutral],
[Satisfied],
[Very Satisfied]
FROM Proportions
PIVOT (
MAX(Proportion)
FOR SatisfactionLevel IN ([Very Dissatisfied], [Dissatisfied], [Neutral], [Satisfied], [Very Satisfied])
) AS p
),
CountryCounts AS (
-- Count of countries from original df table
SELECT Country,
COUNT(Country) AS Total
FROM df
GROUP BY Country)
SELECT Pivoted.Country,
[Very Dissatisfied],
Dissatisfied,
Neutral,
Satisfied,
[Very Satisfied],
Total
FROM Pivoted
INNER JOIN CountryCounts
ON Pivoted.Country = CountryCounts.Country
ORDER BY Pivoted.Country;