SQL Group by Month - group-by

I have a table of Sales data with each line as a sale date mm/dd/yy.
I'm trying to create a query so I can see total sales for each month I have.
Would I have to create a separate column that holds only the month? Or is there a way to take the month directly from that date format?

The short answer is: You don't need a separate column. You can group by the result of a function call.
The details of what that function might be depend on your database, how you want the results formatted, and performance considerations.
The following both work in Oracle:
-- Number of sale rows per calendar month. The year is part of the grouping
-- key, so the same month in different years is reported separately.
SELECT
    EXTRACT(YEAR FROM ae.saledate),
    EXTRACT(MONTH FROM ae.saledate),
    COUNT(*)
FROM mytable ae
GROUP BY
    EXTRACT(YEAR FROM ae.saledate),
    EXTRACT(MONTH FROM ae.saledate);

-- Same grouping, with year and month rendered as one 'YYYY-MM' string.
SELECT
    TO_CHAR(ae.saledate, 'YYYY-MM'),
    COUNT(*)
FROM mytable ae
GROUP BY
    TO_CHAR(ae.saledate, 'YYYY-MM');
Edited to add versions that ignore year and only look at month (since I was making an assumption above that wasn't actually in the question):
-- Number of sale rows per month number only: rows from the same month of
-- different years fall into the same group.
SELECT
    EXTRACT(MONTH FROM ae.saledate),
    COUNT(*)
FROM mytable ae
GROUP BY
    EXTRACT(MONTH FROM ae.saledate);

-- Same grouping, with the month rendered as a two-character 'MM' string.
SELECT
    TO_CHAR(ae.saledate, 'MM'),
    COUNT(*)
FROM mytable ae
GROUP BY
    TO_CHAR(ae.saledate, 'MM');

The following query will be helpful.
-- Demo (SQL Server / T-SQL): total sales amount per year-month, computed from
-- a scratch table. The temp table name is spelled identically everywhere so
-- the script also runs under a case-sensitive collation.
CREATE TABLE #Temp
(
    SalesDate DATETIME,
    -- NOTE(review): FLOAT is kept so the sample results keep their original
    -- formatting; prefer DECIMAL for real monetary amounts.
    Amount    FLOAT
);

-- A VALUES list inserts every row exactly as written. The previous UNION
-- chain removed duplicates, so two sales with the same date and amount would
-- have been collapsed into one row. 'YYYYMMDD' literals are parsed the same
-- way regardless of the session's DATEFORMAT / language settings.
INSERT INTO #Temp (SalesDate, Amount)
VALUES
    ('20160112', 12),
    ('20160113', 12),
    ('20160212', 12),
    ('20160312', 12);

-- CONVERT style 120 yields 'yyyy-mm-dd hh:mi:ss'; truncating it to 7
-- characters leaves 'yyyy-mm', which is used as the grouping key.
SELECT
    CONVERT(VARCHAR(7), SalesDate, 120) AS [YYYY-MM],
    SUM(Amount)                         AS [Amount]
FROM #Temp
GROUP BY CONVERT(VARCHAR(7), SalesDate, 120)
ORDER BY [YYYY-MM];
OUTPUT :
YYYY-MM Amount
------- ----------------------
2016-01 24
2016-02 12
2016-03 12
(3 row(s) affected)
For month-wise totals only (ignoring the year):
-- Total amount per month number only. The 'yyyy-mm' prefix produced by
-- CONVERT style 120 is cut down to its last two characters ('mm'), so the
-- same month of different years lands in the same group.
SELECT
    RIGHT(CONVERT(VARCHAR(7), SalesDate, 120), 2) AS [MM],
    SUM(Amount)                                   AS [Amount]
FROM #Temp
GROUP BY
    RIGHT(CONVERT(VARCHAR(7), SalesDate, 120), 2)
Output:
MM Amount
---- ----------------------
01 24
02 12
03 12
(3 row(s) affected)

Related

Cohort Analysis with RedShift by Month

I am trying to build a cohort analysis for monthly retention but experiencing challenge getting the Month Number column right. The month number is supposed to return month(s) user transacted i.e 0 for registration month, 1 for the first month after registration month, 2 for the second month until the last month but currently, it returns negative month numbers in some cells.
It should be like this table:
cohort_month total_users month_number percentage
---------- ----------- -- ------------ ---------
January 100 0 40
January 341 1 90
January 115 2 90
February 103 0 73
February 100 1 40
March 90 0 90
Here is the SQL:
-- Monthly retention by registration cohort for calendar year 2020.
-- Output columns: cohort_month, total_users, month_number, percentage.
with cohort_items as (
    -- Exactly one row per user: the earliest 2020 registration month seen for
    -- that msisdn. Without this, a msisdn that appears more than once would
    -- be counted several times and could be matched to a later cohort month
    -- than some of its transactions.
    select
        msisdn as user_id,
        min(extract(month from insert_date)) as cohort_month
    from mfscore.t_um_user_detail
    where extract(year from insert_date) = 2020
    group by msisdn
),
user_activities as (
    -- One row per (user, months since cohort month) in which the user sent at
    -- least one transaction. The cohort month is carried along here so that
    -- no second join to cohort_items is needed later.
    select distinct
        C.user_id,
        C.cohort_month,
        extract(month from A.insert_date) - C.cohort_month as month_number
    from mfscore.t_wm_transaction_logs A
    inner join cohort_items C on A.sender_msisdn = C.user_id
    where extract(year from A.insert_date) = 2020
      -- NOTE(review): transactions dated before the user's cohort month would
      -- give a negative month_number; they are excluded here. Confirm whether
      -- such rows are expected in the data.
      and extract(month from A.insert_date) >= C.cohort_month
),
cohort_size as (
    -- Number of users registered in each cohort month.
    select
        cohort_month,
        count(1) as num_users
    from cohort_items
    group by cohort_month
),
retention as (
    -- Number of distinct active users per cohort and month offset
    -- (user_activities holds at most one row per user and offset).
    select
        cohort_month,
        month_number,
        count(1) as num_users
    from user_activities
    group by cohort_month, month_number
)
select
    R.cohort_month,
    S.num_users as total_users,
    R.month_number,
    -- Same integer arithmetic as before: multiply first, then divide.
    R.num_users * 100 / S.num_users as percentage
from retention R
inner join cohort_size S on R.cohort_month = S.cohort_month
order by R.cohort_month, R.month_number
I think the RANK window function is the right solution. The idea is to assign a rank to the months of user activity for each user, ordered by year and month.
Something like:
WITH activity_per_user AS (
    -- Tags every activity row with the ordinal of its calendar month within
    -- that user's own activity history (first active month = 1).
    SELECT
        user_id,
        event_date,
        -- DENSE_RANK rather than RANK: rows in the same month tie, and RANK
        -- would skip as many numbers as there are tied rows (e.g. 1, 1, 1, 4),
        -- so the month index would jump instead of increasing by one.
        DENSE_RANK() OVER (
            PARTITION BY user_id
            ORDER BY DATE_PART('year', event_date), DATE_PART('month', event_date) ASC
        ) AS month_number
    FROM user_activities_table
)
The rank numbering starts from 1, so you may want to subtract 1.
Then, you can group by user_id and month_number to get the number of interactions for each user per month from the subscription (adapt to your use case accordingly).
-- Number of activity rows per user and month index.
SELECT
    user_id,
    month_number,
    COUNT(1) AS n_interactions
FROM activity_per_user
GROUP BY
    user_id,
    month_number
Here is the documentation:
https://docs.aws.amazon.com/redshift/latest/dg/r_WF_RANK.html

How to show sum per day AND year postgresql

I want to get sum row values per day and per year, and showing on the same row.
The database that the first and second queries get their results from includes a table like this (ltg_data):
time lon lat geom
2018-01-30 11:20:21 -105.4333 32.3444 01010....
And then some geometries that I'm joining to.
One query:
-- Strike count per calendar day for rows whose geometry lies inside a county
-- with cwa = 'MFR', limited to the last 50500 hours (UTC).
SELECT
    to_char(time, 'MM/DD/YYYY') AS day,
    count(*) AS strikes
FROM counties
JOIN ltg_data
    ON ST_contains(counties.the_geom, ltg_data.ltg_geom)
WHERE cwa = 'MFR'
  AND time >= (now() AT TIME ZONE 'utc') - INTERVAL '50500 hours'
GROUP BY to_char(time, 'MM/DD/YYYY');
Results are like:
day strikes
01/28/2018 22
03/23/2018 15
12/19/2017 20
12/20/2017 12
Second query:
-- Strike count per calendar year, using the same join and filters as the
-- per-day query.
SELECT
    to_char(time, 'YYYY') AS year,
    count(*) AS strikes
FROM counties
JOIN ltg_data
    ON ST_contains(counties.the_geom, ltg_data.ltg_geom)
WHERE cwa = 'MFR'
  AND time >= (now() AT TIME ZONE 'utc') - INTERVAL '50500 hours'
GROUP BY to_char(time, 'YYYY');
Results are like:
year strikes
2017 32
2018 37
What I'd like is:
day daily_strikes year yearly_strikes
01/28/2018 22 2018 37
03/23/2018 15 2018 37
12/19/2017 20 2017 32
12/20/2017 12 2017 32
I found that union all shows the year totals at the very bottom, but I'd like to have the results horizontally, even if there are repeat yearly totals. Thanks for any help!
You can try this kind of approach. It's not very optimal, but at least it works:
I have a test table like this:
postgres=# select * from test;
d | v
------------+---
2001-02-16 | a
2002-02-16 | a
2002-02-17 | a
2002-02-17 | a
(4 wiersze)
And query:
-- Step 1 (per_day): number of rows in test per calendar day, together with
-- the year that day belongs to.
-- Step 2: repeat on every day row the total of the daily counts that share
-- the same year.
with per_day as (
    select
        extract('year' from d) as year,
        date_trunc('day', d)   as day,
        count(*)               as countPerDay
    from test
    group by
        date_trunc('day', d),
        extract('year' from d)
)
select
    per_day.year,
    sum(per_day.countPerDay) over (partition by extract(year from per_day.day)),
    per_day.day,
    per_day.countPerDay
from per_day
So the result looks like this:
2001 | 1 | 2001-02-16 00:00:001 | 1
2002 | 3 | 2002-02-16 00:00:001 | 1
2002 | 3 | 2002-02-17 00:00:001 | 2
-- Demo: daily strike counts with the yearly total repeated on every row.
create table strikes (
    game_date date,
    strikes   int
);

-- ISO 'YYYY-MM-DD' date literals are read the same way under every DateStyle
-- setting; 'MM/DD/YYYY' strings are rejected when DateStyle is DMY.
insert into strikes (game_date, strikes)
values
    (date '2018-01-28', 22),
    (date '2018-03-23', 15),
    (date '2017-12-19', 20),
    (date '2017-12-20', 12)
;

-- Show the raw rows.
select game_date, strikes
from strikes;

-- The window sum is partitioned by year, so every row keeps its own daily
-- value and additionally carries the total for its year.
select
    game_date,
    strikes,
    sum(strikes) over (partition by extract(year from game_date)) as sum_stikes_by_year
from strikes;
"2017-12-19" 20 "32"
"2017-12-20" 12 "32"
"2018-01-28" 22 "37"
"2018-03-23" 15 "37"
This application of aggregation is known as "windowing" functions or analytic functions:
PostgreSQL Docs
---- EDIT --- based on comments...
-- Demo: one row per individual strike (timestamp level), summarised to one
-- row per day with that day's count and the count for the whole year.
create table strikes_tally (
    strike_time timestamp,
    lat         varchar(10),
    long        varchar(10),
    geom        varchar(10)
);

insert into strikes_tally (strike_time, lat, long, geom)
values
    ('2018-01-01 12:43:00', '100.1', '50.8', '1234'),
    ('2018-01-01 12:44:00', '100.1', '50.8', '1234'),
    ('2018-01-01 12:45:00', '100.1', '50.8', '1234'),
    ('2018-01-02 20:01:00', '100.1', '50.8', '1234'),
    ('2018-01-02 20:02:00', '100.1', '50.8', '1234'),
    ('2018-01-02 22:03:00', '100.1', '50.8', '1234');

-- Group to one row per day first, then sum the daily counts per year with a
-- window function. Window counts over the ungrouped rows would return one
-- output row per strike, repeating each day's line once for every strike on
-- that day.
select
    to_char(strike_day, 'dd/mm/yyyy') as strike_date,
    count(strike_day)                 as daily_strikes,
    to_char(strike_day, 'yyyy')       as year,
    -- sum() over bigint yields numeric; cast back so both counts are bigint
    cast(
        sum(count(strike_day)) over (partition by to_char(strike_day, 'yyyy'))
        as bigint
    )                                 as yearly_strikes
from (
    select date_trunc('day', strike_time) as strike_day
    from strikes_tally
) as per_strike
group by strike_day
order by strike_day
;

postgresql: Group select by year and month from epoch time

I need to get an output from the postgresql db as follows:
2016
Nov
Dec
2017
Jan
Feb
Mar
I've been googling, testing and playing around, but haven't come close.
So I turn to you guys that will show me what nord I am!
I do know how to do this in mysql but postgres no.
Oh yeah it is Postgres 9.6.
Not sure, but maybe you need this?
with your_table(dt) as (
    values
        ('2016-11-01'::date),
        ('2016-11-02'::date),
        ('2016-12-01'::date),
        ('2017-01-01'::date),
        ('2017-02-01'::date),
        ('2017-02-02'::date),
        ('2017-03-01'::date),
        ('2017-10-01'::date),
        ('2017-11-01'::date),
        ('2018-02-01'::date)
)
-- Above is just table simulation, actual query is below:
-- One header row per year (month_number 0, no month name) is unioned with one
-- row per (year, month). Sorting by year and month number puts each year's
-- header directly above its months; header rows print the year, month rows
-- print the month abbreviation prefixed with '--'.
select
    case
        when month_name is null then y::text
        else month_name
    end
from (
    select
        extract(year from dt) as y,
        0                     as month_number,
        null                  as month_name
    from your_table
    group by extract(year from dt)

    union all

    select
        extract(year from dt),
        extract(month from dt)            as month_number,
        '--' || to_char(min(dt), 'Mon')   as month_name
    from your_table
    group by
        extract(year from dt),
        extract(month from dt)
) t
order by
    y,
    month_number

Insert subquery date according to day

I would like to insert a date, produced by a subquery, based on its day of the week. In addition, each date can only be used four times. Once a date has been used four times, the fifth row should use the next date that falls on the same weekday, in other words the same weekday of the following week. For example, Monday 6 JUNE 2016 would be followed by Monday 13 JUNE 2016 (you may check the calendar).
I have a query of getting a list of date based on presentationdatestart and presentationdateend from presentation table:
-- Lists, for every presentation row, each date between presentationdatestart
-- and presentationdateend whose weekday name equals presentationday.
-- The inline view produces the offsets 0..N, where N is the longest
-- start-to-end span found in the presentation table; every presentation row
-- is paired with every offset and the WHERE clause keeps only the offsets
-- that stay inside the row's own range and fall on the wanted weekday.
select a.presentationid,
       a.presentationday,
       to_char (a.presentationdatestart + delta, 'DD-MM-YYYY', 'NLS_CALENDAR=GREGORIAN') list_date
  from presentation a
 cross join
       (select level - 1 as delta
          from dual
       connect by level - 1 <= (select max (presentationdateend - presentationdatestart)
                                  from presentation))
 where a.presentationdatestart + delta <= a.presentationdateend
   and a.presentationday = to_char(a.presentationdatestart + delta, 'fmDay')
 -- the original author flags this ordering (date first, then id) as important
 order by a.presentationdatestart + delta,
          a.presentationid;
For example,
presentationday presentationdatestart presentationdateend
Monday 01-05-2016 04-06-2016
Tuesday 01-05-2016 04-06-2016
Wednesday 01-05-2016 04-06-2016
Thursday 01-05-2016 04-06-2016
The query result will list all possible dates between 01-05-2016 until 04-06-2016:
Monday 02-05-2016
Tuesday 03-05-2016
Wednesday 04-05-2016
Thursday 05-05-2016
....
Monday 30-05-2016
Tuesday 31-05-2016
Wednesday 01-06-2016
Thursday 02-06-2016 (20 rows)
This is my INSERT query :
-- Inserts one schedule row per date generated from the presentation table
-- (same date-list logic as the SELECT query: offsets 0..N from CONNECT BY,
-- kept only when inside the row's date range and on the row's weekday).
-- All other column values are fixed literals.
-- Inline notes use '--': '//' is not a comment marker in SQL and would make
-- the statement fail to parse.
insert into CSP600_SCHEDULE (studentID,
studentName,
projectTitle,
supervisorID,
supervisorName,
examinerID,
examinerName,
exavailableID,
availableday,
availablestart,
availableend,
availabledate)
select '2013816591',
'mong',
'abc',
'1004',
'Sue',
'1002',
'hazlifah',
2,
'Monday', -- BASED ON THIS DAY
'12:00:00',
'2:00:00',
to_char (a.presentationdatestart + delta, 'DD-MM-YYYY', 'NLS_CALENDAR=GREGORIAN') list_date -- FOR AVAILABLEDATE
from presentation a,
(select level - 1 as delta
from dual
connect by level - 1 <= (select max (presentationdateend - presentationdatestart)
from presentation))
where a.presentationdatestart + delta <= a.presentationdateend
and a.presentationday = to_char(a.presentationdatestart + delta, 'fmDay')
order by a.presentationdatestart + delta,
a.presentationid;
This query successfully added 20 rows because there were 20 possible dates. I would like to modify the query so that it inserts based on availableDay and each date is used at most four times, each time for a different studentID.
Possible outcome in CSP600_SCHEDULE (I am removing unrelated columns to ease readability):
StudentID StudentName availableDay availableDate
2013 abc Monday 01-05-2016
2014 def Monday 01-05-2016
2015 ghi Monday 01-05-2016
2016 klm Monday 01-05-2016
2010 nop Tuesday 02-05-2016
2017 qrs Tuesday 02-05-2016
2018 tuv Tuesday 02-05-2016
2019 wxy Tuesday 02-05-2016
.....
2039 rrr Monday 09-05-2016
.....
You may check the calendar :)
I think what you're asking for is to list your students and then batch them up in groups of 4 - each batch is then allocated to a date. Is that right?
In which case something like this should work (I'm using a list of tables as the student names just so I don't need to insert any data into a custom table) :
-- Stand-in student list: up to 99 table names taken from all_tables.
WITH students AS (
    SELECT table_name
    FROM all_tables
    WHERE rownum < 100
)
-- Rows 1-4 get today's date, rows 5-8 get tomorrow's, and so on:
-- CEIL(rownum/4) is the batch number, minus 1 to start at an offset of zero.
SELECT
    table_name,
    SYSDATE + (CEIL(rownum/4) -1)
FROM students
;
I hope that helps you
...okay, following your comments, I think this might be a better solution :
-- Stand-in student list: up to 99 table names taken from all_tables.
WITH students AS (
    SELECT table_name student_name
    FROM all_tables
    WHERE rownum < 100
),
-- Eight appointment dates: today and every second day after it, up to
-- today + 14 (time of day stripped).
dates AS (
    SELECT TRUNC(sysdate) + 2 * (LEVEL - 1) appointment_date
    FROM dual
    CONNECT BY LEVEL <= 8
),
-- Students numbered sequentially, ordered by student name.
numbered_students AS (
    SELECT
        student_name,
        ROW_NUMBER() OVER (ORDER BY student_name) rn
    FROM students
),
-- Available dates numbered sequentially, ordered by date.
numbered_dates AS (
    SELECT
        appointment_date,
        ROW_NUMBER() OVER (ORDER BY appointment_date) rn
    FROM dates
)
-- Students 1-4 go to date number 1, students 5-8 to date number 2, and so on;
-- students beyond the last available date get no row.
SELECT
    s.student_name,
    d.appointment_date
FROM numbered_students s
INNER JOIN numbered_dates d
    ON CEIL(s.rn/4) = d.rn
;

deriving calendar month from week number

I've had a hunt around for something similar to this but can't find anything.
I have a query that provides the number of transactions that have occurred each day and need to group by year, month, week BUT of course some months span multiple week numbers, eg. Sept. & Oct. 2009.
Take for example week 39 last year (September & October). Thursday is the 1st October therefore 4 days of that week fall in Oct., therefore the volume of transactions for the last 3 days of Sept. should be added to the first week of October's totals? Clear?
For example:
VOLUME----TRANSACTION----YEAR----MONTH----WEEK
1264.1730----53----2009----September----37
2739.7200---109----2009----September----38
522.5500-----21----2009----October----39
1196.6450----51----2009----September----39
2827.9550---113----2009----October----40
2730.4050---110----2009----October----41
3763.7200---154----2009----October----42
3425.6250---137----2009----October----43
3551.8100---143----2009----November--44
2788.0150---113----2009----November--45
The problem is that the calendar is awkward, and there's not much you can do about it. As far as I can see, you have three choices:
Group by year and month. Display the week or weeks in the result but don't group by them.
Group by year and weeks. Display the month or months in the result but don't group by them.
Group by year, month, week, and accept that some of the groups contain less than one week's data. (i.e. what you have now)
From your description it seems like you want option 2:
-- One row per (year, week): the transaction total for the whole week, shown
-- with the smallest month value found in that week.
-- NOTE(review): if month holds month names rather than numbers, MIN compares
-- them as text, not chronologically; confirm the column type.
SELECT
    year,
    MIN(month),
    week,
    SUM(transaction)
FROM Table1
GROUP BY
    year,
    week
Something like this would do:
-- For weeks starting Sunday and ending Saturday, the US default:
-- For weeks starting Sunday and ending Saturday, the US default:
SET DATEFIRST 7
-- Alternatively, for weeks starting Saturday and ending Friday:
--SET DATEFIRST 6

-- For each sample date, return its weekday name and the first and last day of
-- the week it belongs to. DATEPART(WEEKDAY, ...) is 1 on the first day of the
-- week under the current DATEFIRST, so subtracting (weekday - 1) days lands on
-- the week start and adding (7 - weekday) days lands on the week end.
SELECT
    [Date],
    DATENAME(WEEKDAY, [Date])                          AS [DayOfWeek],
    DATEADD(DAY, 1 - DATEPART(WEEKDAY, [Date]), [Date]) AS WeekStarting,
    DATEADD(DAY, 7 - DATEPART(WEEKDAY, [Date]), [Date]) AS WeekEnding
FROM (
    VALUES
        (CONVERT(DATETIME,'20100124')),
        (CONVERT(DATETIME,'20100125')),
        (CONVERT(DATETIME,'20100126')),
        (CONVERT(DATETIME,'20100127')),
        (CONVERT(DATETIME,'20100128')),
        (CONVERT(DATETIME,'20100129')),
        (CONVERT(DATETIME,'20100130')),
        (CONVERT(DATETIME,'20100131')),
        (CONVERT(DATETIME,'20100201')),
        (CONVERT(DATETIME,'20100202')),
        (CONVERT(DATETIME,'20100203')),
        (CONVERT(DATETIME,'20100204')),
        (CONVERT(DATETIME,'20100205')),
        (CONVERT(DATETIME,'20100206'))
) a ([Date])
Then, convert your week start or end date to a month:
-- Extends the week start/end result with the first and last day of the month
-- that contains the week start, and likewise for the week end.
-- Month start: step back (day-of-month - 1) days.
-- Month end:   month start plus one month, minus one day.
SELECT
    w.[Date],
    w.[DayOfWeek],
    w.WeekStarting,
    w.WeekEnding,
    DATEADD(DAY, 1 - DAY(w.WeekStarting), w.WeekStarting) AS WeekStartingMonthStart,
    DATEADD(DAY, -1, DATEADD(MONTH, 1, DATEADD(DAY, 1 - DAY(w.WeekStarting), w.WeekStarting))) AS WeekStartingMonthEnd,
    DATEADD(DAY, 1 - DAY(w.WeekEnding), w.WeekEnding) AS WeekEndingMonthStart,
    DATEADD(DAY, -1, DATEADD(MONTH, 1, DATEADD(DAY, 1 - DAY(w.WeekEnding), w.WeekEnding))) AS WeekEndingMonthEnd
FROM (
    -- Same week start/end derivation as the standalone query on the sample dates.
    SELECT
        d.[Date],
        DATENAME(WEEKDAY, d.[Date])                            AS [DayOfWeek],
        DATEADD(DAY, 1 - DATEPART(WEEKDAY, d.[Date]), d.[Date]) AS WeekStarting,
        DATEADD(DAY, 7 - DATEPART(WEEKDAY, d.[Date]), d.[Date]) AS WeekEnding
    FROM (
        VALUES
            (CONVERT(DATETIME,'20100124')),
            (CONVERT(DATETIME,'20100125')),
            (CONVERT(DATETIME,'20100126')),
            (CONVERT(DATETIME,'20100127')),
            (CONVERT(DATETIME,'20100128')),
            (CONVERT(DATETIME,'20100129')),
            (CONVERT(DATETIME,'20100130')),
            (CONVERT(DATETIME,'20100131')),
            (CONVERT(DATETIME,'20100201')),
            (CONVERT(DATETIME,'20100202')),
            (CONVERT(DATETIME,'20100203')),
            (CONVERT(DATETIME,'20100204')),
            (CONVERT(DATETIME,'20100205')),
            (CONVERT(DATETIME,'20100206'))
    ) d ([Date])
) w