Data from last 12 months each month with trailing 12 months - tsql

This is TSQL and I'm trying to calculate repeat purchase rate for last 12 months. This is achieved by looking at sum of customers who have bought more than 1 time last 12 months and the total number of customers last 12 months.
The SQL code below will give me just that; but i would like to dynamically do this for the last 12 months. This is the part where i'm stuck and not should how to best achieve this.
Each month should include data going back 12 months. I.e. June should hold data between June 2018 and June 2018, May should hold data from May 2018 till May 2019.
[Order Date] is a normal datefield (yyyy-mm-dd hh:mm:ss)
DECLARE #startdate1 DATETIME
DECLARE #enddate1 DATETIME
SET #enddate1 = DATEADD(MONTH, DATEDIFF(MONTH, 0, GETDATE())-1, 0) -- Starting June 2018
SET #startdate1 = DATEADD(mm,DATEDIFF(mm,0,GETDATE())-13,0) -- Ending June 2019
;
with dataset as (
select [Phone No_] as who_identifier,
count(distinct([Order No_])) as mycount
from [MyCompany$Sales Invoice Header]
where [Order Date] between #startdate1 and #enddate1
group by [Phone No_]
),
frequentbuyers as (
select who_identifier, sum(mycount) as frequentbuyerscount
from dataset
where mycount > 1
group by who_identifier),
allpurchases as (
select who_identifier, sum(mycount) as allpurchasescount
from dataset
group by who_identifier
)
select sum(frequentbuyerscount) as frequentbuyercount, (select sum(allpurchasescount) from allpurchases) as allpurchasecount
from frequentbuyers
I'm hoping to achieve end result looking something like this:
...Dec, Jan, Feb, March, April, May, June each month holding both values for frequentbuyercount and allpurchasescount.

Here is the code. I made a little modification for the frequentbuyerscount and allpurchasescount. If you use a sumif like expression you don't need a second cte.
if object_id('tempdb.dbo.#tmpMonths') is not null drop table #tmpMonths
create table #tmpMonths ( MonthID datetime, StartDate datetime, EndDate datetime)
declare #MonthCount int = 12
declare #Month datetime = DATEADD(MONTH, DATEDIFF(MONTH, 0, GETDATE()), 0)
while #MonthCount > 0 begin
insert into #tmpMonths( MonthID, StartDate, EndDate )
select #Month, dateadd(month, -12, #Month), #Month
set #Month = dateadd(month, -1, #Month)
set #MonthCount = #MonthCount - 1
end
;with dataset as (
select m.MonthID as MonthID, [Phone No_] as who_identifier,
count(distinct([Order No_])) as mycount
from [MyCompany$Sales Invoice Header]
inner join #tmpMonths m on [Order Date] between m.StartDate and m.EndDate
group by m.MonthID, [Phone No_]
),
buyers as (
select MonthID, who_identifier
, sum(iif(mycount > 1, mycount, 0)) as frequentbuyerscount --sum only if count > 1
, sum(mycount) as allpurchasescount
from dataset
group by MonthID, who_identifier
)
select
b.MonthID
, max(tm.StartDate) StartDate, max(tm.EndDate) EndDate
, sum(b.frequentbuyerscount) as frequentbuyercount
, sum(b.allpurchasescount) as allpurchasecount
from buyers b inner join #tmpMonths tm on tm.MonthID = b.MonthID
group by b.MonthID
Be aware, that the code was tested only syntax-wise.
After the test data, this is the result:
MonthID | StartDate | EndDate | frequentbuyercount | allpurchasecount
-----------------------------------------------------------------------------
2018-08-01 | 2017-08-01 | 2018-08-01 | 340 | 3702
2018-09-01 | 2017-09-01 | 2018-09-01 | 340 | 3702
2018-10-01 | 2017-10-01 | 2018-10-01 | 340 | 3702
2018-11-01 | 2017-11-01 | 2018-11-01 | 340 | 3702
2018-12-01 | 2017-12-01 | 2018-12-01 | 340 | 3703
2019-01-01 | 2018-01-01 | 2019-01-01 | 340 | 3703
2019-02-01 | 2018-02-01 | 2019-02-01 | 2 | 8
2019-03-01 | 2018-03-01 | 2019-03-01 | 2 | 3
2019-04-01 | 2018-04-01 | 2019-04-01 | 2 | 3
2019-05-01 | 2018-05-01 | 2019-05-01 | 2 | 3
2019-06-01 | 2018-06-01 | 2019-06-01 | 2 | 3
2019-07-01 | 2018-07-01 | 2019-07-01 | 2 | 3

Related

Calculate duration of time ranges without overlap in PostgreSQL

I'm on Postgres 13 and have a table like this
| key | from | to
-------------------------------------------
| A | 2022-11-27T08:00 | 2022-11-27T09:00
| B | 2022-11-27T09:00 | 2022-11-27T10:00
| C | 2022-11-27T08:30 | 2022-11-27T10:30
I want to calculate the duration of each record, but without overlaps. So the desired result would be
| key | from | to | duration
----------------------------------------------------------
| A | 2022-11-27T08:00 | 2022-11-27T09:00 | '1 hour'
| B | 2022-11-27T09:00 | 2022-11-27T09:45 | '45 minutes'
| C | 2022-11-27T08:30 | 2022-11-27T10:00 | '15 minutes'
I guess, I need a subquery and subtract the overlap somehow, but how would I factor in multiple overlaps? In the example above C overlaps A and B, so I must subtract 30 minutes from A and then 45 minute from B... But I'm stuck here:
SELECT key, (("to" - "from")::interval - s.overlap) as duration
FROM time_entries, (
SELECT (???) as overlap
) s
select
key,
fromDT,
toDT,
(toDT-fromDT)::interval -
COALESCE((SELECT SUM(LEAST(te2.toDT,te1.toDT)-GREATEST(te2.fromDT,te1.fromDT))::interval
FROM time_entries te2
WHERE (te2.fromDT<te1.toDT or te2.toDT>te1.fromDT)
AND te2.key<te1.key),'0 minutes') as duration
from time_entries te1;
output:
key
fromdt
todt
duration
A
2022-11-27 08:00:00
2022-11-27 09:00:00
01:00:00
B
2022-11-27 09:00:00
2022-11-27 10:00:00
01:00:00
C
2022-11-27 08:30:00
2022-11-27 10:30:00
00:30:00
I renamed the columns from and to to fromDT and toDT to avoid using reserved words.
a, step by step, explanation is in the DBFIDDLE
Another approach.
WITH DATA AS
(SELECT KEY,
FROMDT,
TODT,
MIN(FROMDT) OVER(PARTITION BY FROMDT::DATE
ORDER BY KEY) AS START_DATE,
MAX(TODT) OVER(PARTITION BY FROMDT::DATE
ORDER BY KEY) AS END_DATE
FROM TIME_ENTRIES
ORDER BY KEY) ,STAGING_DATA AS
(SELECT KEY,
FROMDT,
TODT,
COALESCE(LAG(START_DATE) OVER (PARTITION BY FROMDT::DATE
ORDER BY KEY),FROMDT) AS T1_DATE,
COALESCE(LAG(END_DATE) OVER (PARTITION BY FROMDT::DATE
ORDER BY KEY),TODT) AS T2_DATE
FROM DATA)
SELECT KEY,
FROMDT,
TODT,
CASE
WHEN FROMDT = T1_DATE
AND TODT = T2_DATE THEN (TODT - FROMDT) ::Interval
WHEN T2_DATE < TODT THEN (TODT - T2_DATE)::Interval
ELSE (T2_DATE - TODT)::interval
END
FROM STAGING_DATA;

Show complete date range with NULL in PostgreSQL

I'm trying to create this query to get all complete date on range and data with nulls if the date is not exist on the table
For example this is my tbl_example
Original data:
id | userid(str) | comment(str) | mydate(date)
1 0001 sample1 2019-06-20T16:00:00.000Z
2 0002 sample2 2019-06-21T16:00:00.000Z
3 0003 sample3 2019-06-24T16:00:00.000Z
4 0004 sample4 2019-06-25T16:00:00.000Z
5 0005 sample5 2019-06-26T16:00:00.000Z
Then:
select * from tbl_example where mydate between '2019-06-20' AND
DATE('2019-06-20') + interval '5 day')
how to output all the dates on range with possible null like this
Expected output:
id | userid(str) | comment(str) | mydate(date)
1 0001 sample1 2019-06-20T16:00:00.000Z
2 0002 sample2 2019-06-21T16:00:00.000Z
null null null 2019-06-22T16:00:00.000Z
null null null 2019-06-23T16:00:00.000Z
4 0003 sample3 2019-06-24T16:00:00.000Z
5 0004 sample4 2019-06-25T16:00:00.000Z
This is my sample test environment: http://www.sqlfiddle.com/#!17/f5285/2
OK, just see my SQL as below:
with all_dates as (
select generate_series(min(mydate),max(mydate),'1 day'::interval) as dates from tbl_example
)
,null_dates as (
select
a.dates
from
all_dates a
left join
tbl_example t on a.dates = t.mydate
where
t.mydate is null
)
select null as id, null as userid, null as comment, dates as mydate from null_dates
union
select * from tbl_example order by mydate;
id | userid | comment | mydate
----+--------+---------+---------------------
1 | 0001 | sample1 | 2019-06-20 16:00:00
2 | 0002 | sample1 | 2019-06-21 16:00:00
| | | 2019-06-22 16:00:00
| | | 2019-06-23 16:00:00
3 | 0003 | sample1 | 2019-06-24 16:00:00
4 | 0004 | sample1 | 2019-06-25 16:00:00
5 | 0005 | sample1 | 2019-06-26 16:00:00
(7 rows)
Or the generate_series clause you can just write the date arguments you want ,as below:
select generate_series('2019-06-20 16:00:00','2019-06-20 16:00:00'::timestamp + '5 days'::interval,'1 day'::interval) as dates
SELECT id, userid, "comment", d.mydate
FROM generate_series('2019-06-20'::date, '2019-06-25'::date, INTERVAL '1 day') d (mydate)
LEFT JOIN tbl_example ON d.mydate = tbl_example.mydate
Result

How to count rows using a variable date range provided by a table in PostgreSQL

I suspect I require some sort of windowing function to do this. I have the following item data as an example:
count | date
------+-----------
3 | 2017-09-15
9 | 2017-09-18
2 | 2017-09-19
6 | 2017-09-20
3 | 2017-09-21
So there are gaps in my data first off, and I have another query here:
select until_date, until_date - (lag(until_date) over ()) as delta_days from ranges
Which I have generated the following data:
until_date | delta_days
-----------+-----------
2017-09-08 |
2017-09-11 | 3
2017-09-13 | 2
2017-09-18 | 5
2017-09-21 | 3
2017-09-22 | 1
So I'd like my final query to produce this result:
start_date | ending_date | total_items
-----------+-------------+--------------
2017-09-08 | 2017-09-10 | 0
2017-09-11 | 2017-09-12 | 0
2017-09-13 | 2017-09-17 | 3
2017-09-18 | 2017-09-20 | 15
2017-09-21 | 2017-09-22 | 3
Which tells me the total count of items from the first table, per day, based on the custom ranges from the second table.
In this particular example, I would be summing up total_items BETWEEN start AND end (since there would be overlap on the dates, I'd subtract 1 from the end date to not count duplicates)
Anyone know how to do this?
Thanks!
Use the daterange type. Note that you do not have to calculate delta_days, just convert ranges to dataranges and use the operator <# - element is contained by.
with counts(count, date) as (
values
(3, '2017-09-15'::date),
(9, '2017-09-18'),
(2, '2017-09-19'),
(6, '2017-09-20'),
(3, '2017-09-21')
),
ranges (until_date) as (
values
('2017-09-08'::date),
('2017-09-11'),
('2017-09-13'),
('2017-09-18'),
('2017-09-21'),
('2017-09-22')
)
select daterange, coalesce(sum(count), 0) as total_items
from (
select daterange(lag(until_date) over (order by until_date), until_date)
from ranges
) s
left join counts on date <# daterange
where not lower_inf(daterange)
group by 1
order by 1;
daterange | total_items
-------------------------+-------------
[2017-09-08,2017-09-11) | 0
[2017-09-11,2017-09-13) | 0
[2017-09-13,2017-09-18) | 3
[2017-09-18,2017-09-21) | 17
[2017-09-21,2017-09-22) | 3
(5 rows)
Note, that in the dateranges above lower bounds are inclusive while upper bound are exclusive.
If you want to calculate items per day in the dateranges:
select
daterange, total_items,
round(total_items::dec/(upper(daterange)- lower(daterange)), 2) as items_per_day
from (
select daterange, coalesce(sum(count), 0) as total_items
from (
select daterange(lag(until_date) over (order by until_date), until_date)
from ranges
) s
left join counts on date <# daterange
where not lower_inf(daterange)
group by 1
) s
order by 1
daterange | total_items | items_per_day
-------------------------+-------------+---------------
[2017-09-08,2017-09-11) | 0 | 0.00
[2017-09-11,2017-09-13) | 0 | 0.00
[2017-09-13,2017-09-18) | 3 | 0.60
[2017-09-18,2017-09-21) | 17 | 5.67
[2017-09-21,2017-09-22) | 3 | 3.00
(5 rows)

SQL calculating stock per month

I have specific task, and don't know how to realize it. I hope someone can help me =)
I have stock_move table:
product_id |location_id |location_dest_id |product_qty |date_expected |
-----------|------------|-----------------|------------|--------------------|
327 |80 |84 |10 |2014-05-28 00:00:00 |
327 |80 |84 |10 |2014-05-23 00:00:00 |
327 |80 |84 |10 |2014-02-26 00:00:00 |
327 |80 |85 |10 |2014-02-21 00:00:00 |
327 |80 |84 |10 |2014-02-12 00:00:00 |
327 |84 |85 |20 |2014-02-06 00:00:00 |
322 |84 |80 |120 |2015-12-16 00:00:00 |
322 |80 |84 |30 |2015-12-10 00:00:00 |
322 |80 |84 |30 |2015-12-04 00:00:00 |
322 |80 |84 |15 |2015-11-26 00:00:00 |
i.e. it's table of product moves from one warehouse to second.
I can calculate stock at custom date if I use something like this:
select
coalesce(si.product_id, so.product_id) as "Product",
(coalesce(si.stock, 0) - coalesce(so.stock, 0)) as "Stock"
from
(
select
product_id
,sum(product_qty * price_unit) as stock
from stock_move
where
location_dest_id = 80
and date_expected < now()
group by product_id
) as si
full outer join (
select
product_id
,sum(product_qty * price_unit) as stock
from stock_move
where
location_id = 80
and date_expected < now()
group by product_id
) as so
on si.product_id = so.product_id
Result I have current stock:
Product |Stock |
--------|------|
325 |1058 |
313 |34862 |
304 |2364 |
BUT what to do if I need stock per month?
something like this?
Month |Total Stock |
--------|------------|
Jan |130238 |
Feb |348262 |
Mar |2323364 |
How can I sum product qty from start period to end of each month?
I have just one idea - it's use 24 sub queries for get stock per each month (ex. below)
Jan |Feb | Mar |
----|----|-----|
123 |234 |345 |
End after this rotate rows and columns?
I think this's stupid, but I don't know another way... Help me pls =)
Something like this could give you monthly "ending" inventory snapshots. The trick is your data may omit certain months for certain parts, but that part will still have a balance (ie 50 received in January, nothing happened in February, but you still want to show February with a running total of 50).
One way to handle this is to come up with all possible combinations part/dates. I assumed 1/1/14 + 24 months in this example, but that's easily changed in the all_months subquery. For example, you may only want to start with the minimum date from the stock_move table.
with all_months as (
select '2014-01-01'::date + interval '1 month' * generate_series(0, 23) as month_begin
),
stock_calc as (
select
product_id, date_expected,
date_trunc ('month', date_expected)::date as month_expected,
case
when location_id = 80 then -product_qty * price_unit
when location_dest_id = 80 then product_qty * price_unit
else 0
end as qty
from stock_move
union all
select distinct
s.product_id, m.month_begin::date, m.month_begin::date, 0
from
stock_move s
cross join all_months m
),
running_totals as (
select
product_id, date_expected, month_expected,
sum (qty) over (partition by product_id order by date_expected) as end_qty,
row_number() over (partition by product_id, month_expected
order by date_expected desc) as rn
from stock_calc
)
select
product_id, month_expected, end_qty
from running_totals
where
rn = 1

function to calculate aggregate sum count in postgresql

Is there a function that calculates the total count of the complete month like below? I am not sure if postgres. I am looking for the grand total value.
2012-08=# select date_trunc('day', time), count(distinct column) from table_name group by 1 order by 1;
date_trunc | count
---------------------+-------
2012-08-01 00:00:00 | 22
2012-08-02 00:00:00 | 34
2012-08-03 00:00:00 | 25
2012-08-04 00:00:00 | 30
2012-08-05 00:00:00 | 27
2012-08-06 00:00:00 | 31
2012-08-07 00:00:00 | 23
2012-08-08 00:00:00 | 28
2012-08-09 00:00:00 | 28
2012-08-10 00:00:00 | 28
2012-08-11 00:00:00 | 24
2012-08-12 00:00:00 | 36
2012-08-13 00:00:00 | 28
2012-08-14 00:00:00 | 23
2012-08-15 00:00:00 | 23
2012-08-16 00:00:00 | 30
2012-08-17 00:00:00 | 20
2012-08-18 00:00:00 | 30
2012-08-19 00:00:00 | 20
2012-08-20 00:00:00 | 24
2012-08-21 00:00:00 | 20
2012-08-22 00:00:00 | 17
2012-08-23 00:00:00 | 23
2012-08-24 00:00:00 | 25
2012-08-25 00:00:00 | 35
2012-08-26 00:00:00 | 18
2012-08-27 00:00:00 | 16
2012-08-28 00:00:00 | 11
2012-08-29 00:00:00 | 22
2012-08-30 00:00:00 | 26
2012-08-31 00:00:00 | 17
(31 rows)
--------------------------------
Total | 12345
As best I can guess from your question and comments you want sub-totals of the distinct counts by month. You can't do this with group by date_trunc('month',time) because that'll do a count(distinct column) that's distinct across all days.
For this you need a subquery or CTE:
WITH day_counts(day,day_col_count) AS (
select date_trunc('day', time), count(distinct column)
from table_name group by 1
)
SELECT 'Day', day, day_col_count
FROM day_counts
UNION ALL
SELECT 'Month', date_trunc('month', day), sum(day_col_count)
FROM day_counts
GROUP BY 2
ORDER BY 2;
My earlier guess before comments was: Group by month?
select date_trunc('month', time), count(distinct column)
from table_name
group by date_trunc('month', time)
order by time
Or are you trying to include running totals or subtotal lines? For running totals you need to use sum as a window function. Subtotals are just a pain, as SQL doesn't really lend its self to them; you need to UNION two queries then wrap them in an outer ORDER BY.
select
date_trunc('day', time)::text as "date",
count(distinct column) as count
from table_name
group by 1
union
select
'Total',
count(distinct column)
from table_name
group by 1, date_trunc('month', time)
order by "date" = 'Total', 1