Group By - Using Absolute Values - tsql

I'm trying to display an accounting report where I show total transactions, voids, the transaction fee, and a total amount for each transaction type.
TransactionType Amount TransactionCount TotalAmount
AgentCredit -$1.00 49 -$49.00
MailFee -$1.25 11 -$13.75
MailFee $1.25 531 $663.75
HardCardFee -$5.00 7 -$35.00
HardCardFee $5.00 239 $1,195.00
QuotaHuntFee -$2.00 1 -$2.00
QuotaHuntFee $2.00 202 $404.00
But what I want to display would look like the following:
TransactionType Amount TransactionCount TotalAmount TotalTrans Voids
AgentCredit -$1.00 49 -$49.00 49 0
MailFee $1.25 520 $650.00 531 11
HardCardFee $5.00 232 $1,160.00 239 7
QuotaHuntFee $2.00 201 $402.00 202 1
Would it be possible to group the transaction types using the absolute value of the Amount and calculate the grand total along with the transaction count & void counts?
This is on SQL Server 2014.
Thanks,

I think this does it
-- Demo data: one row per (transaction type, signed fee). A negative fee is a
-- void of the matching positive fee; amt is the number of transactions.
-- A table variable must be declared with the @ sigil (# denotes a temp table
-- and cannot be used with DECLARE).
declare @T table (nm varchar(20), prc smallmoney, amt int);
insert into @T (nm, prc, amt) values
  ('AgentCredit', -1.00, 49)
, ('MailFee', -1.25, 11)
, ('MailFee', 1.25, 531)
, ('HardCardFee', -5.00, 7)
, ('HardCardFee', 5.00, 239)
, ('QuotaHuntFee', -2.00, 1)
, ('QuotaHuntFee', 2.00, 202);

-- paired: rows sharing a type and an absolute fee form one partition, ordered
-- so the negative (void) row comes first. LAG therefore exposes the void row's
-- price and count on the positive row.
with paired as
(
    select t.*, (t.prc * t.amt) as net
         , count(*)     over (partition by t.nm, abs(t.prc)) as cnt
         , row_number() over (partition by t.nm, abs(t.prc) order by t.prc) as rn
         , lag(t.prc)   over (partition by t.nm, abs(t.prc) order by t.prc) as prPrc
         , lag(t.amt)   over (partition by t.nm, abs(t.prc) order by t.prc) as prAmt
    from @T t
)
-- cte: bal is the transaction count net of voids (count minus the preceding
-- void count when the preceding row has a negative price).
, cte as
(
    select p.*
         , case when p.prPrc < 0 then p.amt - p.prAmt
                else p.amt
           end as bal
    from paired p
)
-- Keep one row per partition: the only row when there is no counterpart
-- (cnt = 1), otherwise the second (positive) row.
select t.*, ISNULL(t.prAmt, 0) as void
     , t.bal * t.prc as nnet
from cte t
where t.cnt = 1
   or t.rn = 2
order by t.nm, t.prc;

There's a bit of confusion around your results with the data you've provided. HardCardFee has 7 and 23 in the sample you provided, but you want to return 232 for the total? MailFee also has some inconsistent math. Also, your 'Voids' column returns 0 for the first row; however, it seems as if there are 49 voids there.
Perhaps this query could get you started down the right path:
-- Sample data: one row per (transaction type, signed fee). Rows with a
-- negative Amount are voids. A table variable must use the @ sigil.
DECLARE @Table TABLE (TransactionType varchar(20), Amount decimal(10,2), TransactionCount int, TotalAmount decimal(10,2));

INSERT @Table (TransactionType, Amount, TransactionCount, TotalAmount)
VALUES ('AgentCredit'  ,-$1.00 ,49  ,-$49.00  ),
       ('MailFee'      ,-$1.25 ,11  ,-$13.75  ),
       ('MailFee'      ,$1.25  ,531 ,$663.75  ),
       ('HardCardFee'  ,-$5.00 ,7   ,-$35.00  ),
       ('HardCardFee'  ,$5.00  ,23  ,$1195.00 ),
       ('QuotaHuntFee' ,-$2.00 ,1   ,-$2.00   ),
       ('QuotaHuntFee' ,$2.00  ,202 ,$404.00  );

-- c: attach a numeric signed count (negative for void rows). Keeping the sign
-- as an int avoids building a '-'-prefixed string and aggregating varchar
-- values, where MIN() compares lexicographically.
;WITH c AS (
    SELECT t.TransactionType,
           t.Amount,
           t.TransactionCount,
           t.TotalAmount,
           CASE WHEN t.Amount < 0 THEN -t.TransactionCount
                ELSE t.TransactionCount
           END AS SignedCount
    FROM @Table t
)
SELECT t.TransactionType
      ,MAX(t.Amount)                AS Amount            -- the positive fee when one exists
      ,SUM(t.SignedCount)           AS TransactionCount  -- transactions net of voids
      ,SUM(t.TotalAmount)           AS TotalAmount
      ,SUM(ABS(t.TransactionCount)) AS TotalTrans        -- all rows, voids included
      -- Sum only the void rows so a type without any void reports 0.
      ,SUM(CASE WHEN t.Amount < 0 THEN t.TransactionCount ELSE 0 END) AS Voids
FROM c t
GROUP BY t.TransactionType;
Again, not sure about some of the values provided.

Related

Moving grouped MEDIAN / Get the MEDIAN of specific months from the past IN T-SQL

Let's say I have a table:
DATE
ID
VALUE
01.2010
1
100
02.2010
1
200
...
...
...
12.2010
1
300
01.2011
1
150
02.2011
1
250
...
...
...
12.2011
1
350
01.2012
1
200
02.2012
1
300
...
...
...
12.2012
1
400
I want to get a median of VALUE grouped by months i.e. get something like
DATE
ID
VALUE
MEDIAN
01.2010
1
100
100
02.2010
1
200
200
...
...
...
...
12.2010
1
300
300
01.2011
1
150
125 = (100+150)/2
02.2011
1
250
225 = (200+250)/2
...
...
...
...
12.2011
1
350
325 = (300+350)/2
01.2012
1
200
150
02.2012
1
300
250
...
...
...
...
12.2012
1
400
350
I have more IDs in the table, so I would like to get this result for every ID.
I have tried doing
-- First attempt: a running median per Id and calendar month, expressed with a
-- window frame ending at the current row. As quoted in the question, this
-- fails with: The function 'PERCENTILE_CONT' may not have a window frame.
SELECT PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY VALUE) OVER (PARTITION BY Id, MONTH(Date) ORDER BY Date ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)
but I get the error "The function 'PERCENTILE_CONT' may not have a window frame."
I've also tried the following (but also without any results):
-- Second attempt: median per Id and calendar month over a self-join-style
-- range between the earliest date and tab2.Date. The question reports that
-- this did not produce the desired result.
-- NOTE(review): Min(Date) is an aggregate used directly in the WHERE clause,
-- and Id / Date / VALUE are not qualified with tab1 or tab2 — confirm intent.
SELECT PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY VALUE)
OVER (PARTITION BY Id, MONTH(Date))
FROM tab1 LEFT JOIN tab2
ON tab1.key = tab2.key
WHERE tab1.Date BETWEEN Min(Date) AND tab2.Date
EDIT
So far I have resolved it with
-- Cumulative median per Id and calendar month with the years hard-coded.
-- Each term is non-zero only for rows of its own year; within it, values from
-- later years are turned into NULL so PERCENTILE_CONT ignores them.
-- Fixes over the original sketch: every inner CASE is closed with END, and the
-- year is extracted with YEAR(Date) instead of comparing the date to 2010.
SELECT (CASE WHEN YEAR(Date) = 2010 THEN PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY CASE WHEN YEAR(Date) = 2010 THEN VALUE ELSE NULL END) OVER (PARTITION BY Id, MONTH(Date)) ELSE 0 END) +
(CASE WHEN YEAR(Date) = 2011 THEN PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY CASE WHEN YEAR(Date) <= 2011 THEN VALUE ELSE NULL END) OVER (PARTITION BY Id, MONTH(Date)) ELSE 0 END) +
(CASE WHEN YEAR(Date) = 2012 THEN PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY CASE WHEN YEAR(Date) <= 2012 THEN VALUE ELSE NULL END) OVER (PARTITION BY Id, MONTH(Date)) ELSE 0 END)
FROM tab1
But to be honest, I would like a solution that does not assume a priori knowledge of the dates. I've thought about a WHILE loop that updates a column while @MinYear <= @MaxYear, with @MinYear = @MinYear + 1 in every iteration, but in that case I would have to create temporary tables, which I'm trying to avoid.
My idea is to use (Value1 + Value2)/2 as the median, as your requirement is a little complicated.
-- Sample data. [Date] is stored as text in 'MM.YYYY' form, so the month is
-- the first two characters and the year the last four.
CREATE TABLE MedianData
(
 [Date] VARCHAR(100)
,ID INT
,[Value] INT
);

INSERT INTO MedianData ([Date], ID, [Value]) VALUES
 ('01.2010', 1, 100)
,('02.2010', 1, 200)
,('12.2010', 1, 300)
,('01.2011', 1, 150)
,('02.2011', 1, 250)
,('12.2011', 1, 350)
,('01.2012', 1, 200)
,('02.2012', 1, 300)
,('12.2012', 1, 400);

-- For every row, the median of all values of the same ID and the same calendar
-- month from the first year up to and including the row's own year.
-- PERCENTILE_CONT cannot take a window frame, so the "up to this year" set is
-- built per row with CROSS APPLY. Inside it every row carries the same median,
-- hence TOP (1). With two values this is (Value1 + Value2)/2; with more values
-- it is the true median, and it works for any number of years and IDs.
SELECT
     cur.[Date]
    ,cur.ID
    ,cur.[Value]
    ,med.[Median]
FROM MedianData AS cur
CROSS APPLY
(
    SELECT TOP (1)
           PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY hist.[Value]) OVER () AS [Median]
    FROM MedianData AS hist
    WHERE hist.ID = cur.ID
      AND LEFT(hist.[Date], 2)  =  LEFT(cur.[Date], 2)    -- same month
      AND RIGHT(hist.[Date], 4) <= RIGHT(cur.[Date], 4)   -- this year or earlier
) AS med
-- Chronological order per ID (year first, then month).
ORDER BY cur.ID, RIGHT(cur.[Date], 4), LEFT(cur.[Date], 2);

Postgres - Update running count whenever row meets a certain condition

I have a table with the following entries in them
id price quantity
1. 10 75
2. 10 75
3. 10 -150
4. 10 75
5. 10 -75
What I need to do is to update each row with a number that is the number of times the running total has been 0. In the above example, the cumulative totals would be
id. cum_total
1. 750
2. 1500
3. 0
4. 750
5. 0
Desired result
id price quantity seq
1. 10 75 1
2. 10 75 1
3. 10 -150 1
4. 10 75 2
5. 10 -75 2
I'm now lost in a spiral of CTEs and window functions and figured I'd ask the experts.
Thanks in advance :-)
Here is one option using analytic functions:
-- zero_flags: 1 on every row where the running total of price * quantity
-- (in id order) is exactly zero, otherwise 0.
WITH zero_flags AS (
    SELECT
        src.id,
        src.price,
        src.quantity,
        CASE
            WHEN SUM(src.price * src.quantity) OVER (ORDER BY src.id) = 0 THEN 1
            ELSE 0
        END AS hit_zero
    FROM yourTable AS src
),
-- shifted: move each flag down one row, so a zero total closes its own batch
-- and the next row opens a new one; the first row gets 0.
shifted AS (
    SELECT
        zf.id,
        zf.price,
        zf.quantity,
        LAG(zf.hit_zero, 1, 0) OVER (ORDER BY zf.id) AS prev_hit_zero
    FROM zero_flags AS zf
)
-- The batch number is one plus the count of zero totals seen on earlier rows.
SELECT
    s.id,
    s.price,
    s.quantity,
    1 + SUM(s.prev_hit_zero) OVER (ORDER BY s.id) AS cumulative_total
FROM shifted AS s
ORDER BY s.id;
Demo
You may try running each CTE in succession to see how the logic is working.
With window functions:
-- running: flag the rows where the running total of price * quantity
-- (in id order) is exactly zero.
WITH running AS (
    SELECT
        t.id,
        t.price,
        t.quantity,
        sum(t.price * t.quantity) OVER (ORDER BY t.id) = 0 AS hits_zero
    FROM mytable AS t
)
-- Count the zero totals on all strictly preceding rows. The frame is empty for
-- the first row, so the sum is NULL there and coalesce turns it into 0.
SELECT
    r.id,
    r.price,
    r.quantity,
    1 + coalesce(
            sum(CASE WHEN r.hits_zero THEN 1 ELSE 0 END)
                OVER (ORDER BY r.id
                      ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING),
            0
        ) AS batch
FROM running AS r;

Find date sequence in PostgreSQL

I'm trying to find the longest sequence of consecutive days per customer in my data, i.e. the maximum number of days in a row on which a specific customer placed an order. If someone used my app on 25/08/16, 26/08/16, 27/08/16, 01/09/16 and 02/09/16, the max sequence will be 3 days (25, 26, 27).
In the end (The output) I want to get two fields: custid | MaxDaySequence
I have the following fields in my data table: custid | orderdate (timestamp)
For example:
custid orderdate
1 25/08/2007
1 03/10/2007
1 13/10/2007
1 15/01/2008
1 16/03/2008
1 09/04/2008
2 18/09/2006
2 08/08/2007
2 28/11/2007
2 04/03/2008
3 27/11/2006
3 15/04/2007
3 13/05/2007
3 19/06/2007
3 22/09/2007
3 25/09/2007
3 28/01/2008
I'm using PostgreSQL 2014.
Thanks
Trying:
-- Longest run of consecutive order days per customer (gaps and islands).
-- Within one customer, consecutive days share the same value of
-- (order day - row number), so that difference identifies each run.
-- Fixes over the original attempt: the run key is derived from the order date
-- itself (not CURRENT_DATE), the row number stays an integer so it can be
-- subtracted from a date, the grouping column is the one selected, and days
-- with several orders are collapsed first so they count once.
select custid, max(num_days) as longest
from (
    select custid, grp, count(*) as num_days
    from (
        select custid,
               order_day,
               order_day - cast(row_number() over (partition by custid order by order_day) as integer) as grp
        from (
            select distinct custid, date(orderdate) as order_day
            from table_
        ) d
    ) x
    group by custid, grp
) y
group by custid
Try:
-- marked: 1 on every row that starts a new run, i.e. the previous order of the
-- same customer is missing or more than one day earlier; otherwise 0.
WITH marked AS (
    SELECT
        custid,
        orderdate,
        CASE
            WHEN orderdate - lag(orderdate) OVER (PARTITION BY custid ORDER BY orderdate) <= 1 THEN 0
            ELSE 1
        END AS starts_run
    FROM mytable
),
-- numbered: the running sum of the start flags gives each run its own number.
numbered AS (
    SELECT
        custid,
        sum(starts_run) OVER (PARTITION BY custid ORDER BY orderdate) AS run_no
    FROM marked
),
-- run_sizes: number of rows in each run.
run_sizes AS (
    SELECT custid, run_no, count(*) AS run_len
    FROM numbered
    GROUP BY custid, run_no
)
SELECT custid, max(run_len) AS max_sequence_of_days
FROM run_sizes
GROUP BY custid
Demo: http://sqlfiddle.com/#!15/00422/11
===== EDIT ===========
Got "operator does not exist: interval <= integer"
This means that orderdate column is of type timestamp, not date.
In this case you need to use <= interval '1' day condition instead of <= 1:
Please see this link: https://www.postgresql.org/docs/9.0/static/functions-datetime.html to learn more about date arithmetic in PostgreSQL
Please see this demo:
http://sqlfiddle.com/#!15/7c2200/2
-- Longest run of consecutive order days per customer, for a timestamp column.
-- The timestamps are truncated to midnight and de-duplicated first. Comparing
-- the raw timestamps would break a run whenever the later order happens later
-- in the day (e.g. 25/08 09:00 -> 26/08 18:00 is more than interval '1' day),
-- and several orders on one day would be counted as several days.
-- NOTE(review): assumes orderdate is timestamp without time zone; for
-- timestamptz, days around a DST change are not exactly 24 hours — confirm.
SELECT custid, max( abc ) as max_sequence_of_days
FROM (
    SELECT custid, yy, count(*) abc
    FROM (
        SELECT * ,
               -- running count of run starts = run number within the customer
               SUM( xx ) OVER (partition by custid order by order_day ) yy
        FROM (
            select custid, order_day,
                   -- xx = 1 marks the first day of a new run
                   CASE WHEN
                        order_day - lag( order_day ) over (partition by custid order by order_day )
                        <= interval '1' day
                   THEN 0 ELSE 1 END xx
            from (
                select distinct custid, date_trunc( 'day', orderdate ) as order_day
                from mytable
            ) d
        ) x
    ) z
    GROUP BY custid, yy
) q
GROUP BY custid

Selecting only the values that are in the 75th percentile and are above a constraint

I'm trying to get this query to work properly...
-- Salaries above 75000 in ascending order, capped at a row count equal to 75%
-- of the number of salaries in the whole agent table.
SELECT a.salary
FROM agent AS a
WHERE a.salary > 75000
ORDER BY a.salary
LIMIT (SELECT round(count(everyone.salary) * 0.75) FROM agent AS everyone)
some addition information about the rows:
166 rows – 25%
331 rows – 50%
497 rows – 75%
662 rows – 100%
These rows have salary 75,000 plus:
235 / 662 = ~.35
.35 * 662 = ~235 rows.
I'm trying to get the above query to return back all the rows that have salary greater than 75,000 but are still in the first 497 rows. When I run the above query it returns all the rows starting at 75,000 and limited by a 497 row return constraint.
I'm not sure how I can just return salaries of greater than 75,000 that are in the first 497 rows of the limit constraint.
You can divide the current row number by the total number of rows to get each row's relative position:
-- Salaries above 75000 that also lie within the lowest 75% of ALL agents.
-- The row number and the total count are computed over the whole table, and
-- the salary filter is applied afterwards. Filtering inside the derived table
-- would instead take 75% of only the rows above 75000.
select salary
from (
  select salary,
         count(*) over () as total_count,              -- rows in the whole table
         row_number() over (order by salary) as rn     -- position by ascending salary
  from agent
) t
where (rn / total_count::numeric) <= 0.75
  and salary > 75000
order by salary asc
Use row_number:
-- Salaries above 75000 among the first 75% of all agents by ascending salary.
-- A column alias cannot be referenced in the WHERE clause of the query level
-- that defines it, so the row number is computed in a derived table and
-- filtered outside. The comparison is inclusive so the boundary row is kept,
-- matching a LIMIT of the same size.
select salary, row_num
from (
  select salary, row_number() over (order by salary) as row_num
  from agent
) ranked
where row_num <= (select ROUND(count(salary) * .75) from agent)
  and salary > 75000

TSQL Insert additional rows

What is the most efficient way to identify and insert rows for the following problem?
Here's my sample data
vId StartDate EndDate Distance
------------------------------------
256 2015-03-04 2015-03-05 365
271 2015-03-04 2015-03-04 86
315 2015-03-05 2015-03-06 254
256 2015-03-07 2015-03-09 150
458 2015-03-10 2015-03-12 141
458 2015-03-15 2015-03-17 85
315 2015-03-15 2015-03-16 76
I want to add additional rows for each vId where the StartDate <> EndDate like follows, so instead of just
315 2015-03-05 2015-03-06 254
256 2015-03-07 2015-03-09 150
I want to show the following
315 2015-03-05 2015-03-06 254
315 2015-03-06 2015-03-06 0
256 2015-03-07 2015-03-09 150
256 2015-03-08 2015-03-09 0
256 2015-03-09 2015-03-09 0
Thanks in advance.
Just a simple insert:
-- Adds one extra row, starting a day later and with Distance 0, for every row
-- whose StartDate differs from its EndDate.
-- The target is the same TableName the rows are read from; TABLE is a
-- reserved word and cannot be used as an unquoted table name.
Insert Into TableName(vId, StartDate, EndDate, Distance)
Select vId, DateAdd(dd, 1, StartDate), EndDate, 0
From TableName
Where StartDate <> EndDate
If you want just select but not insert then:
-- The stored rows as they are ...
SELECT src.vId, src.StartDate, src.EndDate, src.Distance
FROM TableName AS src

UNION ALL

-- ... plus, for every row spanning more than one day, one additional row that
-- starts a day later and carries a Distance of 0.
SELECT multi.vId, DATEADD(DAY, 1, multi.StartDate), multi.EndDate, 0
FROM TableName AS multi
WHERE NOT (multi.StartDate = multi.EndDate)
EDIT
This assumes a maximum difference of 100 days between StartDate and EndDate. If you have longer intervals, you can add more cross joins to increase the range of possible values:
-- Sample data. A table variable must be declared with the @ sigil.
declare @t table(vId int, StartDate date, EndDate date, Distance int)
insert into @t (vId, StartDate, EndDate, Distance) values
(315, '2015-03-05', '2015-03-06', 254),
(256, '2015-03-07', '2015-03-09', 150)

-- cte: the numbers 1..100, built by cross joining two 10-row value lists.
-- This caps the supported StartDate/EndDate gap at 100 days.
;with cte as(select row_number() over(order by (select 1)) as rn
             from (values(1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) t1(n)
             cross join (values(1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) t2(n)
)
-- The original rows ...
select vId, StartDate, EndDate, Distance from @t
union all
-- ... plus one zero-distance row for every later day up to and including
-- EndDate.
select t1.vId, ca.StartDate, t1.EndDate, 0
from @t t1
cross apply(select dateadd(dd, c.rn, t1.StartDate) as StartDate
            from cte c
            where dateadd(dd, c.rn, t1.StartDate) <= t1.EndDate) as ca
where t1.StartDate <> t1.EndDate
order by vId, StartDate
See fiddle http://sqlfiddle.com/#!6/9eecb/4641