Postgres - Return the results from multiple rows

I have the below tables:
CREATE SCHEMA pulse;
CREATE TABLE pulse.event(
id integer,
url text,
name text,
event_start date,
event_end date,
sub_type text,
preference jsonb
);
CREATE TABLE pulse.event_meta(
event_id integer,
data json
);
CREATE TABLE pulse.slot_archive(
id integer,
event_id integer,
location_id integer,
date text,
start_time text,
end_time text,
current_registration integer,
preference json
);
CREATE TABLE pulse.event_booking_archive(
id integer,
event_id integer,
slot_id integer,
status text,
data json
);
The query below retrieves the event when the status is cancelled (from the preference column of the event table), along with additional data from the other tables.
Query:
SELECT
    COALESCE((evnt.preference::json #>> '{eventStatus,status}' is not null)::boolean, false) as "eventCancelled",
    COALESCE(attendancecount, 0) as "attendanceCount",
    COALESCE((meta.data ->> 'walkins')::int, 0) as walkins,
    COALESCE((meta.data ->> 'attendanceSubmitted')::boolean, false) as "attendanceSubmitted",
    meta.data -> 'heroImage' as "heroImage",
    meta.data -> 'tileContent' -> 'registrationPage' ->> 'title' as title,
    evnt.id as "eventId",
    evnt.url as "eventUrl",
    evnt.name as name,
    evnt.event_start AT TIME ZONE 'America/New_York' as "startTime",
    evnt.event_end AT TIME ZONE 'America/New_York' as "endTime",
    evnt.sub_type as "subType",
    agg_slot.slotDates as slots,
    agg_slot.registrationcount as "registrationCount"
FROM pulse.event as evnt
INNER JOIN pulse.event_meta meta ON evnt.id = meta.event_id
LEFT JOIN (SELECT event_id, COALESCE(sum((data ->> 'attendanceCount')::int), 0) as attendancecount
           FROM pulse.event_booking_archive
           WHERE status = 'SUCCESS'
           GROUP BY event_id) as eb
       ON evnt.id = eb.event_id,
     (SELECT event_id, location_id,
             array_agg(CONCAT_WS(' ', slot.date, slot.start_time, slot.end_time)) as slotDates,
             sum(current_registration) as registrationCount
      FROM pulse.slot_archive as slot
      GROUP BY slot.event_id, slot.location_id) as agg_slot
WHERE evnt.id = agg_slot.event_id
  AND evnt.id in (SELECT id FROM pulse.event WHERE event_end + interval '48h' < now())
  AND agg_slot.location_id = '3305';
But I need help finding the events whose event location is cancelled, i.e. all the slots for that particular event location_id in the slot_archive table have "statusMeta": "CS". I'm stuck here. Can someone please assist?
https://www.db-fiddle.com/f/iqxvqrjDrqb8B3tG1xzpHN/14

I rewrote your query to make it clearer.
So we have:
With booking_archive_success as
(select event_id, COALESCE(sum((data ->> 'attendanceCount')::int),0) as attendancecount
from pulse.event_booking_archive where status = 'SUCCESS'
group by event_id)
, agg_slot as (
select slot.event_id, slot.location_id
, array_agg(CONCAT_WS(' ', slot.date,slot.start_time,slot.end_time)) as slotDates
, sum(slot.current_registration) as registrationCount
, array_agg(slot.preference->'slotStatus'->>'statusMeta') as statuses
from pulse.slot_archive as slot
group by slot.event_id, slot.location_id)
select COALESCE((evnt.preference::json #>> '{eventStatus,status}' is not null)::boolean, false) as "eventCancelled"
, COALESCE(attendancecount,0) as "attendanceCount"
, COALESCE((meta.data ->> 'walkins')::int, 0) as walkins
, COALESCE((meta.data ->> 'attendanceSubmitted')::boolean, false) as "attendanceSubmitted"
, meta.data -> 'heroImage' as "heroImage"
, meta.data -> 'tileContent' -> 'registrationPage' ->> 'title' as title
, evnt.id as "eventId"
, evnt.url as "eventUrl"
, evnt.name as name
, evnt.event_start AT TIME ZONE 'America/New_York' as "startTime"
,evnt.event_end AT TIME ZONE 'America/New_York' as "endTime"
--, evnt.type as type
, evnt.sub_type as "subType"
, agg_slot.slotDates as slots
, agg_slot.registrationcount as "registrationCount"
, agg_slot.statuses
from pulse.event as evnt
inner join pulse.event_meta meta on evnt.id = meta.event_id
left join booking_archive_success as eb on evnt.id = eb.event_id
inner join agg_slot on evnt.id = agg_slot.event_id
where evnt.id in
(select id from pulse.event where event_end + interval '48h' < now())
and not exists (Select 1 from (Select unnest(agg_slot.statuses) as statusMeta from agg_slot a Where a.event_id = evnt.id) t
where statusMeta not like 'CS' or statusMeta is null
)
and agg_slot.location_id = '3305';
Several good habits to adopt:
Prefer CTEs where possible. The code is more readable and easier to design.
Avoid a Cartesian product in the FROM clause with the join condition in the WHERE clause; I fixed this by using an explicit INNER JOIN.
To solve (I hope) your problem, I used the unnest function in a NOT EXISTS condition in the WHERE clause. The goal is to reject every event_id for which any of the statusMeta values is not 'CS'.
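To see the pattern in isolation, here is a minimal, self-contained sketch with hypothetical inline data (not your tables): it keeps only the rows whose status array contains nothing but 'CS'.
SELECT e.event_id
FROM (VALUES (1, ARRAY['CS','CS']), (2, ARRAY['CS','OPEN'])) AS e(event_id, statuses)
WHERE NOT EXISTS (
    SELECT 1
    FROM unnest(e.statuses) AS s(status_meta)
    WHERE s.status_meta IS DISTINCT FROM 'CS'
);
-- returns only event_id 1; IS DISTINCT FROM also treats NULL entries as "not 'CS'",
-- matching the "statusMeta is null" check in the query above.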


How to get number of consecutive days from current date using postgres?

I want to get the number of consecutive days from the current date using Postgres SQL.
[Image: sample watch_history rows with the expected consecutive-days count highlighted for each user.]
Below is the SQL query I have created, but it's not returning the expected result:
with grouped_dates as (
select user_id, created_at::timestamp::date,
(created_at::timestamp::date - (row_number() over (partition by user_id order by created_at::timestamp::date) || ' days')::interval)::date as grouping_date
from watch_history
)
select * , dense_rank() over (partition by grouping_date order by created_at::timestamp::date) as in_streak
from grouped_dates where user_id = 702
order by created_at::timestamp::date
Can anyone please help me resolve this issue?
If we could somehow apply DISTINCT to the created_at field in the query below, that would solve my issue.
WITH list AS
(
SELECT user_id,
(created_at::timestamp::date - (row_number() over (partition by user_id order by created_at::timestamp::date) || ' days')::interval)::date as next_day
FROM watch_history
)
SELECT user_id, count(*) AS number_of_consecutive_days
FROM list
WHERE next_day IS NOT NULL
GROUP BY user_id
Does anyone have an idea how to apply DISTINCT to created_at in the above query?
To get the "number of consecutive days" for the same user_id:
WITH list AS
(
SELECT user_id
, array_agg(created_at) OVER (PARTITION BY user_id ORDER BY created_at RANGE BETWEEN CURRENT ROW AND '1 day' FOLLOWING) AS consecutive_days
FROM watch_history
)
SELECT user_id, count(DISTINCT d.day) AS number_of_consecutive_days
FROM list
CROSS JOIN LATERAL unnest(consecutive_days) AS d(day)
WHERE array_length(consecutive_days, 1) > 1
GROUP BY user_id
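As a quick check, here is the same query against a small hypothetical data set (user 702 watched on Jan 1, 2 and 3, then again on Jan 5; the streak Jan 1-3 gives 3). Note that RANGE with an interval offset requires PostgreSQL 11 or later.
WITH watch_history(user_id, created_at) AS (
VALUES (702, timestamp '2023-01-01'),
       (702, timestamp '2023-01-02'),
       (702, timestamp '2023-01-03'),
       (702, timestamp '2023-01-05')
), list AS (
SELECT user_id
, array_agg(created_at) OVER (PARTITION BY user_id ORDER BY created_at RANGE BETWEEN CURRENT ROW AND '1 day' FOLLOWING) AS consecutive_days
FROM watch_history
)
SELECT user_id, count(DISTINCT d.day) AS number_of_consecutive_days
FROM list
CROSS JOIN LATERAL unnest(consecutive_days) AS d(day)
WHERE array_length(consecutive_days, 1) > 1
GROUP BY user_id
-- user_id | number_of_consecutive_days
--     702 | 3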
To get the list of "consecutive days" for the same user_id :
WITH list AS
(
SELECT user_id
, array_agg(created_at) OVER (PARTITION BY user_id ORDER BY created_at RANGE BETWEEN CURRENT ROW AND '1 day' FOLLOWING) AS consecutive_days
FROM watch_history
)
SELECT user_id
, array_agg(DISTINCT d.day ORDER BY d.day) AS list_of_consecutive_days
FROM list
CROSS JOIN LATERAL unnest(consecutive_days) AS d(day)
WHERE array_length(consecutive_days, 1) > 1
GROUP BY user_id
full example & result in dbfiddle

PostgreSQL query to detect overlapping time ranges for INTERVAL

I have a table in Postgres which looks like below:
CREATE TABLE my_features
(
id uuid NOT NULL,
feature_id uuid NOT NULL,
begin_time timestamptz NOT NULL,
duration integer NOT NULL
)
For each feature_id there may be multiple rows with time ranges specified by begin_time .. (begin_time + duration). duration is in milliseconds. They may overlap. I'm looking for a fast way to find all feature_ids that have any overlaps.
I have referred to this - Query Overlapping time range - which is similar, but works with a fixed end time.
I have tried the below query but it is throwing an error.
Query:
select c1.*
from my_features c1
where exists (select 1
from my_features c2
where tsrange(c2.begin_time, c2.begin_time + '30 minutes'::INTERVAL, '[]') && tsrange(c1.begin_time, c1.begin_time + '30 minutes'::INTERVAL, '[]')
and c2.feature_id = c1.feature_id
and c2.id <> c1.id);
Error:
ERROR: function tsrange(timestamp with time zone, timestamp with time zone, unknown) does not exist
LINE 5: where tsrange(c2.begin_time, c2.begin_time...
I have used a fixed default interval here because I did not understand how to convert the duration into minutes and substitute it as 'n minutes'.
If you need a solution faster than O(n²), you can use an exclusion constraint on ranges with the btree_gist extension, possibly on a temporary table:
CREATE EXTENSION IF NOT EXISTS btree_gist; -- needed for feature_id WITH = in the exclusion constraint
CREATE TEMPORARY TABLE my_features_ranges (
id uuid NOT NULL,
feature_id uuid NOT NULL,
range tstzrange NOT NULL,
EXCLUDE USING GIST (feature_id WITH =, range WITH &&)
);
INSERT INTO my_features_ranges (id, feature_id, range)
select id, feature_id, tstzrange(begin_time, begin_time+duration*'1ms'::interval)
from my_features
on conflict do nothing;
select id from my_features except select id from my_features_ranges;
Using OVERLAPS predicate:
SELECT * -- DISTINCT f1.*
FROM my_features f1
JOIN my_features f2
ON f1.feature_id = f2.feature_id
AND f1.id <> f2.id
AND (f1.begin_time::date, f1.begin_time::date + '30 minutes'::INTERVAL)
OVERLAPS (f2.begin_time::date, f2.begin_time::date + '30 minutes'::INTERVAL);
db<>fiddle demo
Or try this
select c1.*
from jak.my_features c1
where exists (select 1
from jak.my_features c2
where tsrange(c2.begin_time::date, c2.begin_time::date + '30 minutes'::INTERVAL, '[]') && tsrange(c1.begin_time::date, c1.begin_time::date + '30 minutes'::INTERVAL, '[]') and
c2.feature_id = c1.feature_id
and c2.id <> c1.id);
The problem was that I was using tsrange on a timestamp with time zone column; for timestamp with time zone there is a separate function called tstzrange.
The below worked for me:
EDIT: Added changes suggested by @a_horse_with_no_name
select c1.*
from my_features c1
where exists (select 1
from my_features c2
where tstzrange(c2.begin_time, c2.begin_time + make_interval(secs => c2.duration / 1000), '[]') && tstzrange(c1.begin_time, c1.begin_time + make_interval(secs => c1.duration / 1000), '[]')
and c2.feature_id = c1.feature_id
and c2.id <> c1.id);
However, the part about calculating the interval dynamically is still pending.
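For what it's worth, a millisecond duration converts to an interval directly; both forms below are equivalent (standalone illustrations, not part of the query above):
SELECT make_interval(secs => 1800000 / 1000.0); -- 00:30:00
SELECT 1800000 * interval '1 millisecond';      -- 00:30:00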

Postgres very hard dynamic select statement with COALESCE

Having a table and data like this
CREATE TABLE solicitations
(
id SERIAL PRIMARY KEY,
name text
);
CREATE TABLE donations
(
id SERIAL PRIMARY KEY,
solicitation_id integer REFERENCES solicitations, -- can be null
created_at timestamp without time zone NOT NULL DEFAULT (now() at time zone 'utc'),
amount bigint NOT NULL DEFAULT 0
);
INSERT INTO solicitations (name) VALUES
('solicitation1'), ('solicitation2');
INSERT INTO donations (created_at, solicitation_id, amount) VALUES
('2018-06-26', null, 10), ('2018-06-26', 1, 20), ('2018-06-26', 2, 30),
('2018-06-27', null, 10), ('2018-06-27', 1, 20),
('2018-06-28', null, 10), ('2018-06-28', 1, 20), ('2018-06-28', 2, 30);
How can I make the solicitation ids dynamic in the following select statement, using only Postgres?
SELECT
"created_at"
-- make dynamic this begins
, COALESCE("no_solicitation", 0) AS "no_solicitation"
, COALESCE("1", 0) AS "1"
, COALESCE("2", 0) AS "2"
-- make dynamic this ends
FROM crosstab(
$source_sql$
SELECT
created_at::date as row_id
, COALESCE(solicitation_id::text, 'no_solicitation') as category
, SUM(amount) as value
FROM donations
GROUP BY row_id, category
ORDER BY row_id, category
$source_sql$
, $category_sql$
-- parametrize with ids from here begins
SELECT unnest('{no_solicitation}'::text[] || ARRAY(SELECT DISTINCT id::text FROM solicitations ORDER BY id))
-- parametrize with ids from here ends
$category_sql$
) AS ct (
"created_at" date
-- make dynamic this begins
, "no_solicitation" bigint
, "1" bigint
, "2" bigint
-- make dynamic this ends
)
The select should return data like this:
created_at   no_solicitation   1    2
_____________________________________
2018-06-26   10                20   30
2018-06-27   10                20   0
2018-06-28   10                20   30
The solicitation ids that should parametrize select are the same as in
SELECT unnest('{no_solicitation}'::text[] || ARRAY(SELECT DISTINCT id::text FROM solicitations ORDER BY id))
One can fiddle the code here
I decided to use json, which is much simpler than crosstab:
WITH
all_solicitation_ids AS (
SELECT
unnest('{no_solicitation}'::text[] ||
ARRAY(SELECT DISTINCT id::text FROM solicitations ORDER BY id))
AS col
)
, all_days AS (
SELECT
-- TODO: compute days ad hoc, from min created_at day of donations to max created_at day of donations
generate_series('2018-06-26', '2018-06-28', '1 day'::interval)::date
AS col
)
, all_days_and_all_solicitation_ids AS (
SELECT
all_days.col AS created_at
, all_solicitation_ids.col AS solicitation_id
FROM all_days, all_solicitation_ids
ORDER BY all_days.col, all_solicitation_ids.col
)
, donations_ AS (
SELECT
created_at::date as created_at
, COALESCE(solicitation_id::text, 'no_solicitation') as solicitation_id
, SUM(amount) as amount
FROM donations
GROUP BY created_at, solicitation_id
ORDER BY created_at, solicitation_id
)
, donations__ AS (
SELECT
all_days_and_all_solicitation_ids.created_at
, all_days_and_all_solicitation_ids.solicitation_id
, COALESCE(donations_.amount, 0) AS amount
FROM all_days_and_all_solicitation_ids
LEFT JOIN donations_
ON all_days_and_all_solicitation_ids.created_at = donations_.created_at
AND all_days_and_all_solicitation_ids.solicitation_id = donations_.solicitation_id
)
SELECT
jsonb_object_agg(solicitation_id, amount) ||
jsonb_object_agg('date', created_at)
AS data
FROM donations__
GROUP BY created_at
which yields
data
______________________________________________________________
{"1": 20, "2": 30, "date": "2018-06-28", "no_solicitation": 10}
{"1": 20, "2": 30, "date": "2018-06-26", "no_solicitation": 10}
{"1": 20, "2": 0, "date": "2018-06-27", "no_solicitation": 10}
Though it's not quite what I requested.
It returns only a data column instead of the columns date, no_solicitation, 1, 2, ...; to get those I would need json_to_record, but I don't know how to produce its AS argument dynamically.
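For a fixed, known set of ids the expansion itself is straightforward; here is a minimal sketch (one of the result rows inlined as a literal) of what json_to_record needs. The sticking point remains that the AS column list cannot be built dynamically inside a single SQL statement:
SELECT r.*
FROM (SELECT '{"1": 20, "2": 30, "date": "2018-06-26", "no_solicitation": 10}'::json AS data) d
CROSS JOIN LATERAL json_to_record(d.data)
    AS r("date" date, no_solicitation bigint, "1" bigint, "2" bigint);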

SQL Dynamic partition in count function

I have a stored procedure that is used with Reporting Services, and the report has a few user filters that pass values into the stored procedure. Because of these parameters, the count of what is displayed changes, so I want to use them in the partition to change the count. I tried the following, which does not work.
EDIT:
The count is done on ss_number but not on @SearchBy. So if I have
SS# | Name | City | Amount
123456789 | Mike Smith | Trenton | 100.00
123456789 | Mike Smith | Trenton | 200.00
123456789 | Mike Smith | Jackson | 100.00
My count is 3 even though @SearchBy = City and I am filtering on Trenton.
CASE WHEN @SearchBy = 'Product Name' THEN count(ss_number) OVER (PARTITION BY ss_number, @SearchBy)
Right now I am using a case statement but it has slowed my query down considerably.
Here is the code without the case statement attempting to use the variable in my partition.
CREATE PROCEDURE [dbo].[sp_My_sp]
@SearchBy VARCHAR(MAX),
@SearchString VARCHAR(MAX),
@SearchNum Int,
@ClaimDate Datetime2
AS
WITH MyCTE AS
(
SELECT val.Claim_date
, val.Claim_Status
, val.Status_Desc
, ES_Claim_Status
, val.ss_number
, val.name_field1
, val.street_add1
, val.street_add2
, val.city
, val.state
, val.zip_code_pre
, val.reference_number
, val.Game_Name
, val.val_agent
, val.home_number
, val.work_phone
, val.county_desc
, o.agent_num
, count(ss_number) OVER (PARTITION BY ss_number, @SearchBy) as count
, prize_amount
FROM Sec_Claims val left outer join vw_owners_concat_agent_num o
ON val.SS_NUMBER = convert(varchar(15),o.SS_NO)
where convert(numeric,prize_amount) >= 600)
SELECT Claim_date
, CASE WHEN agent_num IS NULL THEN 'NO' ELSE 'YES' END as "IsRetailer"
, Claim_Status
, Status_Desc
, ES_Claim_Status
, ss_number
, name_field1
, street_add1
, street_add2
, city
, state
, zip_code_pre
, reference_number
, Game_Name
, val_agent
, home_number
, work_phone
, county_desc
, agent_num
, count
, ROW_NUMBER() OVER(PARTITION BY Name_Field1 ORDER BY Name_Field1) As RowNumber
, convert(decimal(10,2),prize_amount) as prize_amount
, sum(Convert(decimal(9,2),prize_amount)) OVER (PARTITION BY ss_number, Name_Field1) AS prizesum
FROM MyCTE
WHERE
(CASE
WHEN @SearchBy = 'Agent Number' THEN agent_num
WHEN @SearchBy = 'SS#' THEN SS_NUMBER
WHEN @SearchBy = 'Name' THEN Name_Field1
WHEN @SearchBy = 'Address' THEN STREET_ADD1
WHEN @SearchBy = 'City' THEN City
WHEN @SearchBy = 'Claim#' THEN convert(varchar(max),REFERENCE_NUMBER)
WHEN @SearchBy = 'Validating Retailer' THEN convert(varchar(max),VAL_AGENT)
WHEN @SearchBy = 'County' THEN COUNTY_DESC
WHEN @SearchBy = 'Home Phone' THEN convert(varchar(max),HOME_NUMBER)
WHEN @SearchBy = 'Work Phone' THEN convert(varchar(max),WORK_PHONE)
WHEN @SearchBy = 'Game Name' THEN GAME_NAME
END
like (@SearchString))
and
count >= @SearchNum
and
claim_date > @ClaimDate
ORDER BY ss_number
Your query is slowing down because, as you can imagine, the CASE statement needs to be evaluated for every row, being in a SELECT statement.
This means that if your query loads many rows, you add a lot of overhead.
I see two different approaches to solve your issue. I don't love either of them, but they work :)
FIRST SOLUTION
Use an IF ... ELSE to cover all your possible inputs, as sketched below.
This means that you'll have to write the same query many times, just to differentiate it in one line of code.
This means the code becomes redundant and basically unmaintainable.
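A minimal sketch of that shape (hypothetical: only two inputs shown, and only the partition column differs between the branches):
IF @SearchBy = 'City'
    SELECT ss_number, count(ss_number) OVER (PARTITION BY ss_number, city) AS cnt
    FROM Sec_Claims
ELSE IF @SearchBy = 'Name'
    SELECT ss_number, count(ss_number) OVER (PARTITION BY ss_number, name_field1) AS cnt
    FROM Sec_Claims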
SECOND SOLUTION
Build a dynamic query.
You can write something like this
DECLARE @myQuery nvarchar(max)
SET @myQuery = 'SELECT '
IF @SearchBy = 'Product Name'
SET @myQuery = @myQuery + 'count(ss_number) OVER (PARTITION BY ss_number, ' + @SearchBy + ')' -- concatenate the variable in (assumes it holds a valid column name), rather than embedding its name in the literal
ELSE
.... --put here your second case
SET @myQuery = @myQuery + ' FROM ... WHERE ...'
EXECUTE sp_executesql @myQuery
Try using CASE inside the OVER clause:
Declare @SearchBy varchar(255)
SET @SearchBy = 'Product Name'
Select count(ss_number)
OVER (PARTITION BY ss_number,
Case @SearchBy
When 'Product Name' then [Name]
When 'City Name' then [City]
When 'Amount' then cast([Amount] as varchar(255))
END) t
from (VALUES (123456789,'Mike Smith','Trenton',100.00)
,(123456789,'Mike Smith','Trenton',200.00)
,(123456789,'Mike Smith','Jackson',100.00)
) as t([ss_number],Name,City,Amount)
Judging by what you want, you are first going to need Dynamic SQL. With that being said, you can use row_number(). I still don't understand your extra partition by, but you can just add it to the example below. You can run this, and it will print out the command that is going to be executed.
--create procedure yourProc(
-- @SearchBY varchar(64) --Column to search / filter by
-- ,@SearchString varchar(256) --Values to limit column above by
-- ,@Count int --limit the count per each SSN
-- ,@Date datetime)
--as
--begin
declare @SearchBY varchar(64) = 'City' --Column to search / filter by
declare @SearchString varchar(256) = 'Trenton' --Values to limit column above by
declare @Count int = 3 --limit the count per each SSN
declare @Date datetime = '12/25/2017'
declare @SQL nvarchar(max) =
'with cte as(
select
ss_number
,Name
,City
,Amount
,row_number() over (partition by ss_number order by (select 1)) as RN
from
YourTable
where
SomeDateColumn >= ''' + convert(varchar(10),@Date,112) +
''' and ' + @SearchBY + ' = ''' + @SearchString +
''')
select *
from cte
where ss_number in
(select distinct ss_number from cte where RN >= ' + cast(@Count as varchar(16)) + ')
'
print(@SQL)
--exec(@SQL)
--end
Here is a working example: http://rextester.com/BPWOXH7777

Redshift PostgreSQL Distinct ON Operator

I have a data set that I want to parse to see multi-touch attribution. The data set is made up of leads who responded to a marketing campaign, together with their marketing source.
Each lead can respond to multiple campaigns, and I want to get their first marketing source and their last marketing source in the same table.
I was thinking I could create two tables and use a select statement from both.
The first table would hold the most recent marketing source for each person (using email as their unique ID).
create table temp.multitouch1 as (
select distinct on (email) email, date, market_source as last_source
from sf.campaignmember
where date >= '1/1/2016' ORDER BY DATE DESC);
Then I would create a table with deduped emails but this time for the first source.
create table temp.multitouch2 as (
select distinct on (email) email, date, market_source as first_source
from sf.campaignmember
where date >= '1/1/2016' ORDER BY DATE ASC);
Finally I wanted to simply select the email and join the first and last market sources to it each in their own column.
select a.email, a.last_source, b.first_source, a.date
from temp.multitouch1 a
left join temp.multitouch2 b on b.email = a.email
Since DISTINCT ON doesn't work on Redshift's PostgreSQL version, I was hoping someone had an idea for solving this issue another way.
EDIT 2/22: For more context I'm dealing with people and campaigns they've responded to. Each record is a "campaign response" and every person can have more than one campaign response with multiple sources. I'm trying make a select statement which would dedupe by person and then have columns for the first campaign/marketing source they've responded to and the last campaign/marketing source they've responded to respectively.
EDIT 2/24: Ideal output is a table with 4 columns: email, last_source, first_source, date.
The first and last source columns would be the same for people with only 1 campaign member record and different for everyone who has more than 1 campaign member record.
I believe you could use row_number() inside case expressions like this:
SELECT
email
, MIN(first_source) AS first_source
, MIN(date) first_date
, MAX(last_source) AS last_source
, MAX(date) AS last_date
FROM (
SELECT
email
, date
, CASE
WHEN ROW_NUMBER() OVER (PARTITION BY email ORDER BY date ASC) = 1 THEN market_source
ELSE NULL
END AS first_source
, CASE
WHEN ROW_NUMBER() OVER (PARTITION BY email ORDER BY date DESC) = 1 THEN market_source
ELSE NULL
END AS last_source
FROM sf.campaignmember
WHERE date >= '2016-01-01'
) s
WHERE first_source IS NOT NULL
OR last_source IS NOT NULL
GROUP BY
email
tested here: SQL Fiddle
PostgreSQL 9.3 Schema Setup:
CREATE TABLE campaignmember
(email varchar(3), date timestamp, market_source varchar(1))
;
INSERT INTO campaignmember
(email, date, market_source)
VALUES
('a#a', '2016-01-02 00:00:00', 'x'),
('a#a', '2016-01-03 00:00:00', 'y'),
('a#a', '2016-01-04 00:00:00', 'z'),
('b#b', '2016-01-02 00:00:00', 'x')
;
Query 1:
SELECT
email
, MIN(first_source) AS first_source
, MIN(date) first_date
, MAX(last_source) AS last_source
, MAX(date) AS last_date
FROM (
SELECT
email
, date
, CASE
WHEN ROW_NUMBER() OVER (PARTITION BY email ORDER BY date ASC) = 1 THEN market_source
ELSE NULL
END AS first_source
, CASE
WHEN ROW_NUMBER() OVER (PARTITION BY email ORDER BY date DESC) = 1 THEN market_source
ELSE NULL
END AS last_source
FROM campaignmember
WHERE date >= '2016-01-01'
) s
WHERE first_source IS NOT NULL
OR last_source IS NOT NULL
GROUP BY
email
Results:
| email | first_source | first_date | last_source | last_date |
|-------|--------------|---------------------------|-------------|---------------------------|
| a#a | x | January, 02 2016 00:00:00 | z | January, 04 2016 00:00:00 |
| b#b | x | January, 02 2016 00:00:00 | x | January, 02 2016 00:00:00 |
And a small extension to the request: count the number of contact points.
SELECT
email
, MIN(first_source) AS first_source
, MIN(date) first_date
, MAX(last_source) AS last_source
, MAX(date) AS last_date
, MAX(numof) AS Numberof_Contacts
FROM (
SELECT
email
, date
, CASE
WHEN ROW_NUMBER() OVER (PARTITION BY email ORDER BY date ASC) = 1 THEN market_source
ELSE NULL
END AS first_source
, CASE
WHEN ROW_NUMBER() OVER (PARTITION BY email ORDER BY date DESC) = 1 THEN market_source
ELSE NULL
END AS last_source
, COUNT(*) OVER (PARTITION BY email) as numof
FROM campaignmember
WHERE date >= '2016-01-01'
) s
WHERE first_source IS NOT NULL
OR last_source IS NOT NULL
GROUP BY
email
You can use the good old left-join groupwise-maximum trick.
SELECT DISTINCT c1.email, c1.date, c1.market_source
FROM sf.campaignmember c1
LEFT JOIN sf.campaignmember c2
ON c1.email = c2.email AND c1.date > c2.date AND c1.id > c2.id
LEFT JOIN sf.campaignmember c3
ON c1.email = c3.email AND c1.date < c3.date AND c1.id > c3.id
WHERE c1.date >= '1/1/2016' AND c2.date >= '1/1/2016'
AND (c2.email IS NULL OR c3.email IS NULL)
This assumes you have a unique id column; if (email, date) is unique, id is not needed.
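For completeness, a shorter alternative sketch using the first_value/last_value window functions, which Redshift also supports (untested; assumes the same sf.campaignmember columns as above):
SELECT DISTINCT
       email
     , first_value(market_source) OVER (PARTITION BY email ORDER BY date ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS first_source
     , last_value(market_source) OVER (PARTITION BY email ORDER BY date ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS last_source
     , max(date) OVER (PARTITION BY email) AS "date"
FROM sf.campaignmember
WHERE date >= '2016-01-01';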