GROUP BY clause in BigQuery not working - group-by

When running this query:
SELECT
date( time_ts ),
count(*)
FROM
[bigquery-public-data:hacker_news.comments]
GROUP BY
date( time_ts)
I get the following error:
expression STRFTIME_UTC_USEC(INT64(PARSE_UTC_USEC(STRING([time_ts]))), '%Y-%m-%d') in GROUP BY is invalid
How do I rectify this?

You need to give the date(time_ts) expression an alias and then group by that alias instead, like so:
-- Legacy SQL: alias the computed date once, then group by the alias
-- rather than repeating the DATE(...) expression in GROUP BY.
SELECT
  DATE(time_ts) AS date,
  COUNT(*) AS count
FROM
  [bigquery-public-data:hacker_news.comments]
GROUP BY
  date

Related

How to get number of consecutive days from current date using postgres?

I want to get the number of consecutive days from the current date using Postgres SQL.
enter image description here
The image above shows the scenario; the highlighted rows show what the consecutive-days count should be.
Below is the SQL query which I have created but it's not returning the expected result
with grouped_dates as (
select user_id, created_at::timestamp::date,
(created_at::timestamp::date - (row_number() over (partition by user_id order by created_at::timestamp::date) || ' days')::interval)::date as grouping_date
from watch_history
)
select * , dense_rank() over (partition by grouping_date order by created_at::timestamp::date) as in_streak
from grouped_dates where user_id = 702
order by created_at::timestamp::date
Can anyone please help me to resolve this issue?
If we can somehow apply DISTINCT to the created_at field in the query below, that would solve my issue.
WITH list AS
(
SELECT user_id,
(created_at::timestamp::date - (row_number() over (partition by user_id order by created_at::timestamp::date) || ' days')::interval)::date as next_day
FROM watch_history
)
SELECT user_id, count(*) AS number_of_consecutive_days
FROM list
WHERE next_day IS NOT NULL
GROUP BY user_id
Does anyone have an idea how to apply DISTINCT to created_at in the above-mentioned query?
To get the "number of consecutive days" for the same user_id:
-- Counts, per user, the distinct calendar days that belong to a run of
-- consecutive days (a day counts when the next calendar day also has a row).
WITH watch_days AS
(
    -- Collapse watch_history to one row per user per calendar day.
    -- Without this, several rows on the same day would make the window
    -- array longer than 1 and be mistaken for a streak, and the final
    -- count would count timestamps instead of days.
    SELECT DISTINCT
           user_id
         , created_at::timestamp::date AS watch_day
    FROM watch_history
),
list AS
(
    -- For each day, collect that day plus the following calendar day if present.
    SELECT user_id
         , array_agg(watch_day) OVER (
               PARTITION BY user_id
               ORDER BY watch_day
               RANGE BETWEEN CURRENT ROW AND '1 day' FOLLOWING
           ) AS consecutive_days
    FROM watch_days
)
SELECT user_id, count(DISTINCT d.day) AS number_of_consecutive_days
FROM list
CROSS JOIN LATERAL unnest(consecutive_days) AS d(day)
-- Keep only windows that actually contain a following day.
WHERE array_length(consecutive_days, 1) > 1
GROUP BY user_id
To get the list of "consecutive days" for the same user_id:
-- Returns, per user, the sorted list of distinct calendar days that belong
-- to a run of consecutive days (a day qualifies when the next calendar day
-- also has a row).
WITH watch_days AS
(
    -- Collapse watch_history to one row per user per calendar day.
    -- Without this, several rows on the same day would make the window
    -- array longer than 1 and be mistaken for a streak, and the result
    -- would list timestamps instead of days.
    SELECT DISTINCT
           user_id
         , created_at::timestamp::date AS watch_day
    FROM watch_history
),
list AS
(
    -- For each day, collect that day plus the following calendar day if present.
    SELECT user_id
         , array_agg(watch_day) OVER (
               PARTITION BY user_id
               ORDER BY watch_day
               RANGE BETWEEN CURRENT ROW AND '1 day' FOLLOWING
           ) AS consecutive_days
    FROM watch_days
)
SELECT user_id
     , array_agg(DISTINCT d.day ORDER BY d.day) AS list_of_consecutive_days
FROM list
CROSS JOIN LATERAL unnest(consecutive_days) AS d(day)
-- Keep only windows that actually contain a following day.
WHERE array_length(consecutive_days, 1) > 1
GROUP BY user_id
full example & result in dbfiddle

Big Query Order By Grouped Field

I have a query that groups by date which works fine.
SELECT EXTRACT(date FROM DATETIME(timestamp, 'US/Eastern')) date, SUM(users) total_users FROM `mydataset.mytable`
GROUP BY EXTRACT(date FROM DATETIME(timestamp, 'US/Eastern'))
but when I try to order by date:
SELECT EXTRACT(date FROM DATETIME(timestamp, 'US/Eastern')) date, SUM(users) total_users FROM `mydataset.mytable`
GROUP BY EXTRACT(date FROM DATETIME(timestamp, 'US/Eastern'))
ORDER BY EXTRACT(date FROM DATETIME(timestamp, 'US/Eastern'));
I get the following error:
SELECT list expression references column timestamp which is neither grouped nor aggregated at [1:35]
The timestamp column is clearly part of the group by and even stranger still is that it works without the ORDER BY clause... What's going on here?
#standardSQL
-- GROUP BY 1 / ORDER BY 1 refer to the first SELECT-list item (the
-- extracted Eastern-time date), so the expression is written only once.
SELECT
  EXTRACT(DATE FROM DATETIME(timestamp, 'US/Eastern')) AS date,
  SUM(users) AS total_users
FROM
  `mydataset.mytable`
GROUP BY
  1
ORDER BY
  1
You can try subselect:
#standardSQL
-- The inner query does the grouping; the outer query only sorts, so
-- ORDER BY sees `date` as a plain output column of the subselect.
SELECT
  date,
  total_users
FROM (
  SELECT
    EXTRACT(DATE FROM DATETIME(timestamp, 'US/Eastern')) AS date,
    SUM(users) AS total_users
  FROM
    `mydataset.mytable`
  GROUP BY
    EXTRACT(DATE FROM DATETIME(timestamp, 'US/Eastern'))
)
ORDER BY
  date

Google BigQuery: Select and group by "yyyy-mm" from date field ("yyyy-mm-dd") or timestamp

I would like to group by "yyyy-mm" from a date field ("yyyy-mm-dd") or timestamp field so that I can pull and group transactional data over multiple years without having to pull separate queries grouping by month for each year.
SELECT
  -- 100 + MONTH(...) followed by RIGHT(..., 2) zero-pads the month
  -- to two digits (e.g. 3 -> 103 -> "03").
  CONCAT(
    STRING(YEAR(timestamp)),
    '-',
    RIGHT(STRING(100 + MONTH(timestamp)), 2)
  ) AS yyyymm,
  <any aggregations here>
FROM
  YourTable
GROUP BY
  1
another option:
SELECT
  -- Format the value directly as "yyyy-mm"; GROUP BY 1 groups on this column.
  STRFTIME_UTC_USEC(timestamp, "%Y-%m") AS yyyymm,
  <any aggregations here>
FROM
  YourTable
GROUP BY
  1
Both versions should work with a timestamp or a date.
You can use the following for Standard SQL:
SELECT
  -- "%E4Y" is the four-character year and "%m" the two-digit month;
  -- one pattern yields the same "yyyy-mm" string as concatenating the parts.
  format_date("%E4Y-%m", cast(current_date() as date)) as yyyymm,
  <other aggregations>
FROM <YourTable>
GROUP BY 1;
Just replace current_date() with the name of your column containing the timestamp.

Convert Access Crosstab Query to T-SQL Equivalent

TRANSFORM Count(qryEAOCalls.CALLID) AS CountOfCALLID
SELECT qryEAOCalls.TAPSTAFFNAME, Count(qryEAOCalls.CALLID) AS [Total Calls]
FROM qryEAOCalls
WHERE qryEAOCalls.CALLDATE Between #1/1/1900# And Date()
GROUP BY qryEAOCalls.TAPSTAFFNAME
PIVOT qryEAOCalls.Status In ("Unassigned","Open","Closed","Follow-up Needed");
How do I convert this to T-SQL equivalent?
You should be able to use something similar to the following:
-- T-SQL equivalent of the Access crosstab: one row per TAPSTAFFNAME with a
-- call count per Status value plus the staff member's total call count.
select TAPSTAFFNAME,
    -- OPEN is a reserved keyword in T-SQL, so the column must be bracketed.
    Unassigned, [Open], Closed, [Follow-up Needed],
    TotalCalls
from
(
    select e.TAPSTAFFNAME,
        e.CALLID,
        e.Status,
        -- Window count is computed before pivoting, so it covers every
        -- filtered call for the staff member.
        count(*) over(partition by e.TAPSTAFFNAME) TotalCalls
    from qryEAOCalls e
    where e.CALLDATE >= '1900-01-01'
        and e.CALLDATE <= getdate()
) src
pivot
(
    -- PIVOT groups by the remaining src columns (TAPSTAFFNAME, TotalCalls).
    count(CALLID)
    for status in (Unassigned, [Open], Closed, [Follow-up Needed])
) piv

SQL alias gives "invalid column name" for Group By

I have a problem when trying to create an alias for a new column and use it in the GROUP BY clause:
SELECT TOP 100 Percent
count(id) AS [items_by_day],
(SELECT DATEADD(dd, 0, DATEDIFF(dd, 0, [date]))) AS [date_part]
FROM [MyDB].[dbo].[MyTable]
GROUP BY DAY([date]), MONTH([date]), YEAR([date]), date_part
I get the following error:
Msg 207, Level 16, State 1, Line 5
Invalid column name 'date_part'.
How is it possible to solve the problem?
How about a subquery?
See my demo at sqlfiddle
-- The alias [date_part] is defined inside the derived table sq, so the
-- outer query can select it and group by it as an ordinary column.
SELECT
    COUNT(*) AS nrOfRecords,
    sq.[items_by_day],
    sq.[date_part]
FROM (
    SELECT TOP 100 PERCENT
        COUNT(id) AS [items_by_day],
        -- DATEDIFF/DATEADD against day 0 strips the time portion of [date].
        (SELECT DATEADD(dd, 0, DATEDIFF(dd, 0, [date]))) AS [date_part]
    FROM [MyTable]
    GROUP BY id, date
) AS sq
GROUP BY
    sq.[items_by_day],
    sq.[date_part]
The part (SELECT DateAdd(... DateDiff(...)) seems to return the plain date. Can you explain what I am missing?
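In SQL Server you cannot use a column alias in a GROUP BY clause, because GROUP BY is evaluated before the SELECT list where the alias is defined; aliases are only for display. The exception is an alias defined in a subquery: in that case it becomes the column name seen by the outer query.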