Trying to get the most frequent values in every month
from tables
inspection table :
-- Inspection results, keyed by (lno, idate).
CREATE TABLE inspection (
    lno        INT,
    idate      DATE,
    iid        INT,
    stime      TIME,
    passed     INT,
    violations VARCHAR(100),
    -- passed may only hold the two flag values 0 and 1
    CHECK (passed IN (0, 1)),
    PRIMARY KEY (lno, idate),
    FOREIGN KEY (lno) REFERENCES restaurant
);
can be ignored - > FOREIGN key (lno) REFERENCES restaurant)
data :
-- Sample inspections. Dates are written as ISO 8601 literals (YYYY-MM-DD) so
-- they load identically under any DateStyle setting; the original D.M.YYYY
-- strings are rejected or misread when the session expects month-first input.
-- The column list is explicit so the load does not depend on column order.
INSERT INTO inspection (lno, idate, iid, stime, passed, violations)
VALUES
    (234, '2020-01-06', 333, '16:00', 1, NULL),
    (123, '2020-02-07', 333, '12:15', 0, 'rats'),
    (234, '2020-02-07', 333, '17:00', 0, 'Bugs'),
    (456, '2021-03-01', 222, '20:00', 1, NULL),
    (234, '2021-03-10', 333, '16:00', 1, NULL),
    (567, '2021-03-24', 333, '17:00', 1, NULL),
    (345, '2021-04-09', 222, '18:00', 0, 'Rats'),
    (345, '2021-04-30', 222, '18:00', 1, NULL),
    (123, '2021-05-11', 111, '19:40', 0, 'Mold'),
    (567, '2021-05-15', 111, '19:00', 1, NULL),
    (345, '2021-05-17', 222, '19:00', 1, NULL),
    (456, '2021-05-19', 111, '17:00', 0, 'Bats'),
    (123, '2021-06-13', 222, '13:00', 1, NULL),
    (456, '2021-06-16', 333, '21:00', 0, 'Mold');
query :
-- Attempt at "most frequent iid per month" for rows dated this year or later.
-- NOTE: MAX(iid) returns the largest iid seen in each month, not the iid that
-- occurs most often, so the result differs from the expected output whenever
-- the most frequent iid is not also the largest one in that month.
SELECT date_part('month', idate) ,max(iid)
FROM inspector natural join inspection where date_part('year', idate) >= date_part('year', current_date)
GROUP BY date_part('month', idate)
output:
month
id
3
333
4
222
5
222
6
333
expected output -
month
id
3
333
4
222
5
111
6
222
6
333
IMHO you don't need the inspector table for this calculation. A query like this would do:
-- Step 1: number of inspections per (month, iid) within the current calendar year.
WITH monthly_counts (month, iid, cnt) AS (
    SELECT
        date_part('month', idate),
        iid,
        count(*)
    FROM inspection
    WHERE date_part('year', idate) = date_part('year', current_date)
    GROUP BY date_part('month', idate), iid
),
-- Step 2: the highest count reached in each month.
monthly_peak (month, maxCnt) AS (
    SELECT month, max(cnt)
    FROM monthly_counts
    GROUP BY month
)
-- Step 3: keep every iid whose count equals its month's peak, so ties all appear.
SELECT
    c.month,
    c.iid
FROM monthly_counts AS c
INNER JOIN monthly_peak AS p
    ON  p.month  = c.month
    AND p.maxCnt = c.cnt
ORDER BY c.month, c.iid;
Here is Dbfiddle demo link.
Here is an approach that does not use joins; it relies on DATE_PART and RANK instead:
-- Most frequent iid per month of 2021 without a join: each (month, iid) group
-- is ranked by its row count inside its month, and only rank 1 is kept.
-- RANK gives tied groups the same rank, so ties are all returned.
SELECT
    ranked.month,
    ranked.iid
FROM (
    SELECT
        DATE_PART('MONTH', idate) AS month,
        iid,
        RANK() OVER (
            PARTITION BY DATE_PART('MONTH', idate)
            ORDER BY COUNT(iid) DESC
        ) AS rnk
    FROM inspection
    WHERE DATE_PART('YEAR', idate) = 2021
    GROUP BY DATE_PART('MONTH', idate), iid
) AS ranked
WHERE ranked.rnk = 1;
DB Fiddle
Related
I have created a Transaction table with columns card_id, amount, created_at. There may be more than 1 row of one user so I want to return the value card_id, sum(amount), first created_at date of all users.
-- Card transactions used by the examples below.
CREATE TABLE Transactions (
    card_id    int,
    amount     money,
    created_at date
);

-- Same eleven sample rows, written as a single VALUES list.
INSERT INTO Transactions (card_id, amount, created_at)
VALUES
    (1, 500, '2016-01-01'),
    (1, 100, '2016-01-01'),
    (1, 100, '2016-01-01'),
    (1, 200, '2016-01-02'),
    (1, 300, '2016-01-03'),
    (2, 100, '2016-01-04'),
    (2, 200, '2016-01-05'),
    (3, 700, '2016-01-06'),
    (1, 100, '2016-01-07'),
    (2, 100, '2016-01-07'),
    (3, 100, '2016-01-07');
I have created a function for that, but one of my clients says I need a query, not a function. Can anyone here suggest what query to use?
-- Returns one row per card: the card id, the date of the card's first
-- transaction, and the total amount of the transactions made on that date.
--
-- Changes from the earlier version:
--   * the return table variable is declared with the @ sigil (a # name is not
--     valid in a RETURNS ... TABLE clause);
--   * SUM(MIN(amount)) nested one aggregate inside another, which SQL Server
--     rejects; the total is now a plain SUM over the first-date rows;
--   * the "amount < 0" filter is gone: none of the sample amounts are
--     negative, so it left every date and amount NULL.
--     NOTE(review): re-add the filter if only debits should be considered.
CREATE FUNCTION [dbo].[card_id_data]()
RETURNS @t TABLE
(
    card_id text,
    amount money,
    dateOfFirstTransaction date
)
AS
BEGIN
    INSERT INTO @t (card_id, amount, dateOfFirstTransaction)
    SELECT
        -- card_id is an int in Transactions; go through varchar so the value
        -- can be stored in the text column of the return table.
        CAST(tr.card_id AS varchar(12)),
        SUM(tr.amount),
        first_tx.first_date
    FROM Transactions AS tr
    INNER JOIN (
        -- earliest transaction date of each card
        SELECT card_id, MIN(created_at) AS first_date
        FROM Transactions
        GROUP BY card_id
    ) AS first_tx
        ON  first_tx.card_id    = tr.card_id
        AND first_tx.first_date = tr.created_at
    GROUP BY tr.card_id, first_tx.first_date;

    RETURN
END
I want a result as shown in this screenshot:
You can use DENSE_RANK for this. It will number the rows, taking into account tied places (same dates)
-- Rank each card's rows by date (rows on the same date share a rank), keep
-- only rank 1, i.e. every row on the card's earliest date, and total them.
SELECT
    ranked.card_id,
    SUM(ranked.amount)     AS SumAmount,
    MIN(ranked.created_at) AS FirstDate
FROM (
    SELECT
        tr.card_id,
        tr.amount,
        tr.created_at,
        DENSE_RANK() OVER (
            PARTITION BY tr.card_id
            ORDER BY tr.created_at
        ) AS rn
    FROM dbo.Transactions AS tr
) AS ranked
WHERE ranked.rn = 1
GROUP BY ranked.card_id;
If the dates are actually dates and times, and you want to sum the whole day, change t.created_at to CAST(t.created_at AS date)
Try this:
/*
CREATE TABLE dbo.Transactions
(
card_id INT,
amount MONEY,
created_at DATE
);
INSERT INTO dbo.Transactions (card_id, amount, created_at)
VALUES (1, 500, '2016-01-01'),
(1, 100, '2016-01-01'),
(1, 100, '2016-01-01'),
(1, 200, '2016-01-02'),
(1, 300, '2016-01-03'),
(2, 100, '2016-01-04'),
(2, 200, '2016-01-05'),
(3, 700, '2016-01-06'),
(1, 100, '2016-01-07'),
(2, 100, '2016-01-07'),
(3, 100, '2016-01-07');
*/
-- Earliest transaction date of every card.
WITH FirstDatePerCard AS
(
    SELECT
        card_id,
        MIN(created_at) AS FirstDate
    FROM dbo.Transactions
    GROUP BY card_id
)
-- Join back to the transactions made on that first date and total them,
-- producing one row per card.
SELECT
    t.card_id,
    SUM(t.amount) AS SumAmount,
    f.FirstDate   AS FirstDate
FROM dbo.Transactions AS t
INNER JOIN FirstDatePerCard AS f
    ON  f.card_id   = t.card_id
    AND f.FirstDate = t.created_at
GROUP BY t.card_id, f.FirstDate;
You'll get an output something like this:
card_id SumAmount FirstDate
--------------------------------
1 700.00 2016-01-01
2 100.00 2016-01-04
3 700.00 2016-01-06
Is that what you're looking for??
UPDATE: OK, so you want to sum the amount only for the first_date, for every card_id - is that correct? (wasn't clear from the original question)
Updated my solution accordingly
I have a table where I register a debt and the paid date:
-- One row per payment: which debt it belongs to, when it was paid, and when
-- it was due.
CREATE TABLE my_table
(
    the_debt_id   varchar(6) NOT NULL,
    the_debt_paid timestamp  NOT NULL,
    the_debt_due  date       NOT NULL
);

-- Each statement is terminated with a semicolon so the script can be run as
-- a whole; the column list keeps the load independent of column order.
INSERT INTO my_table (the_debt_id, the_debt_paid, the_debt_due)
VALUES
    ('LMUS01', '2019-05-02 09:00:01', '2019-05-02'),
    ('LMUS01', '2019-06-03 10:45:12', '2019-06-02'),
    ('LMUS01', '2019-07-01 15:39:58', '2019-07-02'),
    ('LMUS02', '2019-05-03 19:43:44', '2019-05-07'),
    ('LMUS02', '2019-06-07 08:37:05', '2019-06-07');
What I want is to aggregate this data per debt_id, payments (the quantity of payments per debt_id), tardiness (if the paid_date > due_date), the first due_date per debt_id and the percentage that each debt was late. This table should give the idea:
the_debt_id payments tardiness first_due_date percentage
LMUS01 3 1 2019-05-02 0.33
LMUS02 2 0 2019-05-07 0
So I tried this so far:
-- Attempt: flag late payments (t1), aggregate per debt (t2), derive the late
-- ratio (t3).
-- t1: tardiness is 1 when the payment date is after the due date, else 0.
WITH t1 AS(
SELECT the_debt_id, the_debt_due, the_debt_paid,
CASE
WHEN the_debt_paid::date > the_debt_due THEN 1
ELSE 0
END AS tardiness
FROM my_table),
-- t2: per-debt totals.
-- NOTE: first_value() is a window function and needs an OVER clause, so it
-- cannot be used as a plain aggregate next to GROUP BY; this is the error
-- reported below.
t2 AS(
SELECT the_debt_id,
sum(tardiness) AS tardiness,
count(the_debt_id) AS payments,
first_value(the_debt_due)
FROM t1
GROUP BY the_debt_id),
-- t3: share of late payments; the cast to float avoids integer division.
t3 AS(
SELECT *,
tardiness/payments::float AS percentage
FROM t2)
SELECT * FROM t3
I get an error where it says I need an OVER clause, which means that I need a partition but I'm not sure how to combine GROUP BY and PARTITION. Any help will be greatly appreciated.
Aggregation seems appropriate:
-- Per-debt summary: payment count, late-payment count, earliest due date and
-- the fraction of payments that were late. A payment is late when its paid
-- date (time of day dropped) is after the due date.
SELECT
    the_debt_id,
    COUNT(*) AS payments,
    COUNT(*) FILTER (
        WHERE CAST(the_debt_paid AS date) > the_debt_due
    ) AS num_tardy,
    MIN(the_debt_due) AS first_due_date,
    -- the boolean is cast to 0/1 so its average is the late ratio
    AVG(CAST(CAST(the_debt_paid AS date) > the_debt_due AS int)) AS tardy_ratio
FROM my_table
GROUP BY the_debt_id;
Here is a db<>fiddle.
I'm working with SQL Teradata and I have a table as such:
cust_id start_dt end_dt amount is_current_y_n
12345 1/8/2018 7/8/2018 7044 N
12345 7/9/2018 7/10/2018 8142 N
12345 7/11/2018 7/13/2018 7643 N
12345 7/14/2018 7/14/2018 8630 N
12345 7/14/2018 7/19/2018 5597 N
12345 7/20/2018 12/31/9999 5680 Y
Another case that I've seen:
cust_id start_dt end_dt amount is_current_y_n
54321 1/1/2015 12/31/9999 8650 Y
I need to calculate with SQL the average amount for the past:
7 days
30 days
90 days
180 days
"Average", meaning that if during the past 7 days the amount changed from 1000 to 2000 in the 3rd day, the average should be:
(1000x3 + 2000x4)/7
I tried to join the table with a date table but it's not very efficient.
Is there any efficient way to achieve that?
It can probably be done via Recursive Common Table Expression Query.
To unfold those date ranges.
With the amounts for each date, the CTE can be joined back to the table to get those averages.
I couldn't test the SQL on a TeraData (don't have it).
But it should almost work on that RDBMS (probably)
-- Unfolds every (start_dt, end_dt) range into one row per day, then averages
-- the daily amounts over look-back windows ending at each row's start_dt.
WITH RECURSIVE CTE (cust_id, dt, amount, start_dt, end_dt) AS
(
-- Anchor: one row per source row, positioned on its start_dt. A range longer
-- than 4200 days has its end collapsed to start_dt, so it is not unfolded
-- (presumably to keep open-ended 9999-12-31 ranges from recursing for
-- millennia; confirm the threshold suits the data).
SELECT cust_id, start_dt as dt, amount, start_dt,
case when end_dt - start_dt > 4200 then start_dt else end_dt end
FROM table_as_such
UNION ALL
-- Recursive step: advance one day at a time until dt reaches end_dt.
SELECT cust_id, dt+1, amount, start_dt, end_dt
FROM CTE
WHERE dt < end_dt
)
-- For each (cust_id, start_dt), average the unfolded daily amounts whose date
-- falls in [start_dt - N, start_dt]. BETWEEN includes both ends, so each
-- window covers N + 1 calendar days.
SELECT t.cust_id, t.start_dt
, ROUND(AVG(case when CTE.dt between t.start_dt - 7 and t.start_dt then CTE.amount end),2) as avg7
, ROUND(AVG(case when CTE.dt between t.start_dt - 30 and t.start_dt then CTE.amount end),2) as avg30
, ROUND(AVG(case when CTE.dt between t.start_dt - 90 and t.start_dt then CTE.amount end),2) as avg90
, ROUND(AVG(case when CTE.dt between t.start_dt - 180 and t.start_dt then CTE.amount end),2) as avg180
FROM table_as_such t
-- The join keeps only the daily rows inside the widest (180-day) window.
JOIN CTE ON (CTE.cust_id = t.cust_id AND CTE.dt between t.start_dt - 180 and t.start_dt)
GROUP BY t.cust_id, t.start_dt
ORDER BY t.cust_id, t.start_dt;
Sample Data Used:
-- Sample data mirroring the question's table, plus a surrogate id key.
create table table_as_such (
    id             int not null primary key,
    cust_id        int,
    start_dt       date,
    end_dt         date,
    amount         int,
    is_current_y_n char(1)
);

insert into table_as_such (id, cust_id, start_dt, end_dt, amount, is_current_y_n)
    values (1, 12345, '2018-01-08', '2018-07-08', 7044, 'N');
insert into table_as_such (id, cust_id, start_dt, end_dt, amount, is_current_y_n)
    values (2, 12345, '2018-07-09', '2018-07-10', 8142, 'N');
insert into table_as_such (id, cust_id, start_dt, end_dt, amount, is_current_y_n)
    values (3, 12345, '2018-07-11', '2018-07-13', 7643, 'N');
insert into table_as_such (id, cust_id, start_dt, end_dt, amount, is_current_y_n)
    values (4, 12345, '2018-07-14', '2018-07-14', 8630, 'N');
insert into table_as_such (id, cust_id, start_dt, end_dt, amount, is_current_y_n)
    values (5, 12345, '2018-07-14', '2018-07-19', 5597, 'N');
-- the open-ended "current" row uses the 9999-12-31 sentinel as its end date
insert into table_as_such (id, cust_id, start_dt, end_dt, amount, is_current_y_n)
    values (6, 12345, '2018-07-20', '9999-12-31', 5680, 'Y');
Maybe the temporal features of Teradata can help you in this case, namely the PERIOD data type and the EXPAND ON clause.
check this example for this feature and your intention:
-- Demo of expanding date ranges into individual days with PERIOD / EXPAND ON.
-- Run everything in the "demo" database.
database demo;
-- Each row carries an amount that applies from startDT to endDT.
create table demoDateExpand (
myID integer
,myUser VARCHAR(100)
,myAmount DECIMAL(10,2)
,startDT DATE
,endDT DATE
) no primary index;
insert into demoDateExpand values (1, 'User01', 2.5, '2018-01-01', '2018-01-05');
insert into demoDateExpand values (2, 'User01', 3.0, '2018-01-08', '2018-01-15');
insert into demoDateExpand values (3, 'User01', 1.5, '2018-01-11', '2018-01-25');
insert into demoDateExpand values (4, 'User02', 2.0, '2018-01-01', '2018-01-15');
insert into demoDateExpand values (5, 'User02', 2.5, '2018-01-05', '2018-01-25');
insert into demoDateExpand values (6, 'User02', 4.5, '2018-01-26', '2018-01-27');
insert into demoDateExpand values (7, 'User03', 1.0, '2018-01-10', '2018-01-15');
insert into demoDateExpand values (8, 'User03', 3.5, '2018-01-16', '2018-01-25');
-- Query 1: show each row next to the PERIOD value built from its two dates.
select myID
,myUser
,myAmount
,startDT
,endDT
,period(startDT, endDT)
from demoDateExpand
;
-- Query 2: expand each row's period into day-sized pieces (myDate) and return
-- the beginning of every piece, giving one output row per covered day.
-- NOTE(review): confirm whether endDT itself is included in the expansion.
select myID
,myUser
,myAmount
,BEGIN(myDate)
from demoDateExpand
expand on period(startDT, endDT) AS myDate BY ANCHOR DAY
order by myID, myDate
;
I managed to create my own query with the help of a table with dates:
2017-07-11
2017-07-12
...
My query is:
-- Day-weighted average amount for one customer over several trailing windows
-- that end on 2018-01-01. Joining each amount row to the daily calendar
-- yields one row per day the amount was in effect, so AVG over those rows
-- weights every amount by the number of days it applied.
--
-- Fix: the range filter used to be
--     start_dt > '2017-07-01' or end_dt = '9999-12-31'
-- which dropped ranges that began on or before 2017-07-01 but were still in
-- effect inside the window. A range is relevant whenever it overlaps the
-- window, i.e. it ends on/after the window start and starts on/before the
-- window end.
--
-- NOTE(review): the literal window starts are kept as given; counted
-- inclusively they span 185, 93, 32 and 9 days rather than exactly
-- 180/90/30/7. Confirm the intended bounds.
select
    d.cust_id
    ,avg(case when d.cal_dt between '2017-07-01' and '2018-01-01' then d.amount end) as avg_180
    ,avg(case when d.cal_dt between '2017-10-01' and '2018-01-01' then d.amount end) as avg_90
    ,avg(case when d.cal_dt between '2017-12-01' and '2018-01-01' then d.amount end) as avg_30
    ,avg(case when d.cal_dt between '2017-12-24' and '2018-01-01' then d.amount end) as avg_7
from
(
    -- one row per (customer, calendar day) with the amount in effect that day
    select
        r.cust_id
        ,cal.cal_dt
        ,r.amount
    from CALENDAR_DAILY_TABLE cal
    join MY_TABLE r
        on r.start_dt <= cal.cal_dt and cal.cal_dt <= r.end_dt
    where cal.cal_dt between '2017-07-01' and '2018-01-01'
      -- overlap test against the widest window
      and r.end_dt >= '2017-07-01'
      and r.start_dt <= '2018-01-01'
      -- filter the customer before the rows are multiplied by the calendar join
      and r.cust_id = '12345'
) as d
group by d.cust_id
The result is:
cust_id avg_180 avg_90 avg_30 avg_7
12345 1.34 1.34 1.34 1.34
Thanks!
I browsed SO but could not quite find the exact answer or maybe it was for a different language.
Let's say I have a table, where each row is a record of a trade:
trade_id customer trade_date
1 A 2013-05-01 00:00:00
2 B 2013-05-01 10:00:00
3 A 2013-05-02 00:00:00
4 A 2013-05-05 00:00:00
5 B 2013-05-06 12:00:00
I would like to have the average time between trades, in days or fraction of days, for each customer, and the number of days since last trade. So for instance for customer A, time between trades 1 and 3 is 1 day and between trades 3 and 4 is 3 days, for an average of 2. So the end table would look like something like this (assuming today it's the 2013-05-10):
customer avg_time_btw_trades time_since_last_trade
A 2.0 5.0
B 5.08 3.5
If a customer has only got 1 trade I guess NULL is fine as output.
Not even sure SQL is the best way to do this (I am working with SQL server), but any help is appreciated!
-- Per customer: average time between consecutive trades and time since the
-- last trade, both in (fractional) days.
--
-- The average gap equals (last trade - first trade) / (number of trades - 1).
-- NULLIF turns a single-trade count into NULL, so such customers get NULL
-- instead of a divide-by-zero error.
-- The seconds are converted to days (decimal) before dividing by the gap
-- count; dividing the integer seconds by the integer count first would
-- truncate the result.
SELECT
    customer,
    DATEDIFF(second, MIN(trade_date), MAX(trade_date)) / 86400.0
        / (NULLIF(COUNT(*), 1) - 1)                        AS avg_time_btw_trades,
    DATEDIFF(second, MAX(trade_date), GETDATE()) / 86400.0 AS time_since_last_trade
FROM
    yourTable
GROUP BY
    customer;
http://sqlfiddle.com/#!6/eb46e/7
EDIT: Added final field that I didn't notice, apologies.
The following SQL script uses your data and gives the expected results.
-- Self-contained demo: load the sample trades into a table variable, then
-- compute per customer the average gap between consecutive trades and the
-- time since the last trade, both in days.
-- Table variables and scalar variables carry the @ sigil; a # name is not
-- valid in DECLARE.
DECLARE @temp TABLE
( trade_id INT,
  customer CHAR(1),
  trade_date DATETIME );

INSERT INTO @temp VALUES (1, 'A', '20130501');
INSERT INTO @temp VALUES (2, 'B', '20130501 10:00');
INSERT INTO @temp VALUES (3, 'A', '20130502');
INSERT INTO @temp VALUES (4, 'A', '20130505');
INSERT INTO @temp VALUES (5, 'B', '20130506 12:00');

-- "Now" is pinned to the date assumed in the question so the output matches
-- the expected table; use the commented-out line for live data.
DECLARE @getdate DATETIME;
-- SET @getdate = getdate();
SET @getdate = '20130510';

SELECT s.customer
     , AVG(s.days_btw_trades) AS avg_time_between_trades
     , CAST(DATEDIFF(hour, MAX(s.trade_date), @getdate) AS float)
       / 24.0 AS time_since_last_trade
FROM (
    -- Pair every trade (t) with the same customer's immediately preceding
    -- trade (t2). A customer's first trade has no predecessor, so its gap is
    -- NULL and AVG ignores it.
    SELECT CAST(DATEDIFF(HOUR, t2.trade_date, t.trade_date) AS float)
           / 24.0 AS days_btw_trades
         , t.customer
         , t.trade_date
    FROM @temp t
    LEFT JOIN @temp t2 ON t2.customer = t.customer
                      AND t2.trade_date = ( SELECT MAX(t3.trade_date)
                                            FROM @temp t3
                                            WHERE t3.customer = t.customer
                                              AND t3.trade_date < t.trade_date)
) s
GROUP BY s.customer
You need a date difference between every trade and average them.
-- Pair every trade (cur) with the same customer's immediately preceding trade
-- (prev), then average the day gaps per customer and report the days since
-- the latest paired trade.
select
    cur.customer
    ,avg(datediff(cur.trade_date, prev.trade_date))
    ,datediff(now(), max(cur.trade_date))
from yourTable cur
inner join yourTable prev
    on prev.customer = cur.customer
where prev.trade_date = (
    -- the latest trade of this customer that is strictly earlier than cur
    select max(earlier.trade_date)
    from yourTable earlier
    where earlier.customer = cur.customer
      and earlier.trade_date < cur.trade_date)
group by cur.customer
Just for grins, I added a solution that uses CTEs. You could probably use a temp table if the first query is too large. I used @MatBailie's creation script for the table:
-- Trades per customer; id is an auto-incrementing surrogate key and the
-- secondary index covers lookups by (customer_id, trade_date).
CREATE TABLE customer_trades (
    id          INT IDENTITY(1,1),
    customer_id INT,
    trade_date  DATETIME,
    PRIMARY KEY (id),
    INDEX ix_user_trades (customer_id, trade_date)
);

-- Sample rows; id is generated, so only the two data columns are supplied.
INSERT INTO customer_trades (customer_id, trade_date)
VALUES
    (1, '2013-05-01 00:00:00'),
    (2, '2013-05-01 10:00:00'),
    (1, '2013-05-02 00:00:00'),
    (1, '2013-05-05 00:00:00'),
    (2, '2013-05-06 12:00:00');
-- Per customer: average gap between consecutive trades (AV) and time since
-- the most recent trade (Curr_Trade), both in hours.
--
-- Fixes:
--   * The gap after a customer's last trade used to be filled in with the
--     distance to GETDATE(), so the "time since last trade" was averaged into
--     AV. LEAD now leaves that gap NULL and AVG skips it; a customer with a
--     single trade gets NULL.
--   * Curr_Trade used LAST_VALUE with the default window frame, which ends at
--     the current row, so MAX() over it returned the largest gap rather than
--     the time since the last trade. It is now computed directly from the
--     latest trade_date.
--   * AVG over integer hours truncated the result; the gaps are promoted to
--     decimal first.
;WITH trade_gaps AS (
    SELECT
        customer_id,
        trade_date,
        DATEDIFF(
            hour,
            trade_date,
            LEAD(trade_date) OVER (PARTITION BY customer_id ORDER BY trade_date)
        ) AS gap_hours
    FROM customer_trades
)
SELECT
    customer_id                                AS Customer_id,
    AVG(gap_hours * 1.0)                       AS AV,
    DATEDIFF(hour, MAX(trade_date), GETDATE()) AS Curr_Trade
FROM trade_gaps
GROUP BY customer_id
I have the following table structure and data
TransID TransType Product Qty OrderRef Date
------- --------- ------- --- -------- ----
C123 Credit Prod1 1 Order8 2014-07-08
C123 Credit Prod2 5 Order8 2014-07-08
Inv111 Invoice Prod1 1 Order8 2014-07-08
Inv111 Invoice Prod2 5 Order8 2014-07-08
C999 Credit Prod1 6 Order8 2014-07-08
C999 Credit Prod2 9 Order8 2014-07-08
Inv666 Invoice Prod1 6 Order8 2014-07-08
What I want to do is to be able to identify those Credit records that have an exact matching group of Invoice records. By exact matching I mean the same Product, OrderRef, Qty and Date
In the above data C123 would match with Inv111 but C999 would not match with Inv666 as Inv666 is missing a row
I want to delete both the Credit and Invoice records that have an exact match. There is no link between Invoice and Credits apart from the OrderRef
I've played around with the Except statement, something like this:-
-- Credit lines (Product, Qty, OrderRef, Date) that have no identical Invoice
-- line. EXCEPT returns the rows of the first query that are absent from the
-- second.
-- Fixes: removed the stray ")" after Date in both select lists, grouped by
-- the table's actual TransID column (there is no TransactionID), and added
-- the final SELECT that a CTE needs in order to be a complete statement.
;with CreditToInvoice(Product, Qty, OrderRef, Date)
as
(
    select Product
          ,Qty
          ,OrderRef
          ,Date
    from #t t1
    where t1.TransType = 'Credit'
    group by TransID, OrderRef, Product, Date, Qty
    EXCEPT
    select Product
          ,Qty
          ,OrderRef
          ,Date
    from #t t2
    where t2.TransType = 'Invoice'
    group by TransID, OrderRef, Product, Date, Qty
)
select Product
      ,Qty
      ,OrderRef
      ,Date
from CreditToInvoice;
which gives me everything in table a not in table b as I would expect
The problem is I really need the TransactionID's so that I can proceed to delete correctly
Is the Except the wrong statement for this? Could I use a merge?
I think a LEFT JOIN and some GROUPing is the most obvious way to deal with this requirement:
-- For each credit, try to pair every one of its lines with an invoice line
-- that has the same Product, OrderRef, Qty and Date. A credit is returned
-- only when all of its lines found a partner.
SELECT
    credit.TransID,
    MAX(invoice.TransID) AS InvoiceID,
    MAX(CASE WHEN invoice.TransID IS NULL THEN 1 ELSE 0 END) AS Unsatsified
FROM #t AS credit
LEFT JOIN #t AS invoice
    ON  invoice.TransType = 'Invoice'
    AND invoice.Product   = credit.Product
    AND invoice.OrderRef  = credit.OrderRef
    AND invoice.Qty       = credit.Qty
    AND invoice.Date      = credit.Date
WHERE credit.TransType = 'Credit'
GROUP BY credit.TransID
-- every joined row has a non-NULL invoice id, i.e. no credit line is unmatched
HAVING COUNT(*) = COUNT(invoice.TransID)
That is, we join together all of the matching rows between a credit and an invoice, and then we only select this result if all credit rows achieved a match.
You can place this in a subquery or CTE and perform an unpivot if you need both TransID values in a single column for the next part of your processing.
The resulting TransIDs should be the ones you need to delete
-- Finds the TransIDs of credits whose every line has a matching invoice line,
-- together with the invoices they match. The ids left in @TransUnique at the
-- end are the ones to delete from the real table.
-- Table variables carry the @ sigil; a # name is not valid in DECLARE.
DECLARE @Trans TABLE
    ([TransID] varchar(6), [TransType] varchar(7), [Product] varchar(5), [Qty] int, [OrderRef] varchar(6), [Date] datetime)
;

INSERT INTO @Trans
    ([TransID], [TransType], [Product], [Qty], [OrderRef], [Date])
VALUES
    ('C123', 'Credit', 'Prod1', 1, 'Order8', '2014-07-08 00:00:00'),
    ('C123', 'Credit', 'Prod2', 5, 'Order8', '2014-07-08 00:00:00'),
    ('Inv111', 'Invoice', 'Prod1', 1, 'Order8', '2014-07-08 00:00:00'),
    ('Inv111', 'Invoice', 'Prod2', 5, 'Order8', '2014-07-08 00:00:00'),
    ('C999', 'Credit', 'Prod1', 6, 'Order8', '2014-07-08 00:00:00'),
    ('C999', 'Credit', 'Prod2', 9, 'Order8', '2014-07-08 00:00:00'),
    ('Inv666', 'Invoice', 'Prod1', 6, 'Order8', '2014-07-08 00:00:00')
;

-- Working set: every distinct transaction id, pruned by the two deletes below.
DECLARE @TransUnique TABLE
    ([TransID] varchar(6)
    )

INSERT INTO @TransUnique
SELECT DISTINCT TransID FROM @Trans

--Remove Credits
-- Drop every credit that has at least one line without a matching invoice
-- line (TransId2 IS NULL after the LEFT JOIN).
DELETE t
FROM @TransUnique t
INNER JOIN (
    select t1.*,t2.TransID [TransId2],t2.TransType [TransType2]
    From @Trans t1
    LEFT JOIN @Trans t2 ON t1.OrderRef=t2.OrderRef
        AND t1.Date=t2.Date
        AND t1.Qty=t2.Qty
        AND t1.Product=t2.Product
        AND t2.TransType='Invoice'
    WHERE t1.TransType='Credit'
) joined ON t.TransID=joined.TransId AND joined.TransId2 IS NULL

--Remove Invoices
-- Drop every invoice that matched a line of a credit removed above
-- (tu.TransID IS NULL means the credit is no longer in the working set).
DELETE t
FROM @TransUnique t
INNER JOIN (
    select t1.*,t2.TransID [TransId2],t2.TransType [TransType2]
    From @Trans t1
    LEFT JOIN @Trans t2 ON t1.OrderRef=t2.OrderRef
        AND t1.Date=t2.Date
        AND t1.Qty=t2.Qty
        AND t1.Product=t2.Product
        AND t2.TransType='Invoice'
    LEFT JOIN @TransUnique tu ON tu.TransID=t1.TransID
    WHERE t1.TransType='Credit'
        AND tu.TransID IS NULL
) joined ON t.TransID=joined.TransId2

SELECT * FROM @TransUnique
If I am reading this correctly, something like this should work.
-- Credit lines for which at least one invoice line exists with the same
-- Product, Qty, OrderRef and Date.
select
    credit.TransID,
    credit.TransType,
    credit.Product,
    credit.Qty,
    credit.OrderRef,
    credit.Date
from #t credit
where credit.TransType = 'Credit'
  and exists (
        select 1
        from #t invoice
        where invoice.TransType = 'Invoice'
          and invoice.Product   = credit.Product
          and invoice.Qty       = credit.Qty
          and invoice.OrderRef  = credit.OrderRef
          and invoice.Date      = credit.Date
      )
Try this in order to get transid
-- TransID of every credit line that has an invoice line with the same
-- product, qty, orderref and date.
-- Fixes: the selected column is qualified (both sides of the self-join expose
-- TransID, so the bare name is ambiguous) and the misspelt t1.orderef is
-- corrected to t1.orderref.
Select t1.TransID
From #t t1
inner join #t t2
    on  t1.product  = t2.product
    and t1.qty      = t2.qty
    and t1.orderref = t2.orderref
    and t1.date     = t2.date
where t1.transtype = 'Credit'
  and t2.transtype = 'Invoice'