TSQL - Exact match on subset of records

TSQL - Exact match on subset of records - sql

I have the following table structure and data
TransID TransType Product Qty OrderRef Date
------- --------- ------- --- -------- ----
C123 Credit Prod1 1 Order8 2014-07-08
C123 Credit Prod2 5 Order8 2014-07-08
Inv111 Invoice Prod1 1 Order8 2014-07-08
Inv111 Invoice Prod2 5 Order8 2014-07-08
C999 Credit Prod1 6 Order8 2014-07-08
C999 Credit Prod2 9 Order8 2014-07-08
Inv666 Invoice Prod1 6 Order8 2014-07-08
What I want to do is to be able to identify those Credit records that have an exact matching group of Invoice records. By exact matching I mean the same Product, OrderRef, Qty and Date
In the above data C123 would match with Inv111 but C999 would not match with Inv666 as Inv666 is missing a row
I want to delete both the Credit and Invoice records that have an exact match. There is no link between Invoice and Credits apart from the OrderRef
I've played around with the Except statement, something like this:-
-- Credit lines that have no identical Invoice line
-- (same Product, Qty, OrderRef and Date).
-- Corrections to the original attempt:
--   * the stray ")" after each select list is removed,
--   * the grouping column is TransID (the sample table has no TransactionID),
--   * a final SELECT consumes the CTE, because a CTE cannot stand on its own.
;with CreditToInvoice(Product, Qty, OrderRef, Date)
as
(
    select Product
          ,Qty
          ,OrderRef
          ,Date
    from #t t1
    where t1.TransType = 'Credit'
    group by TransID, OrderRef, Product, Date, Qty
    EXCEPT
    select Product
          ,Qty
          ,OrderRef
          ,Date
    from #t t2
    where t2.TransType = 'Invoice'
    group by TransID, OrderRef, Product, Date, Qty
)
select Product
      ,Qty
      ,OrderRef
      ,Date
from CreditToInvoice;
which gives me everything in table a not in table b as I would expect
The problem is I really need the TransactionID's so that I can proceed to delete correctly
Is the Except the wrong statement for this? Could I use a merge?

I think a LEFT JOIN and some GROUPing is the most obvious way to deal with this requirement:
-- Return each Credit (TransID) together with the Invoice (InvoiceID) whose
-- lines are exactly the same set of (Product, OrderRef, Qty, Date).
--
-- A credit qualifies only when all three HAVING conditions hold:
--   1. every credit line found an invoice line (no NULL on the invoice side),
--   2. all matched lines belong to one and the same invoice,
--   3. the invoice has no lines beyond the matched ones.
-- NOTE(review): condition 3 assumes a transaction never repeats an identical
-- line; duplicate lines would inflate COUNT(*) - confirm against real data.
SELECT
    cr.TransID,
    MAX(inv.TransID) AS InvoiceID,
    MAX(CASE WHEN inv.TransID IS NULL THEN 1 ELSE 0 END) AS Unsatsified
FROM
    #t AS cr
    LEFT JOIN
    (
        -- Invoice lines, each carrying the total line count of its invoice.
        SELECT
            TransID,
            Product,
            OrderRef,
            Qty,
            Date,
            COUNT(*) OVER (PARTITION BY TransID) AS InvoiceLines
        FROM
            #t
        WHERE
            TransType = 'Invoice'
    ) AS inv
        ON  cr.Product  = inv.Product
        AND cr.OrderRef = inv.OrderRef
        AND cr.Qty      = inv.Qty
        AND cr.Date     = inv.Date
WHERE
    cr.TransType = 'Credit'
GROUP BY
    cr.TransID
HAVING
    MAX(CASE WHEN inv.TransID IS NULL THEN 1 ELSE 0 END) = 0
    AND MIN(inv.TransID) = MAX(inv.TransID)
    AND COUNT(*) = MAX(inv.InvoiceLines)
That is, we join together all of the matching rows between a credit and an invoice, and then we only select this result if all credit rows achieved a match.
You can place this in a subquery or CTE and perform an unpivot if you need both TransID values in a single column for the next part of your processing.

The resulting TransIDs should be the ones you need to delete
-- Sample data from the question, held in a table variable.
-- Table variables are declared with "@"; "DECLARE #name TABLE" is not valid T-SQL.
-- The whole script must run as a single batch, since table variables do not
-- survive past the end of the batch that declares them.
DECLARE @Trans TABLE
([TransID] varchar(6), [TransType] varchar(7), [Product] varchar(5), [Qty] int, [OrderRef] varchar(6), [Date] datetime)
;
INSERT INTO @Trans
([TransID], [TransType], [Product], [Qty], [OrderRef], [Date])
VALUES
('C123', 'Credit', 'Prod1', 1, 'Order8', '2014-07-08 00:00:00'),
('C123', 'Credit', 'Prod2', 5, 'Order8', '2014-07-08 00:00:00'),
('Inv111', 'Invoice', 'Prod1', 1, 'Order8', '2014-07-08 00:00:00'),
('Inv111', 'Invoice', 'Prod2', 5, 'Order8', '2014-07-08 00:00:00'),
('C999', 'Credit', 'Prod1', 6, 'Order8', '2014-07-08 00:00:00'),
('C999', 'Credit', 'Prod2', 9, 'Order8', '2014-07-08 00:00:00'),
('Inv666', 'Invoice', 'Prod1', 6, 'Order8', '2014-07-08 00:00:00')
;
-- Working list: starts with every TransID and is whittled down to the
-- transactions that are safe to delete.
DECLARE @TransUnique TABLE
([TransID] varchar(6)
)
INSERT INTO @TransUnique ([TransID])
SELECT DISTINCT TransID FROM @Trans
--Remove Credits
-- A credit is dropped from the list as soon as one of its lines has no
-- identical invoice line (TransId2 is NULL after the LEFT JOIN).
DELETE t
FROM @TransUnique t
INNER JOIN (
    SELECT t1.TransID, t2.TransID AS TransId2
    FROM @Trans t1
    LEFT JOIN @Trans t2 ON t1.OrderRef=t2.OrderRef
        AND t1.Date=t2.Date
        AND t1.Qty=t2.Qty
        AND t1.Product=t2.Product
        AND t2.TransType='Invoice'
    WHERE t1.TransType='Credit'
) joined ON t.TransID=joined.TransID AND joined.TransId2 IS NULL
--Remove Invoices
-- Invoices that matched a line of a credit removed above are dropped too
-- (tu.TransID IS NULL identifies credits no longer in the list).
-- NOTE(review): an invoice that matches no credit line at all is never
-- touched by either DELETE and therefore stays in the list - confirm this
-- is acceptable for the real data.
DELETE t
FROM @TransUnique t
INNER JOIN (
    SELECT t2.TransID AS TransId2
    FROM @Trans t1
    LEFT JOIN @Trans t2 ON t1.OrderRef=t2.OrderRef
        AND t1.Date=t2.Date
        AND t1.Qty=t2.Qty
        AND t1.Product=t2.Product
        AND t2.TransType='Invoice'
    LEFT JOIN @TransUnique tu ON tu.TransID=t1.TransID
    WHERE t1.TransType='Credit'
        AND tu.TransID IS NULL
) joined ON t.TransID=joined.TransId2
SELECT TransID FROM @TransUnique

If I am reading this correctly, something like this should work.
-- Credit lines belonging to credits that are matched in full.
-- A plain per-line EXISTS would also return the Prod1 line of C999, although
-- C999 as a whole has no complete match. The double NOT EXISTS below keeps a
-- credit only when it has no line without an identical invoice line.
select cr.TransID, cr.TransType, cr.Product, cr.Qty, cr.OrderRef, cr.Date
from #t cr
where cr.TransType = 'Credit'
and not exists (
    -- any line of the same credit ...
    select 1 from #t cr_line
    where cr_line.TransID = cr.TransID
    and cr_line.TransType = 'Credit'
    and not exists (
        -- ... that lacks an identical invoice line
        select 1 from #t inv
        where inv.TransType = 'Invoice'
        and inv.Product = cr_line.Product
        and inv.Qty = cr_line.Qty
        and inv.OrderRef = cr_line.OrderRef
        and inv.Date = cr_line.Date
    )
)

Try this in order to get transid
-- Pair every Credit line with each identical Invoice line and return both
-- transaction ids. TransId must be qualified because both sides of the
-- self-join expose it, and the join column is OrderRef (not "orderef").
-- One row is returned per matching line, so an id can appear several times.
Select t1.TransId, t2.TransId as InvoiceTransId
From #t t1
inner join #t t2
    on t1.transtype = 'Credit' and t2.transtype = 'Invoice'
    and t1.product = t2.product and t1.qty = t2.qty
    and t1.orderref = t2.orderref and t1.date = t2.date

Related

get most frequent values in every month in 2021

Trying to get the most frequent values in every month
from tables
inspection table :
-- Inspection records, keyed by (lno, idate).
-- passed is restricted to 0 or 1; lno refers to the restaurant table.
CREATE TABLE inspection (
    lno        INT,
    idate      DATE,
    iid        INT,
    stime      TIME,
    passed     INT,
    violations VARCHAR(100),
    CHECK (passed IN (0, 1)),
    PRIMARY KEY (lno, idate),
    FOREIGN KEY (lno) REFERENCES restaurant
);
can be ignored - > FOREIGN key (lno) REFERENCES restaurant)
data :
-- Sample rows for inspection.
-- The column list is spelled out so the statement does not depend on column
-- order. Dates are ISO literals: the original day.month.year strings
-- (e.g. '24.3.2021') are only accepted when DateStyle is set to DMY.
INSERT INTO inspection (lno, idate, iid, stime, passed, violations) VALUES
(234, '2020-01-06', 333, '16:00', 1, NULL),
(123, '2020-02-07', 333, '12:15', 0, 'rats'),
(234, '2020-02-07', 333, '17:00', 0, 'Bugs'),
(456, '2021-03-01', 222, '20:00', 1, NULL),
(234, '2021-03-10', 333, '16:00', 1, NULL),
(567, '2021-03-24', 333, '17:00', 1, NULL),
(345, '2021-04-09', 222, '18:00', 0, 'Rats'),
(345, '2021-04-30', 222, '18:00', 1, NULL),
(123, '2021-05-11', 111, '19:40', 0, 'Mold'),
(567, '2021-05-15', 111, '19:00', 1, NULL),
(345, '2021-05-17', 222, '19:00', 1, NULL),
(456, '2021-05-19', 111, '17:00', 0, 'Bats'),
(123, '2021-06-13', 222, '13:00', 1, NULL),
(456, '2021-06-16', 333, '21:00', 0, 'Mold');
query :
-- Asker's attempt: for each month from the current year onward, return the
-- largest iid. max(iid) picks the highest id, not the most frequent one.
SELECT
    date_part('month', idate),
    max(iid)
FROM inspector
NATURAL JOIN inspection
WHERE date_part('year', idate) >= date_part('year', current_date)
GROUP BY
    date_part('month', idate)
output:
month
id
3
333
4
222
5
222
6
333
expected output -
month
id
3
333
4
222
5
111
6
222
6
333

IMHO you don't need the inspector table for this calculation. A query like this would do:
-- monthly_counts: how often each iid appears per month of the current year.
with monthly_counts as
(
    select
        date_part('month', idate) as month,
        iid,
        count(*) as cnt
    from inspection
    where date_part('year', idate) = date_part('year', current_date)
    group by date_part('month', idate), iid
),
-- monthly_peak: the highest count reached in each month.
monthly_peak as
(
    select
        month,
        max(cnt) as maxCnt
    from monthly_counts
    group by month
)
-- Keep every iid whose count equals the monthly peak (ties are all returned).
select
    c.month,
    c.iid
from monthly_counts c
inner join monthly_peak p
    on p.month = c.month
    and p.maxCnt = c.cnt
order by c.month, c.iid;
Here is Dbfiddle demo link.

Here is an approach without using joins, with the assistance of DATE_PART and RANK:
-- Rank the iids of each 2021 month by how often they occur; RANK() gives
-- tied iids the same rank, so every joint leader is returned.
WITH ranked_inspectors AS (
    SELECT
        DATE_PART('MONTH', idate) AS month,
        iid,
        RANK() OVER (
            PARTITION BY DATE_PART('MONTH', idate)
            ORDER BY COUNT(iid) DESC
        ) AS rnk
    FROM inspection
    WHERE DATE_PART('YEAR', idate) = 2021
    GROUP BY
        DATE_PART('MONTH', idate),
        iid
)
SELECT
    month,
    iid
FROM ranked_inspectors
WHERE rnk = 1
DB Fiddle

Numbers inputted each month but then don't appear after that (across month) SQL Server

Current SQL fiddle demo: http://sqlfiddle.com/#!18/1163a/1
I am looking for a total of how many items were added in a specific month but then a total of how many were never a part of an order in the months after that.
Tables:
-- Items and the date each one was added.
CREATE TABLE Item (
ItemNo varchar(10)
,DateAdded date
);
-- Orders; each row links an item to the date it was ordered.
CREATE TABLE Order1 (
OrderNo int,
ItemNo varchar(10),
OrderDate date
);
-- NOTE(review): the literals below look like dd-mm-yy (e.g. '20-02-17');
-- how they are parsed depends on the session's date format setting - confirm.
INSERT INTO Item (ItemNo, DateAdded)
VALUES ('111', '01-01-17'),
('222', '03-01-17'),
('333', '05-01-17'),
('444', '06-02-17'),
('555', '10-02-17'),
('666', '20-02-17');
-- OrderNo is not supplied, so it is left NULL for every row.
INSERT INTO Order1 (ItemNo, OrderDate)
VALUES ('111', '10-01-17'),
('111', '20-02-17'),
('222', '07-05-17'),
('333', '20-01-17'),
('333', '08-03-17'),
('444', '25-01-17');
Currently i have:
-- Asker's current attempt.
-- Every item is paired with every distinct order date (CROSS JOIN); the
-- LEFT JOIN then looks for an order of that item on that exact date, and the
-- result is grouped per (DateAdded, OrderDate) rather than per month.
SELECT
-- b.OrderDate,
A.DateAdded,
COUNT(DISTINCT A.ItemNo) AS [Items Added],
-- Counts item/date pairs for which no order exists on that date.
COUNT(CASE WHEN c.ItemNo IS NULL THEN 1 END) as [Items Never Ordered],
-- NOTE(review): A.ItemNo is neither grouped nor aggregated inside this window
-- function, which SQL Server will presumably reject - confirm.
COUNT(A.ItemNo) OVER (Partition by MONTH(B.OrderDate))
FROM Item a
CROSS JOIN (SELECT DISTINCT OrderDate FROM Order1) b
LEFT JOIN Order1 c
ON a.ItemNo = c.ItemNo
AND b.OrderDate = c.OrderDate
GROUP BY A.DateAdded, b.OrderDate
The result i am looking for is:
| DateAdded | Items Added | Items Never Ordered |
|-----------|-------------|---------------------|
| Feb-17 | 3 | 2 |
| Jan-17 | 3 | 0 |
Can anyone advise me on the best way to approach this? Thanks
This is not a duplicate - there were no answers to the previous question because of date issue

As Abdul pointed out, always use the right data type: a date is not a string.
If you want your data grouped by month, then group by year and month, not by date.
Your cross join did not make sense to me, so I left it out.
-- Items and the date each one was added.
CREATE TABLE Item (
    ItemNo    varchar(10),
    DateAdded date
);

-- Orders placed for items.
CREATE TABLE Order1 (
    OrderNo   int,
    ItemNo    varchar(10),
    OrderDate date
);

-- Sample items, using unambiguous yyyy-mm-dd literals.
INSERT INTO Item (ItemNo, DateAdded)
VALUES
    ('111', '2017-01-01'),
    ('222', '2017-01-03'),
    ('333', '2017-01-05'),
    ('444', '2017-02-06'),
    ('555', '2017-02-10'),
    ('666', '2017-02-20');

-- Sample orders; OrderNo is not supplied and stays NULL.
INSERT INTO Order1 (ItemNo, OrderDate)
VALUES
    ('111', '2017-01-10'),
    ('111', '2017-02-20'),
    ('222', '2017-05-07'),
    ('333', '2017-01-20'),
    ('333', '2017-03-08'),
    ('444', '2017-01-25');
-- Per year and month of DateAdded: how many distinct items were added, and
-- how many joined rows have no order at all (the LEFT JOIN found nothing).
SELECT
    DATEPART(year,  itm.DateAdded) AS Year,
    DATEPART(month, itm.DateAdded) AS Month,
    COUNT(DISTINCT itm.ItemNo) AS [Items Added],
    COUNT(CASE WHEN ord.ItemNo IS NULL THEN 1 END) AS [Items Never Ordered]
FROM Item AS itm
LEFT JOIN Order1 AS ord
    ON ord.ItemNo = itm.ItemNo
GROUP BY
    DATEPART(year,  itm.DateAdded),
    DATEPART(month, itm.DateAdded)

SQL find average time difference between rows for a given category

I browsed SO but could not quite find the exact answer or maybe it was for a different language.
Let's say I have a table, where each row is a record of a trade:
trade_id customer trade_date
1 A 2013-05-01 00:00:00
2 B 2013-05-01 10:00:00
3 A 2013-05-02 00:00:00
4 A 2013-05-05 00:00:00
5 B 2013-05-06 12:00:00
I would like to have the average time between trades, in days or fraction of days, for each customer, and the number of days since last trade. So for instance for customer A, time between trades 1 and 3 is 1 day and between trades 3 and 4 is 3 days, for an average of 2. So the end table would look like something like this (assuming today it's the 2013-05-10):
customer avg_time_btw_trades time_since_last_trade
A 2.0 5.0
B 5.08 3.5
If a customer has only got 1 trade I guess NULL is fine as output.
Not even sure SQL is the best way to do this (I am working with SQL server), but any help is appreciated!

-- Average gap between consecutive trades per customer, in days.
-- The gaps between consecutive trades add up to (last trade - first trade),
-- so the average is that span divided by (number of trades - 1).
-- NULLIF turns a single-trade customer into NULL instead of dividing by zero.
-- The division by 86400.0 comes first so the arithmetic is decimal throughout;
-- dividing the integer DATEDIFF result by the integer count first would
-- truncate to whole seconds.
SELECT
    customer,
    DATEDIFF(second, MIN(trade_date), MAX(trade_date)) / 86400.0
        / (NULLIF(COUNT(*), 1) - 1) AS avg_time_btw_trades,
    -- Days elapsed since the customer's most recent trade.
    DATEDIFF(second, MAX(trade_date), GETDATE()) / 86400.0 AS time_since_last_trade
FROM
    yourTable
GROUP BY
    customer
http://sqlfiddle.com/#!6/eb46e/7
EDIT: Added final field that I didn't notice, apologies.

The following SQL script uses your data and gives the expected results.
-- Sample trades in a table variable. Table variables and scalar variables are
-- declared with "@"; "DECLARE #name" is not valid T-SQL.
DECLARE @temp TABLE
( trade_id INT,
customer CHAR(1),
trade_date DATETIME );
INSERT INTO @temp (trade_id, customer, trade_date) VALUES (1, 'A', '20130501');
INSERT INTO @temp (trade_id, customer, trade_date) VALUES (2, 'B', '20130501 10:00');
INSERT INTO @temp (trade_id, customer, trade_date) VALUES (3, 'A', '20130502');
INSERT INTO @temp (trade_id, customer, trade_date) VALUES (4, 'A', '20130505');
INSERT INTO @temp (trade_id, customer, trade_date) VALUES (5, 'B', '20130506 12:00');
-- "Today" is pinned to the date assumed in the question so that the output is
-- reproducible; switch to the commented line to use the real current time.
DECLARE @getdate DATETIME
-- SET @getdate = getdate();
SET @getdate = '20130510';
SELECT s.customer
, AVG(s.days_btw_trades) AS avg_time_between_trades
, CAST(DATEDIFF(hour, MAX(s.trade_date), @getdate) AS float)
/ 24.0 AS time_since_last_trade
FROM (
    -- One row per trade with the gap (in days) to the customer's previous
    -- trade. The first trade has no predecessor, so its gap is NULL and is
    -- ignored by AVG.
    SELECT CAST(DATEDIFF(HOUR, t2.trade_date, t.trade_date) AS float)
    / 24.0 AS days_btw_trades
    , t.customer
    , t.trade_date
    FROM @temp t
    LEFT JOIN @temp t2 ON t2.customer = t.customer
    AND t2.trade_date = ( SELECT MAX(t3.trade_date)
        FROM @temp t3
        WHERE t3.customer = t.customer
        AND t3.trade_date < t.trade_date)
) s
GROUP BY s.customer

You need a date difference between every trade and average them.
-- MySQL: average gap between consecutive trades and time since the last
-- trade, both in (fractional) days.
-- TIMESTAMPDIFF in seconds is used because MySQL's DATEDIFF compares only the
-- date parts and would return whole days (5 instead of 5.08 for customer B).
-- Customers with a single trade have no previous trade to pair with and are
-- therefore absent from the result.
select
    a.customer
   ,avg(timestampdiff(second, b.trade_date, a.trade_date)) / 86400
   ,timestampdiff(second, max(a.trade_date), now()) / 86400
from yourTable a
inner join yourTable b
    on b.customer = a.customer
-- b is the trade immediately before a for the same customer
where b.trade_date = (
    select max(c.trade_date)
    from yourTable c
    where c.customer = a.customer
    and c.trade_date < a.trade_date)
group by a.customer

Just for grins I added a solution that uses CTEs. You could probably use a temp table if the first query is too large. I used @MatBailie's creation script for the table:
-- Trades per customer; the index on (customer_id, trade_date) is declared
-- inline with the table.
CREATE TABLE customer_trades (
    id          INT IDENTITY(1,1),
    customer_id INT,
    trade_date  DATETIME,
    PRIMARY KEY (id),
    INDEX ix_user_trades (customer_id, trade_date)
)
-- Sample trades for two customers.
INSERT INTO customer_trades (customer_id, trade_date)
VALUES
    (1, '2013-05-01 00:00:00'),
    (2, '2013-05-01 10:00:00'),
    (1, '2013-05-02 00:00:00'),
    (1, '2013-05-05 00:00:00'),
    (2, '2013-05-06 12:00:00')
;
-- Average gap between consecutive trades (AV) and time since the most recent
-- trade (Curr_Trade) per customer, both in hours.
--
-- Corrections to the original:
--   * the last trade's gap is left NULL instead of being filled with the
--     distance to GETDATE(), so AVG covers only real trade-to-trade gaps
--     (and is NULL for a customer with a single trade);
--   * the time since the last trade is computed directly from MAX(trade_date);
--     LAST_VALUE with its default window frame only sees rows up to the
--     current one and therefore returned each row's own value;
--   * gaps are measured in minutes and divided by 60.0, so neither the gaps
--     nor the average are truncated to whole hours.
;WITH trade_gaps AS (
    SELECT
        customer_id,
        trade_date,
        LEAD(trade_date) OVER (PARTITION BY customer_id ORDER BY trade_date) AS next_trade_date
    FROM customer_trades
)
SELECT
    customer_id AS Customer_id,
    AVG(DATEDIFF(minute, trade_date, next_trade_date) / 60.0) AS AV,
    DATEDIFF(minute, MAX(trade_date), GETDATE()) / 60.0 AS Curr_Trade
FROM trade_gaps
GROUP BY customer_id

Any one help me to solve this i try my best but did not solve this?

ItemName Price CreatedDateTime
New Card 50.00 2014-05-26 19:17:09.987
Recharge 110.00 2014-05-26 19:17:12.427
Promo 90.00 2014-05-27 16:17:12.427
Membership 70.00 2014-05-27 16:17:12.427
New Card 50.00 2014-05-26 19:20:09.987
Output: I need a query that returns, for each hour, the total sales of that
hour (SaleAmount) and the item with the maximum sale in that hour
(BreakDownOfSale column).
Hour SaleAmount BreakDownOfSale
19 210 Recharge
16 160 Promo

This should do it
-- Temporary table holding the sample sales from the question.
create table #t
(
    ItemName        varchar(50),
    Price           decimal(18,2),
    CreatedDateTime datetime
);

-- Make sure the yyyy-mm-dd literals below are read as year-month-day.
set dateformat ymd;

insert into #t (ItemName, Price, CreatedDateTime)
values
    ('New Card',   50.00,  '2014-05-26 19:17:09.987'),
    ('Recharge',   110.00, '2014-05-26 19:17:12.427'),
    ('Promo',      90.00,  '2014-05-27 16:17:12.427'),
    ('Membership', 70.00,  '2014-05-27 16:17:12.427'),
    ('New Card',   50.00,  '2014-05-26 19:20:09.987');
-- Per hour of day: total sales, plus the item with the highest total sales
-- in that hour.
-- Sales are summed per item first; ranking individual rows by Price would
-- pick the most expensive single sale even when another item sold more in
-- total. ItemName is a tie-breaker so the result is deterministic.
-- The statement before this one must end with ";" because it starts with WITH.
with item_sales as
(
    -- total sold per item within each hour
    select datepart(hh, CreatedDateTime) as [Hour],
           ItemName,
           sum(Price) as ItemAmount
    from #t
    group by datepart(hh, CreatedDateTime), ItemName
),
ranked as
(
    select [Hour],
           ItemName,
           sum(ItemAmount) over (partition by [Hour]) as SaleAmount,
           row_number() over (partition by [Hour] order by ItemAmount desc, ItemName) as rn
    from item_sales
)
select [Hour],
       SaleAmount,
       ItemName
from ranked
where rn = 1

Though i am not clear with the question, based on your desired output, you may use the query as below.
-- Total price per item and hour of day. "WHERE BY" is not valid SQL; the
-- aggregate needs GROUP BY on the non-aggregated columns.
SELECT DATEPART(HOUR, CreatedDateTime) AS Hour,
       SUM(Price) AS Price,
       ItemName AS BreakDownOfSale
FROM TableName
GROUP BY ItemName, DATEPART(HOUR, CreatedDateTime)
Replace table name and column name with the actual one.
Hope this helps!

Here is the sample query.
You can use SQL Server Windows functions to get the result you need.
-- Sample sales in a table variable. Table variables are declared with "@";
-- "DECLARE #Table TABLE" is not valid T-SQL.
DECLARE @Table TABLE
(
    ItemName NVARCHAR(40),
    Price DECIMAL(10,2),
    CreatedDatetime DATETIME
)
-- Fill table.
INSERT INTO @Table
( ItemName, Price, CreatedDatetime )
VALUES
( N'New Card' , 50.00 , '2014-05-26 19:17:09.987' ),
( N'Recharge' , 110.00 , '2014-05-26 19:17:12.427' ) ,
( N'Promo' , 90.00 , '2014-05-27 16:17:12.427' ) ,
( N'Membership' , 70.00 , '2014-05-27 16:17:12.427' ) ,
( N'New Card' , 50.00 , '2014-05-26 19:20:09.987' )
-- Check record(s).
SELECT ItemName, Price, CreatedDatetime FROM @Table
-- Get record(s) in required way.
;WITH T1 AS
(
    SELECT
        DATEPART(HOUR, T.CreatedDatetime) AS Hour,
        CONVERT(DATE, T.CreatedDatetime) AS Date,
        T.ItemName AS BreakDownOfSales,
        -- Date and hour both will give unique record(s)
        SUM(T.Price) OVER (PARTITION BY CONVERT(DATE, T.CreatedDatetime), DATEPART(HOUR, T.CreatedDatetime)) AS SaleAmount,
        -- RN = 1 marks the row with the highest Price within each date and hour.
        ROW_NUMBER() OVER (PARTITION BY CONVERT(DATE, T.CreatedDatetime), DATEPART(HOUR, T.CreatedDatetime) ORDER BY T.Price DESC) AS RN
    FROM
        @Table T
)
SELECT
    T1.Date ,
    T1.Hour ,
    T1.SaleAmount,
    T1.BreakDownOfSales
FROM
    T1
WHERE T1.RN = 1
ORDER BY
    T1.Hour

Check this simple solution, Please convert it to SQL Server Query.
This will give you perfect result even if you have multiple date data.
-- MySQL: total Price per hour of day, plus one item name chosen by the
-- correlated subquery for the same hour and date.
-- NOTE(review): the outer query groups by HOUR only, while the subquery also
-- compares DATE(ot.CreatedDateTime), which is not part of the grouping;
-- this presumably requires ONLY_FULL_GROUP_BY to be disabled - confirm.
SELECT HOUR(CreatedDateTime), SUM(Price),
(SELECT itemname FROM t it WHERE HOUR(ot.CreatedDateTime) = HOUR(it.CreatedDateTime) AND
DATE(ot.CreatedDateTime) = DATE(it.CreatedDateTime)
GROUP BY itemname
-- NOTE(review): price is not aggregated although the subquery groups by
-- itemname, so which price drives the ordering is unclear - confirm.
ORDER BY price DESC
LIMIT 1
) g
FROM t ot
GROUP BY HOUR(CreatedDateTime);

SQL Rollup last 4 weeks total

I have a table which I want to get the previous four weeks Order total in a query. But I want to return it with a SELECT (A total of the row's previous 4 weeks Order1 column - if they exist)
PurchasingID Order1 Date FourWeekTotal
------------ ------------------- ------- ---------------
1 1.00 2013-04-21 14.00
2 2.00 2013-04-14 12.00
3 3.00 2013-04-07 9.00
4 4.00 2013-03-31 5.00
5 5.00 2013-03-24 0.00

My understanding is that, for each record in your table, you want to see the sum of Order1 over every record whose Date falls within the four weeks before that record's Date (the record itself is not included). Here you go:
-- Sample table: one order amount per date, with an auto-numbered key.
CREATE TABLE MysteryTable
(
    PurchasingId INT   NOT NULL PRIMARY KEY IDENTITY(1,1),
    Order1       MONEY NOT NULL,
    [Date]       DATE  NOT NULL
)
-- Rows are inserted one at a time, newest date first, so the identity values
-- 1..5 follow this order.
INSERT INTO MysteryTable (Order1, [Date]) VALUES (1.00, '2013-04-21')
INSERT INTO MysteryTable (Order1, [Date]) VALUES (2.00, '2013-04-14')
INSERT INTO MysteryTable (Order1, [Date]) VALUES (3.00, '2013-04-07')
INSERT INTO MysteryTable (Order1, [Date]) VALUES (4.00, '2013-03-31')
INSERT INTO MysteryTable (Order1, [Date]) VALUES (5.00, '2013-03-24')
-- For every row, add up Order1 of the rows dated within the four weeks
-- before it (the row itself is excluded by the strict "<").
-- The LEFT JOIN keeps rows without any earlier data; ISNULL turns their
-- missing amount into 0.
SELECT
    cur.PurchasingId,
    cur.Order1,
    cur.Date,
    SUM(ISNULL(prev.Order1, 0)) AS FourWeekTotal
FROM MysteryTable AS cur
LEFT OUTER JOIN MysteryTable AS prev
    ON  prev.Date >= DATEADD(ww, -4, cur.Date)
    AND prev.Date <  cur.Date
GROUP BY
    cur.PurchasingId,
    cur.Order1,
    cur.Date
ORDER BY
    cur.Date DESC
Explanation:
Join the table on itself, t1 representing the records to return, t2 to be the records to aggregate. Join based on t1's Date minus four weeks being less than or equal to t2's Date and t1's Date being greater than t2's Date. Then group the records by the t1 fields and sum t2.Order1. Left outer join is to account for the one record that will not have any preceding data.

Try this...
-- Look-back boundary: 28 days before now. Variables are declared with "@";
-- "Declare #LBDate" is not valid T-SQL.
Declare @LBDate date
SET @LBDate = DATEADD(d,-28,getdate())
-- Now write your select query...
Select * from Orders where Date between @LBDate and Getdate()
You can also use your required date instead of the current date.

We Keep Coding

sql objective-c vba vb.net react-native apache vue.js tensorflow api pandas

TSQL - Exact match on subset of records - sql

Try this in order to get transid Select t1.TransId, t2.TransId as InvoiceTransId From #t t1 inner join #t t2 on t1.transtype = 'Credit' and t2.transtype = 'Invoice' and t1.product=t2.product and t1.qty = t2.qty and t1.orderref=t2.orderref and t1.date = t2.date

Related

get most frequent values in every month in 2021

Numbers inputted each month but then don't appear after that (across month) SQL Server

SQL find average time difference between rows for a given category

Any one help me to solve this i try my best but did not solve this?

SQL Rollup last 4 weeks total

Categories

Resources