Parent count based on pairing of multiple children - sql

In the below example, I'm trying to count the number of drinks I can make based on the availability of ingredients per bar location that I have.
To further clarify, as seen in the below example: based on the figures highlighted in the chart below; I know that I can only make 1 Margarita on 6/30/2018 (in either DC or FL if I ship the supplies to the location).
Sample of data table
Please use the below code to enter the relevant data above:
-- Sample data: one row per ingredient (child) delivery for a drink (parent)
-- at a bar location. avail_amt is the quantity that arrived on a_date.
CREATE TABLE #drinks
(
    a_date    DATE,
    loc       NVARCHAR(2),
    parent    NVARCHAR(20),
    line_num  INT,
    child     NVARCHAR(20),
    avail_amt INT
);
-- Dates use the ISO yyyy-mm-dd form so they load the same way regardless of
-- the session's language/DATEFORMAT; integers are written as integers.
INSERT INTO #drinks (a_date, loc, parent, line_num, child, avail_amt)
VALUES
    ('2018-06-26', 'CA', 'Long Island', 1, 'Vodka', 7),
    ('2018-06-27', 'CA', 'Long Island', 2, 'Gin', 5),
    ('2018-06-28', 'CA', 'Long Island', 3, 'Rum', 26),
    ('2018-06-26', 'DC', 'Long Island', 1, 'Vodka', 15),
    ('2018-06-27', 'DC', 'Long Island', 2, 'Gin', 18),
    ('2018-06-28', 'DC', 'Long Island', 3, 'Rum', 5),
    ('2018-06-26', 'FL', 'Long Island', 1, 'Vodka', 34),
    ('2018-06-27', 'FL', 'Long Island', 2, 'Gin', 14),
    ('2018-06-28', 'FL', 'Long Island', 3, 'Rum', 4),
    ('2018-06-30', 'DC', 'Margarita', 1, 'Tequila', 6),
    ('2018-07-01', 'DC', 'Margarita', 2, 'Triple Sec', 3),
    ('2018-06-29', 'FL', 'Margarita', 1, 'Tequila', 1),
    ('2018-06-30', 'FL', 'Margarita', 2, 'Triple Sec', 0),
    ('2018-07-02', 'CA', 'Cuba Libre', 1, 'Rum', 1),
    ('2018-07-08', 'CA', 'Cuba Libre', 2, 'Coke', 5),
    ('2018-07-13', 'CA', 'Cuba Libre', 3, 'Lime', 14),
    ('2018-07-05', 'DC', 'Cuba Libre', 1, 'Rum', 0),
    ('2018-07-19', 'DC', 'Cuba Libre', 2, 'Coke', 12),
    ('2018-07-31', 'DC', 'Cuba Libre', 3, 'Lime', 9),
    ('2018-07-02', 'FL', 'Cuba Libre', 1, 'Rum', 1),
    ('2018-07-19', 'FL', 'Cuba Libre', 2, 'Coke', 3),
    ('2018-07-17', 'FL', 'Cuba Libre', 3, 'Lime', 2),
    ('2018-06-30', 'DC', 'Long Island', 3, 'Rum', 4),
    ('2018-07-07', 'FL', 'Cosmopolitan', 5, 'Triple Sec', 7);
The expected results are as follows:
Please note, as seen in the expected results, children are interchangeable. For example, on 7/7/2018 Triple Sec arrived for the drink Cosmopolitan; however, because Triple Sec is also a child of Margarita, it changes the availability of Margaritas for FL.
Also note the update to the DC region for Cuba Libre's on both 06/30 and 06/31.
Please take into consideration that parts are interchangeable, and also that each time a new item arrives it may make available drinks that previously could not be made.
Lastly - it would be awesome if I could add another column that shows kit availability regardless of location, based only on availability of the child. For example, if there is a child #3 in DC and none in FL, then FL can assume that it has enough inventory to make the drink based on inventory in another location!

I've created a couple of extra tables to help with writing the query, but these could be generated from the #drinks table if you wanted:
-- Recipe lookup: one row per ingredient (child) required by a drink (parent).
CREATE TABLE #recipes
(
    parent NVARCHAR(20),
    child  NVARCHAR(20)
);
-- 'Margarita' is spelled the same way as in #drinks so the report labels match.
INSERT INTO #recipes (parent, child)
VALUES
    ('Long Island', 'Vodka'),
    ('Long Island', 'Gin'),
    ('Long Island', 'Rum'),
    ('Margarita', 'Tequila'),
    ('Margarita', 'Triple Sec'),
    ('Cuba Libre', 'Coke'),
    ('Cuba Libre', 'Rum'),
    ('Cuba Libre', 'Lime'),
    ('Cosmopolitan', 'Cranberry Juice'),
    ('Cosmopolitan', 'Triple Sec');
-- Lookup of every bar location the report should cover.
CREATE TABLE #locations
(
    loc NVARCHAR(20)
);
INSERT INTO #locations (loc)
VALUES
    ('CA'),
    ('FL'),
    ('DC');
The query then becomes:
--Report window; the date range below includes both bounds.
--Local variables must use the @ sigil (# denotes a temp table).
DECLARE @StartDateTime DATETIME;
DECLARE @EndDateTime DATETIME;
SET @StartDateTime = '2018-06-26';
SET @EndDateTime = '2018-07-31';
--First, build a range of dates that the report has to run for
WITH DateRange(a_date) AS
(
    --Anchor row: the start of the window as a DATE (a cast, not a column alias)
    SELECT CAST(@StartDateTime AS DATE)
    UNION ALL
    --Recursive step: add one day until the end of the window is reached
    SELECT DATEADD(DAY, 1, a_date)
    FROM DateRange
    WHERE a_date < @EndDateTime
)
SELECT a_date, parent, loc, avail_amt
FROM (--available_recipes_inventory
    SELECT a_date, parent, loc, avail_amt,
        --Amount on the previous remaining row for the same location/recipe
        --(0 when there is no earlier row)
        LAG(avail_amt, 1, 0) OVER (PARTITION BY loc, parent ORDER BY a_date) AS previous_avail_amt
    FROM (--recipes_inventory
        SELECT a_date, parent, loc,
            --The least amount of the ingredients for a recipe is the most
            --amount of drinks we can make for it
            MIN(avail_amt) AS avail_amt
        FROM (--ingredients_inventory
            SELECT dr.a_date, r.parent, r.child, l.loc,
                --Default ingredients we don't have with a zero amount
                COALESCE(d.avail_amt, 0) AS avail_amt
            FROM DateRange dr
            CROSS JOIN #recipes r
            CROSS JOIN #locations l
            OUTER APPLY
            (
                --Find the total amount available for each
                --ingredient at each location for each date
                SELECT SUM(d1.avail_amt) AS avail_amt
                FROM #drinks d1
                WHERE d1.a_date <= dr.a_date
                AND d1.loc = l.loc
                AND d1.child = r.child
            ) d
        ) AS ingredients_inventory
        GROUP BY a_date, parent, loc
    ) AS recipes_inventory
    --Remove all recipes that we don't have enough ingredients for
    WHERE avail_amt > 0
) AS available_recipes_inventory
--Selects the first time a recipe has enough ingredients to be made
WHERE previous_avail_amt = 0
--Selects when the amount of ingredients has changed
OR previous_avail_amt <> avail_amt
ORDER BY a_date
--MAXRECURSION needed to generate the date range
OPTION (MAXRECURSION 0)
GO
The innermost SELECT creates a pseudo inventory table (ingredients_inventory) consisting of location, ingredient, date and amount available. When an ingredient is not available at a location for a particular date, then a zero is used.
The next SELECT query out finds how many of each recipe can be made for each location/date (again this may be zero).
The next SELECT query out is an intermediate table necessary to gather how many of each recipe for each location could be made for the previous day (whilst also removing any drinks that could not be made).
And finally, the outermost SELECT query uses the previous day's data to find when the quantity of each particular recipe that can be made has changed.
This query produces slightly different numbers to your table, but I think that's because yours is wrong? Taking Florida for example, an extra Rum comes in on 2nd July, so the number of Long Islands that can be made goes up to 5. And 2 Cuba Libres can be made by the 19th.
Results:
+------------+-------------+-----+-----------+
| a_date | parent | loc | avail_amt |
+------------+-------------+-----+-----------+
| 2018-06-28 | Long Island | DC | 5 |
| 2018-06-28 | Long Island | CA | 5 |
| 2018-06-28 | Long Island | FL | 4 |
| 2018-06-30 | Long Island | DC | 9 |
| 2018-07-01 | Maragrita | DC | 3 |
| 2018-07-02 | Long Island | FL | 5 |
| 2018-07-07 | Maragrita | FL | 1 |
| 2018-07-13 | Cuba Libre | CA | 5 |
| 2018-07-19 | Cuba Libre | FL | 2 |
| 2018-07-31 | Cuba Libre | DC | 9 |
+------------+-------------+-----+-----------+

I think this would give the required result.
Created a function that'll get the inventory.
-- Inline table-valued function: for one location, returns each child's
-- avail_amt from its most recent drinks row dated on or before @date.
--   @date : cut-off date (inclusive)
--   @Loc  : location code to filter on
-- Returns one row per child: (child, avail_amt).
-- NOTE(review): this reads a table named drinks, not the #drinks temp table
-- (user-defined functions cannot reference temp tables) - presumably a
-- permanent copy of the sample data; confirm.
Create function GetInventoryByDateAndLocation
(@date DATE, @Loc NVARCHAR(2))
RETURNS TABLE
AS
RETURN
(
    Select child, avail_amt
    from
    (
        Select a_date, child, avail_amt,
            -- ranking = 1 marks the latest row per child within the cut-off
            ROW_NUMBER() over (partition by child order by a_date desc) as ranking
        from drinks
        where loc = @Loc and a_date <= @date
    ) c
    where ranking = 1
)
Then the query:
-- Lists, per date and location, the drinks whose every child is in inventory.
with parentChild as
(
    -- Distinct parent/child pairs as they appear in drinks
    Select distinct parent, line_num, child from drinks
),
ParentChildNo as
(
    -- Highest line_num per parent is used as the number of children it needs
    Select parent, max(line_num) as ChildNo from parentChild group by parent
),
Inventory as
(
    -- Evaluate the inventory once per distinct (a_date, loc). Driving this from
    -- every drinks row repeats the inventory when two rows share a date and
    -- location, which inflates childAvailable and hides valid drinks.
    Select d.a_date, d.loc, s.child, s.avail_amt
    from (Select distinct a_date, loc from drinks) d
    cross apply GetInventoryByDateAndLocation(d.a_date, d.loc) s
),
Available as
(
    -- How many of a parent's children are present, and the scarcest amount
    Select i.a_date, c.parent, i.loc, count(*) as childAvailable, min(i.avail_amt) as quantity
    from Inventory i
    join parentChild c
        on i.child = c.child
    group by c.parent, i.loc, i.a_date
)
Select a.a_date, a.parent, a.loc, a.quantity
from Available a
join ParentChildNo pc
    on a.parent = pc.parent and a.childAvailable = pc.ChildNo
where a.quantity > 0
order by a.a_date
This would give all the drinks which can be made from the inventory. Hope it solves your issue.
These are just my 2 cents. There are better ways of doing this and I hope more people would read this and suggest better.

I don't think this is exactly what you're looking for... but maybe it will help.
-- For every (loc, parent) pair in #drinks, shows Avail: the smallest value of
-- (summed child amount / line_num) across that parent's rows.
SELECT DISTINCT
    d.loc,
    d.parent,
    per_parent.Avail
FROM #drinks AS d
LEFT JOIN (
    SELECT DISTINCT
        dp.parent,
        MIN(child_total.maxavailnow / dp.line_num) OVER (PARTITION BY dp.parent) AS Avail
    FROM #drinks AS dp
    LEFT JOIN (
        -- Total avail_amt per child across all #drinks rows; the left join to
        -- the latest-date rows does not filter any rows out.
        SELECT
            dc.child,
            SUM(dc.avail_amt) AS maxavailnow
        FROM #drinks AS dc
        LEFT JOIN (
            SELECT MAX(a_date) AS latest_date, loc, child
            FROM #drinks
            GROUP BY loc, child
        ) AS latest
            ON dc.loc = latest.loc
            AND dc.child = latest.child
            AND latest.latest_date = dc.a_date
        GROUP BY dc.child
    ) AS child_total
        ON dp.child = child_total.child
) AS per_parent
    ON per_parent.parent = d.parent

-- Per location and drink: latest delivery date, total amount and row count.
-- The latest date is taken with MAX() over the group itself; the grouping
-- columns are exactly (loc, parent), so this equals the former correlated
-- subquery without a per-row lookup.
SELECT MAX(d.a_date) AS a_date
    ,d.loc
    ,d.parent
    ,SUM(d.avail_amt) AS [avail_amt(SUM)]
    ,COUNT(d.avail_amt) AS [avail_amt(COUNT)]
FROM #drinks AS d
GROUP BY d.loc
    ,d.parent
ORDER BY a_date

Related

SQL recursively creating matching groups based on reference table

Imagine you had a data source like:
Id
Val
Data_Date
1
A
2022-01-01
2
B
2022-01-05
3
C
2022-01-09
4
D
2022-01-31
5
E
2022-02-01
With a reference table matching values in this way:
Target_Val
Matching_Val
Valid_Start
Valid_End
B
A
2022-01-04
2022-01-06
C
B
2022-01-09
2022-01-09
D
A
2022-01-31
2022-01-31
Imagine you want to create a table grouping values together where there is a match in the reference table within X days, say 4.
And you want to apply this matching recursively.
Output would be something like this:
Group_Id
Id
1
1
1
2
1
3
2
4
3
5
The logic here would be that C matches to B in the appropriate date range, and B matches to A in the appropriate date range, therefore they are all one group.
But although D matches to A, it is too far apart (greater than 4 days). And E doesn't match to anything.
There could be any depth (A > B > C > D ...)
Is there an appropriate algorithm in SQL to accomplish this? The values of the group IDs are unimportant and just meant to group data points together.
Here's my attempt. You do indeed need a recursive CTE, but you need to join the source table to groups table and then join back to the source table to ensure that the child fits within the parent's 4 day window. E.g. in the case of D and A, as you mention, they match, but they aren't close enough to be counted.
Then I added a calculation to work out which rows were part of a valid hierarchy and used that for the recursive join, because we can exclude anything that is not part of a hierarchy.
After that we need to order the records by their depth so we know which parent record is first, e.g. in the case of A > B > C.
Then DENSE_RANK over the results to get your final groups. This will need some testing with deeper levels of recursion though, but this should point you in the right direction:
-- Data points to be grouped.
CREATE TABLE SourceData
(
    Id        INTEGER,
    Val       CHAR(1),
    Data_Date DATE
);
-- Matching rules: Target_Val matches Matching_Val between Valid_Start and Valid_End.
CREATE TABLE Groups
(
    Target_Val   CHAR(1),
    Matching_Val CHAR(1),
    Valid_Start  DATE,
    Valid_End    DATE
);
INSERT INTO SourceData (Id, Val, Data_Date)
VALUES
    (1, 'A', '2022-01-01'),
    (2, 'B', '2022-01-05'),
    (3, 'C', '2022-01-09'),
    (4, 'D', '2022-01-31'),
    (5, 'E', '2022-02-01');
INSERT INTO Groups (Target_Val, Matching_Val, Valid_Start, Valid_End)
VALUES
    ('B', 'A', '2022-01-04', '2022-01-06'),
    ('C', 'B', '2022-01-09', '2022-01-09'),
    ('D', 'A', '2022-01-31', '2022-01-31');
-- Assigns a Group_Id to every SourceData row by following Groups rules
-- recursively and ranking rows on the last parent value reached.
WITH sourceCTE AS
(
-- Anchor: every SourceData row, with the Groups rule (if any) whose Target_Val
-- is the row's Val and whose validity window contains the row's Data_Date, and
-- a SourceData row s (if any) holding the rule's Matching_Val dated fewer than
-- 5 days away. ParentVal is the row's own Val when no such s exists, otherwise
-- the rule's Matching_Val; IsHeirarchy is 0 or 1 accordingly.
SELECT sd.Id, sd.Val, sd.Data_Date, g.Valid_Start, g.Valid_End, IIF(s.Val IS NULL, sd.Val, g.Matching_Val) [ParentVal], CAST(NULL AS DATE) [start], CAST(NULL AS DATE) [end], 1 [Depth],
IIF(s.Val IS NULL, 0, 1) IsHeirarchy
FROM SourceData sd
LEFT JOIN Groups g ON g.Target_Val = sd.Val AND sd.Data_Date BETWEEN g.Valid_Start AND g.Valid_End
LEFT JOIN SourceData s ON s.Val = g.Matching_Val AND ABS(DATEDIFF(DAY, s.Data_Date, sd.Data_Date)) < 5
UNION ALL
-- Recursive step: for rows flagged IsHeirarchy = 1, follow the Groups rule whose
-- Target_Val equals the current ParentVal, replacing ParentVal with that rule's
-- Matching_Val and increasing Depth by one.
-- NOTE(review): this step applies no date condition - confirm that is intended.
SELECT s.Id, s.Val, s.Data_Date, g.Valid_Start, g.Valid_End, g.Matching_Val, g.Valid_Start, g.Valid_End, s.[Depth] + 1, 1
FROM sourceCTE s
INNER JOIN Groups g ON g.Target_Val = s.[ParentVal] AND s.IsHeirarchy = 1
),
ResultCTE AS
(
-- Number each Id's rows from the deepest level down; RNum = 1 is the deepest.
SELECT *, ROW_NUMBER() OVER (PARTITION BY Id ORDER BY [Depth] DESC) [RNum]
FROM sourceCTE
)
-- Rows sharing the same deepest ParentVal receive the same Group_Id.
SELECT DENSE_RANK() OVER (ORDER BY ParentVal) [Group_Id], Id
FROM ResultCTE
WHERE [RNum] = 1
Here's a working fiddle.
I can't promise this is the best solution, because just like the query optimiser I gave up after about 2 hours, ha.
Also, for any future questions, please provide sample data in script format to save time creating the structure.

Exclude rows where dates exist in another table

I have 2 tables, one is working pattern, another is absences.
1) Work pattern
ID | Shift Start | Shift End
123| 01-03-2017 | 02-03-2017
2) Absences
ID| Absence Start | Absence End
123| 01-03-2017 | 04-03-2017
What would be the best way, when selecting rows from work pattern, to exclude any that have a date marked as an absence in the absence table?
For example, I have a report that uses the work pattern table to count how many days a week an employee has worked; however, I don't want it to include the days that have been marked as an absence in the absence table, if that makes sense. I also don't want it to include any days that fall between the absence start and absence end dates.
If the span of the absence should always encompass the shift to be excluded you can use not exists():
-- Keep only the shifts that are not fully covered by one of the same
-- employee's absences.
SELECT *
FROM WorkPatterns AS w
WHERE NOT EXISTS (
    SELECT 1
    FROM Absences AS a
    WHERE a.Id = w.Id
      AND w.ShiftStart >= a.AbsenceStart
      AND w.ShiftEnd <= a.AbsenceEnd
)
rextester demo: http://rextester.com/DCODC76816
returns:
+-----+------------+------------+
| id | ShiftStart | ShiftEnd |
+-----+------------+------------+
| 123 | 2017-02-27 | 2017-02-28 |
| 123 | 2017-03-05 | 2017-03-06 |
+-----+------------+------------+
given this test setup:
-- Four two-day shifts for employee 123.
CREATE TABLE WorkPatterns (
    [id]         int,
    [ShiftStart] datetime,
    [ShiftEnd]   datetime
);
INSERT INTO WorkPatterns ([id], [ShiftStart], [ShiftEnd])
VALUES
    (123, '20170227', '20170228'),
    (123, '20170301', '20170302'),
    (123, '20170303', '20170304'),
    (123, '20170305', '20170306');
-- One absence that spans the two middle shifts.
CREATE TABLE Absences (
    [id]           int,
    [AbsenceStart] datetime,
    [AbsenceEnd]   datetime
);
INSERT INTO Absences ([id], [AbsenceStart], [AbsenceEnd])
VALUES
    (123, '20170301', '20170304');
What would be the best way, when selecting rows from work pattern
If you are dealing only with dates (no time) and have control over the db schema,
one approach is to create a calendar table
containing all dates since the company started, plus some years into the future.
Fill that table once.
After that it is easy to join other tables with dates and do the math.
If you have trouble constructing the T-SQL query, please edit the question with more details about the columns and values of the tables, their relations and the needed results.
How about this:
-- Returns every shift that is not wholly inside one of the employee's absences
-- (i.e. its start or its end falls outside each absence).
-- NOT EXISTS is used instead of joining to absences so that
--   * employees with no absence rows are still returned, and
--   * a shift covered by one absence is not let through by a different,
--     unrelated absence row of the same employee.
-- Unlike the UNION form, identical duplicate shift rows are not collapsed.
SELECT WP.[id], WP.[shift_start], WP.[shift_end]
FROM work_pattern AS WP
WHERE NOT EXISTS (
    SELECT 1
    FROM absences AS A
    WHERE A.id = WP.id
      AND WP.[shift_start] BETWEEN A.[absence_start] AND A.[absence_end]
      AND WP.[shift_end] BETWEEN A.[absence_start] AND A.[absence_end]
)
See it on SQL Fiddle: http://sqlfiddle.com/#!6/49ae6/6
Here is my example that includes a Date Dimension table. If your DBAs won't add it, you can create #dateDim as a temp table, like I've done with SQLFiddle (didn't know I could do that). A typical date dimension would have a lot more details you need about the days, but if the table can't be added, just use what you need. You'll have to populate the other Holidays you need. The DateDim I use often is at https://github.com/shawnoden/SQL_Stuff/blob/master/sql_CreateDateDimension.sql
SQL Fiddle
MS SQL Server 2014 Schema Setup:
/* Test data: shift ranges and absence ranges per employee id. */
CREATE TABLE WorkPatterns (
    id         int,
    ShiftStart date,
    ShiftEnd   date
);
INSERT INTO WorkPatterns (id, ShiftStart, ShiftEnd)
VALUES
    (123, '20170101', '20171031'),
    (124, '20170601', '20170831');
CREATE TABLE Absences (
    id           int,
    AbsenceStart date,
    AbsenceEnd   date
);
INSERT INTO Absences (id, AbsenceStart, AbsenceEnd)
VALUES
    (123, '20170123', '20170127'),
    (123, '20170710', '20170831'),
    (124, '20170801', '20170820');
/* ******** MAKE SIMPLE CALENDAR TABLE ******** */
/* One row per calendar day of 2017 with weekend/holiday/work-day flags. */
CREATE TABLE dateDim (
    theDate DATE NOT NULL
    , IsWeekend BIT DEFAULT 0
    , IsHoliday BIT DEFAULT 0
    , IsWorkDay BIT DEFAULT 0
);
/* Populate basic details of dates. */
INSERT dateDim(theDate, IsWeekend, IsHoliday)
SELECT d
    /* Weekend test that does not depend on the session's DATEFIRST setting:
       the expression yields 0 for Sunday through 6 for Saturday. */
    , CONVERT(BIT, CASE WHEN (DATEPART(dw, d) + @@DATEFIRST - 1) % 7 IN (0, 6) THEN 1 ELSE 0 END)
    , CONVERT(BIT, CASE WHEN d = '20170704' THEN 1 ELSE 0 END) /* 4th of July. */
FROM (
    SELECT d = DATEADD(DAY, rn - 1, '20170101')
    FROM
    (
        /* DATEDIFF counts day boundaries (364 here), so add 1 to include
           the last day, 2017-12-31. */
        SELECT TOP (DATEDIFF(DAY, '20170101', '20171231') + 1)
            rn = ROW_NUMBER() OVER (ORDER BY s1.[object_id])
        /* The cross join is only a row source large enough to number from. */
        FROM sys.all_objects AS s1
        CROSS JOIN sys.all_objects AS s2
        ORDER BY s1.[object_id]
    ) AS x
) AS y ;
/* If not a weekend or holiday, it's a WorkDay. */
UPDATE dateDim
SET IsWorkDay = CASE WHEN IsWeekend = 0 AND IsHoliday = 0 THEN 1 ELSE 0 END
;
Query For Calculation:
-- Counts, per employee, the work days inside their shift ranges that are not
-- covered by any of that employee's absences.
SELECT
    wp.ID,
    COUNT(d.theDate) AS workDayCount
FROM WorkPatterns AS wp
INNER JOIN dateDim AS d
    ON d.IsWorkDay = 1
    AND d.theDate BETWEEN wp.ShiftStart AND wp.ShiftEnd
WHERE NOT EXISTS (
    SELECT 1
    FROM Absences AS a
    WHERE a.ID = wp.ID
      AND d.theDate BETWEEN a.AbsenceStart AND a.AbsenceEnd
)
GROUP BY wp.ID
ORDER BY wp.ID
Results:
| ID | workDayCount |
|-----|--------------|
| 123 | 172 | << 216 total days, 44 non-working
| 124 | 51 | << 65 total days, 14 non-working

Query to join left table with latest record on the right table using inner join

Below are the two tables of an oracle database(11g):
Table 1: EXPORT_TABLE
-- Two sample EXPORT_TABLE rows, one per INTBA.
INSERT INTO EXPORT_TABLE
    (INTBA, INDEXNAME, BANAME, SCBA, NAME, NVRCOUNTRY)
VALUES
    ('133520', 'Hong Kong', 659923, '0', ' Trustee Limited', 'HK');
INSERT INTO EXPORT_TABLE
    (INTBA, INDEXNAME, BANAME, SCBA, NAME, NVRCOUNTRY)
VALUES
    ('133516', ' Holdings ', 659923, '0', ' Banking Holdings', 'CH');
Table 2 : ba_name
-- Sample ba_name rows; INTBA 133520 has two rows with different TRAN_DATEs.
-- name_type_code is '0001' so the rows match the join condition used by the
-- queries below (they filter on '0001'); BASEQ is written as a number throughout.
-- NOTE(review): assumes TRAN_DATE is a DATE column, hence ANSI date literals
-- rather than month-name strings - confirm against the real schema.
INSERT INTO ba_name
    (MAINKEY, INTBA, CLIENTNAME, BASEQ, TRAN_DATE, name_type_code, indx_name_e_dte)
VALUES
    (1000, '133520', 'ROYAL Challenge', 0, DATE '2016-03-10', '0001', NULL);
INSERT INTO ba_name
    (MAINKEY, INTBA, CLIENTNAME, BASEQ, TRAN_DATE, name_type_code, indx_name_e_dte)
VALUES
    (1001, '133520', 'Royal Challenge', 0, DATE '2016-09-24', '0001', NULL);
INSERT INTO ba_name
    (MAINKEY, INTBA, CLIENTNAME, BASEQ, TRAN_DATE, name_type_code, indx_name_e_dte)
VALUES
    (1003, '133516', 'Deloitte AG', 0, DATE '2016-05-20', '0001', NULL);
The requirement requires to join EXPORT_TABLE with ba_name on a join condition mentioned below
ba_name.INTBA = EXPORT_TABLE.INTBA
AND ba_name.name_type_code = '0001'
AND ba_name.indx_name_e_dte IS NULL
In the second table 'ba_name', INTBA is not a unique key, and the table can have multiple rows with the same INTBA, created on different transaction dates. It's a legacy system (we can't do anything about that now).
Since we have duplicate entries with INTBA, we need to join EXPORT_TABLE (column name=INTBA) with that of ba_name table , which is having the latest TRAN_DATE.
In other words, I would like to join EXPORT_TABLE with the matching record in ba_name that is the most recent, i.e. has the latest TRAN_DATE. I want to join INTBA '133520' of EXPORT_TABLE with INTBA '133520' of ba_name having tran_date '2016-SEPT-24', not the one having the March tran_date ('2016-MAR-10').
THE RESULT SET WANTED TO SEE IS :
(intBA |CLIENTNAME| BASEQ | TRAN_DATE)
======================================
133520 |'Royal Challenge'| 0 | '2016-SEPT-24'
133516 |'Deloitte AG' |'0' | '2016-MAY-20'
However tried with below query :
-- Joins every qualifying ba_name row, so an INTBA with several ba_name rows
-- appears once per row (this is the duplicate the question asks about).
SELECT e.intBA,
       n.CLIENTNAME,
       n.BASEQ,
       n.TRAN_DATE
FROM EXPORT_TABLE e
INNER JOIN ba_name n
    ON n.INTBA = e.INTBA
WHERE n.name_type_code = '0001'
  AND n.indx_name_e_dte IS NULL
I got below result which is incorrect:
(intBA |CLIENTNAME| BASEQ | TRAN_DATE)
======================================
133520 |'Royal Challenge'| 0 | '2016-SEPT-24'
133520 |'RoYAL Challenge'| 0 | '2016-MAR-10'
133516 |'Deloitte AG' | 0 | '2016-MAY-20'
Can you suggest how can I join left side table with the matching row of second(right) table having the latest transaction date.
One common method is row_number():
-- Number each INTBA's qualifying ba_name rows from newest to oldest TRAN_DATE
-- and join only the newest one (rn = 1).
SELECT e.intBA,
       latest.CLIENTNAME,
       latest.BASEQ,
       latest.TRAN_DATE
FROM EXPORT_TABLE e
INNER JOIN (
    SELECT b.INTBA,
           b.CLIENTNAME,
           b.BASEQ,
           b.TRAN_DATE,
           ROW_NUMBER() OVER (PARTITION BY b.INTBA ORDER BY b.TRAN_DATE DESC) AS rn
    FROM ba_name b
    WHERE b.name_type_code = '0001'
      AND b.indx_name_e_dte IS NULL
) latest
    ON latest.INTBA = e.INTBA
WHERE latest.rn = 1;
You might be looking for something like:
the added subselect returns the latest TRAN_DATE for each combination
-- Joins each EXPORT_TABLE row to the qualifying ba_name row whose TRAN_DATE is
-- the latest among the qualifying rows for that INTBA.
-- (CLIENTNAME comes from alias ba; the alias bae was never defined.)
SELECT EXP.intBA,
       ba.CLIENTNAME,
       ba.BASEQ,
       ba.TRAN_DATE
FROM EXPORT_TABLE EXP
INNER JOIN ba_name ba
    ON ba.INTBA = EXP.INTBA
    AND ba.name_type_code = '0001'
    AND ba.indx_name_e_dte IS NULL
    -- Latest TRAN_DATE among the rows that satisfy the same filters
    AND ba.TRAN_DATE = (SELECT MAX(bamax.TRAN_DATE)
                        FROM ba_name bamax
                        WHERE bamax.INTBA = ba.INTBA
                          AND bamax.name_type_code = '0001'
                          AND bamax.indx_name_e_dte IS NULL
                       )

Inserting into multiple tables and selecting the first result

Let's say I have two tables that implement a very simple invoice system (note: the schema can't be changed):
-- Invoices; a cloned invoice points at its original via parent_invoice_id.
CREATE TABLE invoices (
    id                serial PRIMARY KEY,
    parent_invoice_id integer REFERENCES invoices (id),
    name              text NOT NULL
);
-- Line items belonging to an invoice.
CREATE TABLE line_items (
    id         serial PRIMARY KEY,
    invoice_id integer NOT NULL REFERENCES invoices (id),
    amount     integer NOT NULL
);
The user has the ability to "clone" an invoice and have it refer to the original "parent" invoice. In the system, the invoice is required directly after the clone (however the line_items are not required). Therefore, after cloning the invoice, the new invoice must be returned. Here's the SQL I'm using to clone an invoice:
-- Clones invoice 12345: inserts the new invoice, copies the original's line
-- items onto it, and returns the new invoice row.
with new_invoice_row as (
    insert into invoices (parent_invoice_id, name)
    values (12345/*invoice_to_clone_id*/, 'Hello World')
    returning *
),
new_line_item_rows as (
    insert into line_items (invoice_id, amount)
    select
        new_invoice_row.id, line_items.amount
    from line_items
    -- new_invoice_row holds exactly one row, so this only attaches the new id
    cross join new_invoice_row
    where
        -- the table is line_items; "line_item" is not a defined name
        line_items.invoice_id = 12345/*invoice_to_clone_id*/
    returning id
)
select * from new_invoice_row;
Questions:
Is the cross join going to perform well? I was thinking of being able to just remove the cross join to reduce having to do the join, but it wouldn't run (error: missing FROM-clause entry for table "new_invoice_row"):
...
insert into line_items (invoice_id, amount)
select
new_invoice_row.id, line_item.amount
from line_items
where
line_item.invoice_id = 12345
returning id
...
Is there anyway that the returning id part of the new_line_item_rows statement can be removed? The new line items aren't needed, so I'd like to avoid the extra overhead if it can improve performance.
Should I stop using a query and move all of this into a function? The system was originally using a MS SQL database, so I'm more familiar with using declare and having multiple statements use the variable.
The first query can return only id and parent_invoice_id.
Use the second value in order to avoid re-writing the argument (as a protection against typos).
Cross join is necessary and correct.
You can skip returning * in the second query.
A function is not necessary, although it may be convenient to use.
-- Clone invoice 12345 and copy its line items; the original id is read back
-- from the inserted row instead of being typed a second time.
WITH new_invoice_row AS (
    INSERT INTO invoices (parent_invoice_id, name)
    VALUES (12345, 'Hello World')
    RETURNING id, parent_invoice_id
),
new_line_item_rows AS (
    INSERT INTO line_items (invoice_id, amount)
    SELECT
        inv.id,
        src.amount
    FROM new_invoice_row AS inv
    CROSS JOIN line_items AS src
    WHERE src.invoice_id = inv.parent_invoice_id
)
SELECT * FROM new_invoice_row;
-- Schema plus seed data: three invoices (the second is a clone of the first)
-- and five line items.
CREATE TABLE invoices (
    id                serial PRIMARY KEY,
    parent_invoice_id integer REFERENCES invoices (id),
    name              text NOT NULL
);
INSERT INTO invoices (parent_invoice_id, name)
VALUES
    (NULL, 'One'),
    (1, 'two'),
    (NULL, 'three');
CREATE TABLE line_items (
    id         serial PRIMARY KEY,
    invoice_id integer NOT NULL REFERENCES invoices (id),
    amount     integer NOT NULL
);
INSERT INTO line_items (invoice_id, amount)
VALUES
    (1, 10),
    (1, 11),
    (2, 21),
    (2, 22),
    (3, 33);
-- Demonstration only: the clone + insert wrapped in a prepared statement
-- (convenient, not required).
--   $1 = id of the invoice to clone, $2 = name of the new invoice,
--   $3 = amount of the single line item added to the new invoice
PREPARE clone_the_invoice (INTEGER, text, INTEGER) AS
WITH cloned_invoice AS (
    INSERT INTO invoices (parent_invoice_id, name)
    VALUES ($1, $2)
    RETURNING id
),
added_line_item AS (
    INSERT INTO line_items (invoice_id, amount)
    SELECT ci.id, $3
    FROM cloned_invoice AS ci
    RETURNING id
)
SELECT * FROM added_line_item;
-- Run it: clones invoice #2 and inserts one line item that refers to the
-- clone. The statement returns the new line item's id, from which the new
-- invoice id can be looked up when needed.
EXECUTE clone_the_invoice (2, 'four', 123);
-- Check the result: every invoice with its line items.
-- Ordered explicitly so the output is deterministic.
SELECT
    iv.id
    , iv.parent_invoice_id
    , iv.name
    , li.id AS lineid
    , li.amount
FROM invoices iv
INNER JOIN line_items li ON li.invoice_id = iv.id
ORDER BY iv.id, li.id
;
Result:
CREATE TABLE
INSERT 0 3
CREATE TABLE
INSERT 0 5
PREPARE
id
----
6
(1 row)
id | parent_invoice_id | name | lineid | amount
----+-------------------+-------+--------+--------
1 | | One | 1 | 10
1 | | One | 2 | 11
2 | 1 | two | 3 | 21
2 | 1 | two | 4 | 22
3 | | three | 5 | 33
4 | 2 | four | 6 | 123
(6 rows)
And for non-trivial cases, the FKs will need a supporting index (this is not added automatically, so you should do this manually)
-- Supporting indexes for the two foreign keys. They are named explicitly
-- (following the table_column_idx pattern) so they are easy to find in
-- migrations and error messages.
CREATE INDEX invoices_parent_invoice_id_idx ON invoices (parent_invoice_id);
CREATE INDEX line_items_invoice_id_idx ON line_items (invoice_id);
Update: if you insist on returning the new invoice, here you go:
-- Same as clone_the_invoice, but returns the new invoice row.
--   $1 = id of the invoice to clone, $2 = new invoice name,
--   $3 = amount of the single line item added to the new invoice
PREPARE clone_the_invoice2 (INTEGER, text, integer) AS
WITH new_invoice_row AS (
    INSERT INTO invoices (parent_invoice_id, name)
    VALUES ($1, $2)
    RETURNING *
),
new_line_item_rows AS (
    INSERT INTO line_items (invoice_id, amount)
    SELECT inv.id, $3
    FROM new_invoice_row AS inv
    RETURNING *
)
SELECT iv.*
FROM new_invoice_row AS iv
INNER JOIN new_line_item_rows AS li
    ON li.invoice_id = iv.id
;
UPDATE 2 (it appears the OP wants the detail lines to be cloned, too):
-- Clone an invoice
-- INCLUDING all associated line_items
-- --------------------------------------
--   $1 = id of the invoice to clone, $2 = name of the new invoice
-- Returns exactly one row: the new invoice.
PREPARE clone_the_invoice3 (INTEGER, text) AS
WITH new_invoice_row AS (
    INSERT INTO invoices (parent_invoice_id, name)
    VALUES ( $1 /*invoice_to_clone_id*/
           , $2 /* name */
           )
    RETURNING *
)
, new_line_item_rows AS (
    -- Data-modifying CTEs run to completion even when the main query does not
    -- read them, so no RETURNING or join is needed for this insert to happen.
    INSERT INTO line_items (invoice_id, amount)
    SELECT cl.id -- the cloned invoice
         , it.amount
    FROM line_items it
    CROSS JOIN new_invoice_row cl
    WHERE it.invoice_id = $1 -- The original invoice
)
-- Read the invoice directly: joining to the inserted line items would repeat
-- the invoice once per line item and return nothing when the original
-- invoice has no line items.
SELECT iv.*
FROM new_invoice_row iv
;
EXECUTE clone_the_invoice3 (2, 'four');

Sql to update login id's dynamically based on count

I have 2 tables. One is the main table and the other is the login table. I may have 10 records in the main table and 6 records in the login table. The login ids have to be assigned equally across the main table. Can anyone please give me the best solution to update the login information?
Example
-- Main table: rows that need a login id assigned.
create table ##t1
(id int identity,
name varchar(5),
loginid varchar(10),
divno char(3));
-- Login table: the logins to hand out.
create table ##l1
(
id int identity,
name varchar(10),
divno char(3));
-- id is an identity column in both tables, so it is left out of the inserts.
insert into ##t1 (name, loginid, divno) values
('Jin', null, '001'),
('Anu', null, '001'),
('kir', null, '002'),
('Asi', null, '003'),
('Nil', null, '002'),
('sup', null, '003'),
('amu', null, '003'),
('mani', null, '003');
insert into ##l1 (name, divno) values
('A', '001'),
('B', '001'),
('C', '002'),
('D', '002'),
('E', '002'),
('F', '003');
Data Example
Main table
id name loginid divno
----------- ----- ----------
1 Jin NULL 001
2 Anu NULL 001
3 kir NULL 002
4 Asi NULL 003
5 Nil NULL 002
6 sup NULL 003
7 amu NULL 003
8 mani NULL 003
Login Table
id name divno
----------- -------------
1 A 001
2 B 001
3 C 002
4 D 002
5 E 002
6 F 003
desired output
How can we do this without looping?
-- Hands out logins to ##t1 rows round-robin, in id order.
-- NOTE(review): divno is not taken into account - confirm whether logins
-- should only be assigned within the same divno.
update t
set loginid = l.name
from ##t1 as t
inner join
    -- rn cycles 1..(number of logins) over the main rows in id order;
    -- NULLIF avoids a modulo-by-zero error when ##l1 is empty
    (select id,
            (row_number() over (order by id) - 1) % nullif((select count(*) from ##l1), 0) + 1 as rn
     from ##t1) as v
    on t.id = v.id
inner join
    -- number the logins 1..n instead of relying on identity values being gap-free
    (select name, row_number() over (order by id) as seqnum
     from ##l1) as l
    on v.rn = l.seqnum
Let me do this as a select query rather than as an update.
-- Pairs every MainTable row with a login, cycling through the logins.
-- Output columns are qualified because both sides expose id and name; the
-- login table's name column is returned as "login".
select mt.id, mt.name, l.name as login
from (select m.*,
             -- position in the login cycle; NULLIF avoids a modulo-by-zero
             -- error when the login table is empty
             (row_number() over (order by m.id) % nullif(c.loginCount, 0)) + 1 as loginSeqnum
      from MainTable m cross join
           (select count(*) as loginCount from login) c
     ) mt join
     -- number the logins 1..n in case their ids are not 1..n
     (select lg.*, row_number() over (order by lg.id) as seqnum
      from login lg
     ) l
     on mt.loginSeqnum = l.seqnum
What this is doing is adding a sequence number to the logins (just in case the login ids are not 1..n). It then calculates a similar value for each record in the first table.
One nice thing about this method is you can modify it to get more random orderings, by changing the "order by" clause in the row_number() statements. For instance, using "order by newid()" will randomize the assignment, rather than doing it in a round-robin fashion.