Randomly join tables and return columns

Randomly join tables and return columns - sql

I have these 3 tables:
-- Three single-column dimension tables used as sample data.
CREATE TABLE DimEmployee(EmployeeID INT);
CREATE TABLE DimDepartment(DepartmentID INT);
-- Named DimPosition so that the INSERT below targets a table that exists.
CREATE TABLE DimPosition(PositionID INT);
INSERT INTO DimEmployee(EmployeeID) VALUES (1),(2),(3);
INSERT INTO DimDepartment(DepartmentID) VALUES (1),(5),(6);
INSERT INTO DimPosition(PositionID) VALUES (7),(8),(9);
I want to randomly join the 3 tables and get output like below : (example)
First execute:
EmployeeID DepartmentID PositionID RandomDate
1 4 7 2020-07-24 00:00:00.000
2 5 9 2020-11-25 00:00:00.000
Second execute:
EmployeeID DepartmentID PositionID RandomDate
1 4 7 2020-05-04 00:00:00.000
2 5 9 2020-10-30 00:00:00.000

If you want a random join :
-- Step 1: attach one randomly chosen department to every employee.
-- The applied subquery exposes DepartmentID, so that is the column to select.
-- NOTE(review): the subquery does not reference the outer row, so the optimizer
-- may evaluate it once and reuse the same pick for every employee — verify on
-- your server; referencing an outer column inside the subquery avoids that.
SELECT emp.EmployeeID, pick.DepartmentID
INTO #T1
FROM DimEmployee AS emp
CROSS APPLY (
    SELECT TOP 1 dep.DepartmentID
    FROM DimDepartment AS dep
    ORDER BY NEWID()
) AS pick;
-- Step 2: attach one randomly chosen position to each row from step 1.
SELECT t.EmployeeID, t.DepartmentID, pick.PositionID
INTO #T2
FROM #T1 AS t
CROSS APPLY (
    SELECT TOP 1 pos.PositionID
    FROM DimPosition AS pos
    ORDER BY NEWID()
) AS pick;
Or if you want all possibilities :
-- Every employee paired with every department and every position.
SELECT emp.EmployeeID,
       dep.DepartmentID,
       pos.PositionID
FROM DimEmployee AS emp
CROSS JOIN DimDepartment AS dep
CROSS JOIN DimPosition AS pos

You need to row-number each table and join on row-number:
-- Setup: the three single-column tables being joined.
CREATE TABLE DimEmployee(EmployeeID INT)
CREATE TABLE DimDepartment(DepartmentID INT)
CREATE TABLE DimPosition(PositionID INT)
-- Number the rows of each table independently, then pair the n-th employee
-- with the n-th department and the n-th position.
SELECT
emp.EmployeeID,
dep.DepartmentID,
pos.PositionID,
-- Random day offset from date 0. The modulo is taken before ABS because
-- ABS(CHECKSUM(...)) raises an arithmetic overflow if CHECKSUM returns the minimum INT.
DATEADD(day, ABS(CHECKSUM(NEWID()) % 65530), 0) AS RandomDate
FROM (
SELECT EmployeeID, ROW_NUMBER() OVER(ORDER BY (SELECT 1)) AS rn
FROM DimEmployee
) emp
JOIN (
SELECT DepartmentID, ROW_NUMBER() OVER(ORDER BY (SELECT 1)) AS rn
FROM DimDepartment
) dep ON dep.rn = emp.rn
JOIN (
SELECT PositionID, ROW_NUMBER() OVER(ORDER BY (SELECT 1)) AS rn
FROM DimPosition
) pos ON pos.rn = emp.rn
You can also change the ORDER BY to ORDER BY NEWID() to get a more random ordering.

Related

Am I looking for a cross join?

If I have two tables:
id_table entry_table
-------- -----------
[Id] [entries]
1 a
2 b
3 c
4 d
5
Is there a way to select the results into another table where I get:
select_results
--------------
[Id] [entries]
1 a
2 b
3 c
I'm trying e.g.
Select top 3 * from id_table, entry_table
and vice versa, but that gives:
1 a
1 b
1 c
These two tables SHOULD have the same amount of entries but I'm using "top 3" at the minute to see if it's possible.
Maybe I need a cross join with a where clause?

you can use row_number() and then use join
-- Number both tables independently and pair rows that share the same position.
-- Both derived tables expose the row number as rn1, so the join compares rn1 to rn1.
select a.*,b.* from
(select *,row_number() over(order by Id) rn1
from id_table
) a
join
(select *,row_number() over(order by entries) rn1
from entry_table
) b on a.rn1=b.rn1

Alternatively you can use row_number() window analytic function with top keyword as
-- Match each Id to the entry whose alphabetical position equals that Id.
-- NOTE(review): this assumes Id values are the contiguous sequence 1..n — confirm.
select top 3
       i.[Id], e.[entries]
  from id_table i
  join ( select row_number() over (order by [entries]) as row_id,
                [entries]
           from entry_table ) e
    on i.[Id]=e.row_id
 -- Without ORDER BY, TOP returns an arbitrary three rows.
 order by i.[Id];
Demo

Create episode for each value with new Begin and End Dates

This is in reference to the question below:
Loop through each value to the seq num
The client now wants to see the data differently, so I have started a new thread for this question.
The requirement is below.
This is the data .
ID seqNum DOS Service End Date
1 1 1/1/2017 1/15/2017
1 2 1/16/2017 1/16/2017
1 3 1/17/2017 1/21/2017
1 4 1/22/2017 2/13/2017
1 5 2/14/2017 3/21/2017
1 6 2/16/2017 3/21/2017
Expected outPut:
ID SeqNum DOSBeg DOSEnd
1 1 1/1/2017 1/30/2017
1 2 1/31/2017 3/1/2017
1 3 3/2/2017 3/31/2017
For each DOSBeg, add 29 days to get DOSEnd. Then add 1 day to DOSEnd to get the new DOSBeg (1/31/2017).
Now add 29 days to 1/31/2017 to get 3/1/2017, which is the next DOSEnd. Repeat this until DOSEnd >= the max End Date, i.e. 3/21/2017.
Basically, we need episode of 29 days for each ID.
I tried with this code and it is giving me duplicates.
-- Attempted recursive CTE: the anchor emits one 30-day episode starting at minDate
-- for every row of #temp, and each recursive step appends the following episode.
with cte as (
select ID, minDate as DOSBeg,dateadd(day,29,mindate) as DOSEnd
from #temp
union all
-- NOTE: this recursive member has no WHERE clause, so nothing in the query ends the recursion.
select ID,dateadd(day,1,DOSEnd) as DOSBeg,dateadd(day,29,dateadd(day,1,DOSEnd)) as DOSEnd
from cte
)
select ID,DOSBeg,DOSEnd
from cte
-- MAXRECURSION 0 removes the default cap on recursion depth.
OPTION (MAXRECURSION 0)
Here mindate is Minimum DOS for this ID i.e. 1/1/2017
I came up with below logic and this is working fine for me. Is there any better way than this ?
-- Sample data. A table variable must be named with @; DECLARE cannot take a # name.
declare @table table (id int, seqNum int identity(1,1), DOS date, ServiceEndDate date);
-- seqNum is an identity column, so only the other three columns are supplied.
insert into @table (id, DOS, ServiceEndDate)
values
(1,'20170101','20170115'),
(1,'20170116','20170116'),
(1,'20170117','20170121'),
(1,'20170122','20170213'),
(1,'20170214','20170321'),
(1,'20170216','20170321'),
(2,'20170101','20170103'),
(2,'20170104','20170118');
select * into #temp from @table;
--drop table #data
-- One row per source row, each carrying its ID's earliest DOS and latest ServiceEndDate.
select distinct ID, cast(min(DOS) over (partition by ID) as date) as minDate
,row_Number() over (partition by ID order by ID, DOS) as SeqNum,
DOS,
max(ServiceEndDate) over (partition by ID)as maxDate
into #data
from #temp;
--drop table #StartDateLogic
-- Walk forward from minDate in 30-day steps while the next start is still on or before maxDate.
-- WITH requires the preceding statement to be terminated, hence the semicolons above.
with cte as
(select ID,mindate as startdate,maxdate
from #data
union all
select ID,dateadd(day,30,startdate) as startdate,maxdate
from cte
where maxdate >= dateadd(day,30,startdate))
-- DISTINCT collapses the identical chains seeded by each row of the same ID.
select distinct ID,startdate
into #StartDateLogic
from cte
OPTION (MAXRECURSION 0);
--final Result set
select ID
,ROW_NUMBER() over (Partition by ID order by ID,StartDate) as SeqNum
,StartDate
,dateadd(day,29,startdate) as EndDate
from #StartDateLogic;

You were on the right track with the recursive CTE, but you forgot the anchor.
-- Sample data. A table variable must be named with @; DECLARE cannot take a # name.
declare @table table (id int, seqNum int identity(1,1), DOS date, ServiceEndDate date)
-- seqNum is an identity column, so only the other three columns are supplied.
insert into @table (id, DOS, ServiceEndDate)
values
(1,'20170101','20170115'),
(1,'20170116','20170116'),
(1,'20170117','20170121'),
(1,'20170122','20170213'),
(1,'20170214','20170321'),
(1,'20170216','20170321'),
(2,'20170101','20170103'),
(2,'20170104','20170118')
;with dates as(
-- Anchor: the first row (lowest seqNum) of every id starts that id's first 30-day episode.
select top 1 with ties id, seqnum, DOSBeg = DOS, DOSEnd = dateadd(day,29,DOS)
from @table
order by row_number() over (partition by id order by seqnum)
union all
-- Recursive step: the next seqNum of the same id begins the day after the previous episode ended.
select t.id, t.seqNum, DOSBeg = dateadd(day,1,d.DOSEnd), DOSEnd = dateadd(day,29,dateadd(day,1,d.DOSEnd))
from dates d
inner join @table t on
d.id = t.id and t.seqNum = d.seqNum + 1
)
select *
from dates d
-- Keep only episodes ending within one month after the id's latest ServiceEndDate.
where d.DOSEnd <= (select max(dateadd(month,1,ServiceEndDate)) from @table where id = d.id)
order by id, seqNum

Query to return first date of missing date ranges

Looking for help with a query using SQL 2008 R2... I have a table with client and date fields. Most clients have a record for most dates, however some don't.
For example I have this data:
CLIENTID DT
1 5/1/14
1 5/2/14
2 5/3/14
3 5/1/14
3 5/2/14
I can find the missing dates for each CLIENTID by creating a temp table with all possible dates for the period and then joining that to each CLIENTID and DT and only selecting where there is a NULL.
This is what I can get easily for the date range 5/1/14 to 5/4/14:
CLIENTID DTMISSED
1 5/3/14
1 5/4/14
2 5/1/14
2 5/2/14
2 5/4/14
3 5/3/14
3 5/4/14
However I want to group each consecutive missed period together and get the beginning of each period and the length.
For example, if I use the date range 5/1/14 to 5/4/14 I'd like to get:
CLIENTID DTSTART MISSED
1 5/3/14 2
2 5/1/14 2
2 5/4/14 1
3 5/3/14 2
Thanks for helping!

It's fascinating how much more elegantly, and also more efficiently, this kind of problem can be solved in SQL Server 2012.
First, the tables:
-- Client/date rows that exist.
create table #t (CLIENTID int, DT date)
go
-- yyyymmdd literals are read the same way under every language / DATEFORMAT setting.
insert #t (CLIENTID, DT) values
(1, '20140501'),
(1, '20140502'),
(2, '20140503'),
(3, '20140501'),
(3, '20140502')
go
-- Every date of the reporting period.
create table #calendar (dt date)
go
insert #calendar (dt) values ('20140501'),('20140502'),('20140503'),('20140504')
go
Here's the 2008 solution:
;with missed as (
    -- Every (client, calendar day) pair that has no row in #t, numbered in client/date order.
    select cl.clientid, c.dt,
           row_number() over(order by cl.clientid, c.dt) as rn
    from #calendar c
    cross join (select distinct clientid from #t) cl
    where not exists (select * from #t t where t.dt = c.dt and t.clientid = cl.clientid)
),
flagged as (
    -- A missed day starts a new range when the same client did not also miss the previous day.
    select m.clientid, m.dt, m.rn,
           case when prev.dt is null then 1 else 0 end as is_start
    from missed m
    left join missed prev on prev.clientid = m.clientid and m.dt = dateadd(day,1,prev.dt)
),
grouped as (
    -- Running count of range starts; it is constant within one consecutive range.
    select f.clientid, f.dt,
           (select sum(f2.is_start) from flagged f2 where f2.rn <= f.rn) as grp
    from flagged f
)
select clientid, min(dt) as DTSTART, count(*) as MISSED
from grouped
group by clientid, grp
order by clientid, DTSTART
Compare it to 2012:
;with missed as (
    -- Every (client, calendar day) pair that has no row in #t.
    select cl.clientid, c.dt
    from #calendar c
    cross join (select distinct clientid from #t) cl
    where not exists (select * from #t t where t.dt = c.dt and t.clientid = cl.clientid)
),
grouped as (
    -- Running count of range starts (a missed day whose previous day was not missed).
    -- The explicit ROWS frame states the running-total intent instead of relying on the default frame.
    select m.clientid, m.dt,
           sum(case when prev.dt is null then 1 else 0 end)
               over(order by m.clientid, m.dt rows unbounded preceding) as grp
    from missed m
    left join missed prev on prev.clientid = m.clientid and m.dt = dateadd(day,1,prev.dt)
)
select clientid, min(dt) as DTSTART, count(*) as MISSED
from grouped
group by clientid, grp
order by clientid, DTSTART

T-SQL using SUM for a running total

I have a simple table with some dummy data setup like:
|id|user|value|
---------------
1 John 2
2 Ted 1
3 John 4
4 Ted 2
I can select a running total by executing the following sql(MSSQL 2008) statement:
-- Running total per user: each row is joined to all rows of the same user with an id up to its own.
-- USER is a reserved word and must be bracketed; TABLE is reserved too, so the table is called users here.
SELECT a.id, a.[user], a.value, SUM(b.value) AS total
FROM users a INNER JOIN users b
ON a.id >= b.id
AND a.[user] = b.[user]
GROUP BY a.id, a.[user], a.value
ORDER BY a.id
This will give me results like:
|id|user|value|total|
---------------------
1 John 2 2
3 John 4 6
2 Ted 1 1
4 Ted 2 3
Now is it possible to only retrieve the most recent rows for each user? So the result would be:
|id|user|value|total|
---------------------
3 John 4 6
4 Ted 2 3
Am I going about this the right way? any suggestions or a new path to follow would be great!

No join is needed, you can speed up the query this way:
-- Rank each user's rows newest-first and compute the user's overall sum in the same pass.
;with ranked as (
    select id,
           [user],
           value,
           row_number() over (partition by [user] order by id desc) as rn,
           sum(value) over (partition by [user]) as total
    from users
)
select id, [user], value, total
from ranked
where rn = 1

try this:
;with cte as
(-- Running total per user via a self-join on all earlier-or-equal ids.
SELECT a.id, a.[user], a.value, SUM(b.value) AS total
FROM users a INNER JOIN users b
ON a.id >= b.id
AND a.[user] = b.[user]
GROUP BY a.id, a.[user], a.value
),
-- Rank by id, not by total: the largest running total is the latest row only when every value is positive.
cte1 as (select *,ROW_NUMBER() over (partition by [user]
order by id desc) as row_num
from cte)
select id,[user],value,total from cte1 where row_num=1
SQL Fiddle Demo

add where statement:
select * from
(
your select statement
) t
where t.id in (select max(id) from table group by user)
also you can use this query:
-- Latest row per user (highest id) with the sum of all of that user's values.
-- The total is SUM, not MAX: for John (2, 4) the expected total is 6.
-- USER is a reserved word and must be bracketed; TABLE is reserved too, so the table is called users here.
SELECT a.id, a.[user], a.value,
(select sum(b.value) from users b where b.[user]=a.[user]) AS total
FROM users a
where a.id in (select max(id) from users group by [user])
ORDER BY a.id

Adding a right join would perform better than nested select.
Or even simpler:
-- One row per user: highest id, largest value, and the sum of all values.
-- NOTE: MAX(value) is the user's largest value, which is not necessarily the value
-- stored on the row with MAX(id).
-- NOTE(review): "table" is presumably a placeholder for the real table name — confirm.
SELECT MAX(id), [user], MAX(value), SUM(value)
FROM table
GROUP BY [user]

Compatible with SQL Server 2008 or later
-- Staging table variable: ledger rows plus a column for the running total.
-- Variables, including table variables, must be named with @.
DECLARE @AnotherTbl TABLE
(
id INT
, somedate DATE
, somevalue DECIMAL(18, 4)
, runningtotal DECIMAL(18, 4)
)
INSERT INTO @AnotherTbl
(
id
, somedate
, somevalue
, runningtotal
)
SELECT LL.LEDGER_ID
, LL.LEDGER_DocDate
, LL.LEDGER_Amount
, NULL
FROM ACC_Ledger LL
ORDER BY LL.LEDGER_DocDate
DECLARE @RunningTotal DECIMAL(18, 4)
SET @RunningTotal = 0
-- Double assignment: each updated row adds its somevalue to the variable and
-- stores the new value in runningtotal.
-- NOTE(review): the order in which UPDATE visits rows is not declared anywhere
-- in this statement — verify the totals follow somedate order before relying on this.
UPDATE @AnotherTbl
SET @RunningTotal=runningtotal = @RunningTotal + somevalue
FROM @AnotherTbl
SELECT *
FROM @AnotherTbl

How to limit the selection in SQL Server by sum of a column?

Can I limit rows by sum of a column in a SQL Server database?
For example:
Type | Time (in minutes)
-------------------------
A | 50
B | 10
C | 30
D | 20
E | 70
...
And I want to limit the selection by sum of time. For example maximum of 100 minutes. Table must look like this:
Type | Time (in minutes)
-------------------------
A | 50
B | 10
C | 30
Any ideas? Thanks.

-- Sample data. A table variable must be named with @; DECLARE cannot take a # name.
DECLARE @T TABLE
(
[Type] CHAR(1) PRIMARY KEY,
[Time] INT
)
INSERT INTO @T ([Type], [Time])
SELECT 'A',50 UNION ALL
SELECT 'B',10 UNION ALL
SELECT 'C',30 UNION ALL
SELECT 'D',20 UNION ALL
SELECT 'E',70;
-- Walk the rows in [Type] order, carrying the running total, and stop as soon as
-- adding the next row would push the total over 100.
WITH RecursiveCTE
AS (
-- Anchor: the first row in [Type] order.
SELECT TOP 1 [Type], [Time], CAST([Time] AS BIGINT) AS Total
FROM @T
ORDER BY [Type]
UNION ALL
-- Recursive step: the next row after the current one (rn = 1), kept only while within the limit.
SELECT R.[Type], R.[Time], R.Total
FROM (
SELECT T.*,
T.[Time] + Total AS Total,
rn = ROW_NUMBER() OVER (ORDER BY T.[Type])
FROM @T T
JOIN RecursiveCTE R
ON R.[Type] < T.[Type]
) R
WHERE R.rn = 1 AND Total <= 100
)
SELECT [Type], [Time], Total
FROM RecursiveCTE
OPTION (MAXRECURSION 0);
Or if your table is small
-- Triangular self-join: each row is summed with every row up to and including it.
SELECT t1.[Type],
t1.[Time],
SUM(t2.[Time]) AS Total
FROM @T t1
JOIN @T t2
ON t2.[Type] <= t1.[Type]
GROUP BY t1.[Type],t1.[Time]
HAVING SUM(t2.[Time]) <=100

We Keep Coding

sql objective-c vba vb.net react-native apache vue.js tensorflow api pandas

Randomly join tables and return columns - sql

Related

Am I looking for a cross join?

Create episode for each value with new Begin and End Dates

Query to return first date of missing date ranges

T-SQL using SUM for a running total

How to limit the selection in SQL Server by sum of a column?

Categories

Resources