Complex Joining multiple tables in SQL Server for a fact table - sql

Hi I have 4 dimensions and I am trying to insert multiple data from the dimensions into the fact table.
I have a Gunsales table that contains the majority of the data for the fact table, plus primary keys from other tables that I would like to join on: homicide_id from the Homicide table, article_id from the BBC table, incident_id from the Gun_violence table and shooting_id from the School_shooting table. The rest of the data is from the Gunsales table.
-- Target column list for the fact-table load. Only the column list is shown here;
-- the SELECT that supplies the rows is not part of this fragment.
INSERT INTO [dbo].[FactGunSales]
(
-- Sale attributes (same names as the Gun_Sales columns).
sale_id,
sale_date,
sale_state,
permit,
hand_gun,
long_gun,
other_gun,
multiple_gun,
-- Keys taken from the other tables.
incident_id,
homicide_id,
article_id,
-- NOTE(review): "shooitng_id" looks like a misspelling of shooting_id; confirm against the fact table definition before changing it.
shooitng_id)
So logically it is a full join of the Gun_sales table and an inner join for the ID keys in the other table but I am struggling to get this to work.
adding in the DDL for all the tables:
USE [Gun Violence];
GO

-- Gun_Violence: incident rows keyed by incident_id.
DROP TABLE IF EXISTS Gun_Violence;

CREATE TABLE Gun_Violence
(
    incident_id   INT PRIMARY KEY,
    incident_date DATE,
    state_name    VARCHAR(50),
    city_name     VARCHAR(50),
    death         INT,
    injury        INT
);
-- Gun_Sales: sale rows keyed by sale_id, with one count column per sale category.
DROP TABLE IF EXISTS Gun_Sales;

CREATE TABLE Gun_Sales
(
    sale_id      INT PRIMARY KEY,
    sale_date    DATE,
    sale_state   VARCHAR(50),
    permit       INT,
    hand_gun     INT,
    long_gun     INT,
    other_gun    INT,
    multiple_gun INT
);
-- School_Shootings: shooting rows keyed by shooting_id.
DROP TABLE IF EXISTS School_Shootings;

CREATE TABLE School_Shootings
(
    shooting_id     INT PRIMARY KEY,
    shooting_date   DATE,
    shooting_state  VARCHAR(50),
    shooting_city   VARCHAR(50),
    shooting_death  INT,
    shooting_injury INT
);
-- Homicide: rows keyed by Homicide_id.
DROP TABLE IF EXISTS Homicide;

CREATE TABLE Homicide
(
    Homicide_id      INT PRIMARY KEY,
    homicide_state   VARCHAR(50),
    homicide_victims INT,
    -- NOTE(review): spelled "homeicide_date"; kept as-is because the load query
    -- in this file joins on that exact column name.
    homeicide_date   DATE
);
-- BBC: article rows.
DROP TABLE IF EXISTS BBC;

CREATE TABLE BBC
(
    -- NOTE(review): spelled "ariticle_id"; kept as-is because the load query
    -- in this file selects that exact column name.
    ariticle_id      INT PRIMARY KEY,
    article_date     DATE,
    article_link     VARCHAR(1000),
    article_headline VARCHAR(1000),
    article_count    INT,
    article_keyword  VARCHAR(100),
    article_month    VARCHAR(10),
    article_year     INT,
    article_state    VARCHAR(50)
);
Output:
As mentioned above I am trying to create a new fact table that has all columns from the Gun_sales table and the primary keys from the other tables.
Thanks in advance

@RitaMurran Is this what you mean? Do you want something like this?
USE [Gun Violence];
GO

-- Load the fact table: every Gun_Sales row, plus the key of any row in the
-- other four tables that shares the sale's date and state.
--
-- * All Gun_Sales columns are carried over, including permit and long_gun.
-- * LEFT JOINs keep sales that have no match; the key column is then NULL.
-- * (date, state) is not declared unique in any of the joined tables, so one
--   sale can match several rows per table and be emitted more than once.
-- * Object and column names use the casing of the CREATE TABLE statements so
--   the statement also resolves under a case-sensitive collation.
INSERT INTO [dbo].[FactGunSales]
(
    [sale_id],
    [sale_date],
    [sale_state],
    [permit],
    [hand_gun],
    [long_gun],
    [other_gun],
    [multiple_gun],
    [incident_id],
    [homicide_id],
    [article_id],
    [shooitng_id]   -- spelled as in the fact table's column list
)
SELECT
    gs.[sale_id],
    gs.[sale_date],
    gs.[sale_state],
    gs.[permit],
    gs.[hand_gun],
    gs.[long_gun],
    gs.[other_gun],
    gs.[multiple_gun],
    gv.[incident_id],
    h.[Homicide_id],
    b.[ariticle_id],    -- source column is spelled this way in the BBC table
    ss.[shooting_id]
FROM [dbo].[Gun_Sales] AS gs
LEFT JOIN [dbo].[Gun_Violence] AS gv
    ON  gv.[incident_date] = gs.[sale_date]
    AND gv.[state_name]    = gs.[sale_state]
LEFT JOIN [dbo].[School_Shootings] AS ss
    ON  ss.[shooting_date]  = gs.[sale_date]
    AND ss.[shooting_state] = gs.[sale_state]
LEFT JOIN [dbo].[Homicide] AS h
    ON  h.[homeicide_date] = gs.[sale_date]    -- source column is spelled this way
    AND h.[homicide_state] = gs.[sale_state]
LEFT JOIN [dbo].[BBC] AS b
    ON  b.[article_date]  = gs.[sale_date]
    AND b.[article_state] = gs.[sale_state];
EDIT:
#RitaMurran, I answer your question:
"Is there a way to have only distinct values when the join happens?"
with the example:
-- Demo: remove duplicate child rows before a LEFT JOIN so the join does not
-- multiply the parent rows.
-- Table variables must be named with a leading @ (a leading # is only valid
-- for temporary tables created with CREATE TABLE).
DECLARE @tbl_A TABLE (ID_A INT, col_A VARCHAR(10));

INSERT INTO @tbl_A (ID_A, col_A)
VALUES (1, 'aaa'),
       (2, 'bbb'),
       (3, 'ccc'),
       (4, 'ddd'),
       (5, 'eee');

DECLARE @tbl_B TABLE (ID_B INT, ID_A_FK INT, col_B VARCHAR(10));

-- Several child rows per parent (1 and 3), all with a NULL col_B.
INSERT INTO @tbl_B (ID_B, ID_A_FK, col_B)
VALUES (1, 1, NULL),
       (2, 1, NULL),
       (3, 3, NULL),
       (4, 3, NULL),
       (5, 3, NULL);

-- The derived table keeps one row per distinct (ID_A_FK, col_B) pair.
SELECT tbA.ID_A,
       tbA.col_A,
       tbB.ID_A_FK,
       tbB.col_B
FROM @tbl_A AS tbA
LEFT JOIN
(
    SELECT DISTINCT ID_A_FK, col_B
    FROM @tbl_B
) AS tbB
    ON tbA.ID_A = tbB.ID_A_FK;
The Output:
ID_A col_A ID_A_FK col_B
1 aaa 1 NULL
2 bbb NULL NULL
3 ccc 3 NULL
4 ddd NULL NULL
5 eee NULL NULL
If col_B has values, the output is like this:
-- Alternative sample rows for the tbl_B table variable: col_B now differs
-- between the child rows of the same parent.
-- Table variables are referenced with a leading @.
INSERT INTO @tbl_B (ID_B, ID_A_FK, col_B)
VALUES (1, 1, 'a1'),
       (2, 1, 'a2'),
       (3, 3, 'c1'),
       (4, 3, 'c2'),
       (5, 3, 'c3');
Output:
ID_A col_A ID_A_FK col_B
1 aaa 1 a1
1 aaa 1 a2
2 bbb NULL NULL
3 ccc 3 c1
3 ccc 3 c2
3 ccc 3 c3
4 ddd NULL NULL
5 eee NULL NULL

Related

How can we get records in single row in SQL server using SQL/TSQL

-- Source
-- Source
CREATE TABLE staging
(
    id         INT,
    name       VARCHAR(50),
    empid      INT,
    company_id INT,
    addres     VARCHAR(50)
);

CREATE TABLE Destination
(
    id         INT,
    name       VARCHAR(50),
    empid      INT,
    company_id INT,
    addres     VARCHAR(50)
);

-- Two partial rows per id. The '' literals land in INT columns.
INSERT INTO staging (id, name, empid, company_id, addres)
SELECT 1, 'amit',   NULL,   101,    'USA'
UNION ALL
SELECT 1, 'amit',   10002,  '',     'USA'
UNION ALL
SELECT 2, 'Vijay',  '',     650,    'China'
UNION ALL
SELECT 2, 'Vijay',  '',     '',     'China'
UNION ALL
SELECT 5, 'Sanjay', 589756, NULL,   'India'
UNION ALL
SELECT 5, 'Sanjay', NULL,   151215, 'India';

SELECT id, name, empid, company_id, addres
FROM staging;
-- Expected result
-- Destination table
id name empid company_id addres
1 amit 10002 101 USA
2 Vijay 0 650 China
5 Sanjay 589756 151215 India
Use Group By and MAX
-- Collapse the staging rows to one row per id.
-- MAX skips NULLs, so each column takes the largest non-NULL value in the group.
INSERT INTO destination (id, name, empid, company_id, addres)
SELECT
    s.id,
    MAX(s.name),
    MAX(s.empid),
    MAX(s.company_id),
    MAX(s.addres)
FROM staging AS s
GROUP BY s.id;
Result:

TSQL Distinct Counts

I have a table that looks like this:
ID SuppressionTypeID PersonID
------------------------------
1 1 123
2 1 456
3 2 456
I want to get a rolling count (distinct people) rather than a normal group by count.
e.g. not this:
SuppressionTypeID Count
---------------------------
1 2
2 1
This:
SuppressionTypeID RecordsLost
----------------------------------
1 2
2 0
The latter is zero because we already lost person 456 on SuppressionTypeID 1.
Thanks in advance.
You may need to use a temporary table or a table variable as shown below
-- Rolling distinct count: a person is counted only against the lowest
-- SuppressionTypeID they appear under.
-- Table variables must be named with a leading @.
DECLARE @t TABLE (
    ID INT
    ,SuppressionTypeID INT
    ,PersonID INT
    );

INSERT INTO @t (ID, SuppressionTypeID, PersonID)
VALUES (1, 1, 123)
    ,(2, 1, 456)
    ,(3, 2, 456);

-- Working copy with an extra column for each person's first suppression type.
DECLARE @t1 TABLE (
    ID INT
    ,SuppressionTypeID INT
    ,PersonID INT
    ,firstid INT
    );

INSERT INTO @t1 (ID, SuppressionTypeID, PersonID, firstid)
SELECT ID
    ,SuppressionTypeID
    ,PersonID
    ,NULL
FROM @t;

-- firstid = lowest SuppressionTypeID recorded for the row's person.
UPDATE t1
SET t1.firstid = t2.firstid
FROM @t1 AS t1
INNER JOIN (
    SELECT PersonID
        ,MIN(SuppressionTypeID) AS firstid
    FROM @t1
    GROUP BY PersonID
    ) AS t2 ON t1.PersonID = t2.PersonID;

-- A row finds a self-join partner only when its own type is the person's first
-- type; other rows fall back to their own type (COALESCE) and add 0 to the count.
SELECT COALESCE(t2.firstid, t1.SuppressionTypeID) AS SuppressionTypeID
    ,COUNT(DISTINCT t2.PersonID) AS [count]
FROM @t1 AS t1
LEFT JOIN @t1 AS t2 ON t1.PersonID = t2.PersonID
    AND t1.SuppressionTypeID = t2.firstid
GROUP BY COALESCE(t2.firstid, t1.SuppressionTypeID);
The result is
SuppressionTypeID count
----------------- -----------
1 2
2 0
You can try;
-- RecordsLost per SuppressionTypeID, counting each person only under the
-- lowest SuppressionTypeID they appear with.
with first_type as (
    -- one row per person: the first (lowest) suppression type
    select
        PersonID,
        min(SuppressionTypeID) as SuppressionTypeID
    from tbl
    group by PersonID
),
tmp_tbl as (
    -- group by the type being reported, not by the person
    select
        ft.SuppressionTypeID,
        count(ft.PersonID) as RecordsLost
    from first_type as ft
    group by ft.SuppressionTypeID
)
select distinct
    t.SuppressionTypeID,
    coalesce(tmp.RecordsLost, 0) as RecordsLost   -- types that lost nobody new report 0
from tbl as t
left join tmp_tbl as tmp
    on tmp.SuppressionTypeID = t.SuppressionTypeID
-- ordering belongs on the outer query; a CTE cannot carry ORDER BY without TOP/OFFSET
order by t.SuppressionTypeID;

how to select all not NULL values in from every column... Need LINQ or ADO query?

Please answer this question... i m trying to make single table instead of having multiple tables
Table structure like
ID COL1 COL2 COL3 ... ...
1 SHIRT NULL NULL
2 SKIRT NULL NULL
3 TROUSER NULL NULL
4 NULL WINTER NULL
5 NULL SUMMER NULL
6 NULL AUTUMN NULL
7 NULL NULL NIGHT
8 NULL NULL EVENING
9 NULL NULL NULL
. .. .. ..
we can achieve this by using row_number ,order by and Common table Expression
-- Pack the non-NULL values of each column into adjacent rows.
-- Table variables must be named with a leading @.
declare @table table (ID int, Col1 varchar(10), col2 varchar(10), Col3 varchar(10));

insert into @table (ID, Col1, col2, Col3)
values (1, 'Shirt',   NULL,      NULL),
       (2, 'SKIRT',   NULL,      NULL),
       (3, 'TROUSER', NULL,      NULL),
       (4, NULL,      'WINTER',  NULL),
       (5, NULL,      'SUMMER',  NULL),
       (6, NULL,      'AUTUMN ', NULL),
       (7, NULL,      NULL,      'NIGHT'),
       (8, NULL,      NULL,      'Evening');

-- Number the non-NULL values of each column by ID so the pairing is repeatable.
with c1 as (
    select Col1, row_number() over (order by ID) as R
    from @table
    where Col1 is not NULL
), c2 as (
    select col2, row_number() over (order by ID) as R
    from @table
    where col2 is not NULL
), c3 as (
    select Col3, row_number() over (order by ID) as R
    from @table
    where Col3 is not NULL
)
-- FULL JOINs keep every value even when Col1 is not the longest column.
select c1.Col1, c2.col2, c3.Col3
from c1
full join c2
    on c2.R = c1.R
full join c3
    on c3.R = coalesce(c1.R, c2.R)
order by coalesce(c1.R, c2.R, c3.R);

How do you order a group of records then insert their order placement too?

I have a table of logs that contain a ID and TIMESTAMP. I want to ORDER BY ID and then TIMESTAMP.
For example, this is what the result set would look like:
12345 05:40
12345 05:50
12345 06:22
12345 07:55
12345 08:33
Once that's done, I want to INSERT an order value in a third column that signifies its placement in the group from earliest to latest.
So, you would have something like this:
12345 05:40 1 <---First entry
12345 05:50 2
12345 06:22 3
12345 07:55 4
12345 08:33 5 <---Last entry
How can I do that in a SQL statement? I can select the data and ORDER BY ID, TIMESTAMP. But, I can't seem to INSERT a order value based on the groupings. :(
Try this update not an insert:
Fiddle demo here:
-- Number every row of yourTable by (id, yourdate) and store the number in thirdCol.
-- The update goes through the CTE, so each row receives its own row number;
-- joining back on (id, yourdate) is unnecessary and would be ambiguous if that
-- pair ever repeats.
-- To restart the numbering for each id, use
-- row_number() over (partition by id order by yourdate) instead.
;with cte as (
    select thirdCol,
           row_number() over (order by id, yourdate) as rn
    from yourTable
)
update cte
set thirdCol = rn;
NOTE: if you need the third column numbered per id (restarting at 1 for each id), partition the row number: row_number() over (partition by id order by yourdate)
Results:
| ID | YOURDATE | THIRDCOL |
|-------|----------|----------|
| 12345 | 05:40 | 1 |
| 12345 | 05:50 | 2 |
| 12345 | 06:22 | 3 |
| 12345 | 07:55 | 4 |
| 12345 | 08:33 | 5 |
Using a derived table and an update.
-- Start from a clean slate if the temp table is left over from an earlier run.
IF OBJECT_ID('tempdb..#TableOne') IS NOT NULL
    DROP TABLE #TableOne;

CREATE TABLE #TableOne
(
    SomeColumnA      INT,
    LetterOfAlphabet VARCHAR(12),
    PositionOrdinal  INT NOT NULL DEFAULT 0   -- filled in by the UPDATE below
);

-- Sample rows, deliberately not in alphabetical order.
INSERT INTO #TableOne (SomeColumnA, LetterOfAlphabet)
VALUES (123, 'x'),
       (123, 'b'),
       (123, 'z'),
       (123, 't'),
       (123, 'c'),
       (123, 'd'),
       (123, 'e'),
       (123, 'a');

-- Before: every PositionOrdinal still holds the default 0.
SELECT 'pre' AS SpaceTimeContinium, SomeColumnA, LetterOfAlphabet, PositionOrdinal
FROM #TableOne
ORDER BY LetterOfAlphabet;

-- Number the rows alphabetically in a derived table, then copy each number
-- back onto the matching row.
UPDATE t1
SET PositionOrdinal = derived1.rowid
FROM #TableOne AS t1
INNER JOIN
(
    SELECT innerT1.SomeColumnA,
           innerT1.LetterOfAlphabet,
           ROW_NUMBER() OVER (ORDER BY innerT1.LetterOfAlphabet ASC) AS rowid
    FROM #TableOne AS innerT1
) AS derived1
    ON  t1.LetterOfAlphabet = derived1.LetterOfAlphabet
    AND t1.SomeColumnA      = derived1.SomeColumnA;

-- After: PositionOrdinal holds the alphabetical position.
SELECT 'post' AS SpaceTimeContinium, SomeColumnA, LetterOfAlphabet, PositionOrdinal
FROM #TableOne
ORDER BY LetterOfAlphabet;

-- Clean up.
IF OBJECT_ID('tempdb..#TableOne') IS NOT NULL
    DROP TABLE #TableOne;
To get the order you desire without doing an insert and an update, you can set your clustered index to handle it for you. The example below creates a clustered primary key.
To do this you must remove any clustered index that you already have on the table because you can only have one clustered index per table.
-- Table_1 with a clustered primary key on (ID, DTStamp), declared inline.
CREATE TABLE dbo.Table_1
(
    ID      INT      NOT NULL,
    DTStamp DATETIME NOT NULL,
    CONSTRAINT PK_Table_1 PRIMARY KEY CLUSTERED (ID, DTStamp)
);
Insert some random data to test with...
-- Seed rows for three IDs (12346, 12340, 12344), inserted out of key order.
-- Each DTStamp is getdate() plus an offset in minutes; every statement calls
-- getdate() on its own, so the stored values depend on when each statement runs.
-- ID 12346: now, +1 .. +4 minutes
INSERT INTO [dbo].[Table_1]([ID],[DTStamp])VALUES(12346,getdate());
INSERT INTO [dbo].[Table_1]([ID],[DTStamp])VALUES(12346,dateadd(mi,1,getdate()));
INSERT INTO [dbo].[Table_1]([ID],[DTStamp])VALUES(12346,dateadd(mi,2,getdate()));
INSERT INTO [dbo].[Table_1]([ID],[DTStamp])VALUES(12346,dateadd(mi,3,getdate()));
INSERT INTO [dbo].[Table_1]([ID],[DTStamp])VALUES(12346,dateadd(mi,4,getdate()));
-- ID 12340: +5 .. +8 minutes
INSERT INTO [dbo].[Table_1]([ID],[DTStamp])VALUES(12340,dateadd(mi,5,getdate()));
INSERT INTO [dbo].[Table_1]([ID],[DTStamp])VALUES(12340,dateadd(mi,6,getdate()));
INSERT INTO [dbo].[Table_1]([ID],[DTStamp])VALUES(12340,dateadd(mi,7,getdate()));
INSERT INTO [dbo].[Table_1]([ID],[DTStamp])VALUES(12340,dateadd(mi,8,getdate()));
-- ID 12344: +1 .. +8 minutes
INSERT INTO [dbo].[Table_1]([ID],[DTStamp])VALUES(12344,dateadd(mi,1,getdate()));
INSERT INTO [dbo].[Table_1]([ID],[DTStamp])VALUES(12344,dateadd(mi,2,getdate()));
INSERT INTO [dbo].[Table_1]([ID],[DTStamp])VALUES(12344,dateadd(mi,3,getdate()));
INSERT INTO [dbo].[Table_1]([ID],[DTStamp])VALUES(12344,dateadd(mi,4,getdate()));
INSERT INTO [dbo].[Table_1]([ID],[DTStamp])VALUES(12344,dateadd(mi,5,getdate()));
INSERT INTO [dbo].[Table_1]([ID],[DTStamp])VALUES(12344,dateadd(mi,6,getdate()));
INSERT INTO [dbo].[Table_1]([ID],[DTStamp])VALUES(12344,dateadd(mi,7,getdate()));
INSERT INTO [dbo].[Table_1]([ID],[DTStamp])VALUES(12344,dateadd(mi,8,getdate()));
Now query your table and check out the order...
-- Row order is only guaranteed by ORDER BY. The clustered index lets the engine
-- satisfy this sort cheaply, but it does not by itself fix the order in which
-- rows are returned.
SELECT [ID], [DTStamp]
FROM [Table_1]
ORDER BY [ID], [DTStamp];
If you need the order to display in a query, you can add the row number with an over clause.
-- SortOdr restarts at 1 for each ID and follows DTStamp within that ID.
-- (ID is constant inside a partition, so ordering by DTStamp alone is equivalent.)
SELECT
    t.[ID],
    t.[DTStamp],
    ROW_NUMBER() OVER (PARTITION BY t.[ID] ORDER BY t.[DTStamp]) AS SortOdr
FROM [Table_1] AS t

update oldID field based on fields in the same table

I need help with the following query.
-- Sample data: id is generated by IDENTITY; oldID starts out NULL on every row.
CREATE TABLE #table1
(
    id            INT IDENTITY NOT NULL PRIMARY KEY,
    customer_name VARCHAR(25),
    usage         FLOAT,
    oldID         INT NULL
);

-- One statement per row, so ids are handed out in exactly this order.
INSERT INTO #table1 (customer_name, usage, oldID) VALUES ('ABC', 46.5, NULL);
INSERT INTO #table1 (customer_name, usage, oldID) VALUES ('ABC', 46.5, NULL);
INSERT INTO #table1 (customer_name, usage, oldID) VALUES ('DEF', 36.8, NULL);
INSERT INTO #table1 (customer_name, usage, oldID) VALUES ('XYZ', 50.1, NULL);
INSERT INTO #table1 (customer_name, usage, oldID) VALUES ('DEF', 36.8, NULL);
INSERT INTO #table1 (customer_name, usage, oldID) VALUES ('XYZ', 50.1, NULL);

SELECT id, customer_name, usage, oldID
FROM #table1;
I want my table to be updated like this
id customer_name usage oldID
----------- ------------------------- ---------------------- -----------
1 ABC 46.5 NULL
2 ABC 46.5 1
3 DEF 36.8 NULL
4 XYZ 50.1 NULL
5 DEF 36.8 3
6 XYZ 50.1 4
The two records with the same name and usage means the later record was renewed.
In the new record the oldID field should point to its old record (ID).
Although in my actual table, I have a bunch of date fields which I probably can use but this would help me for now.
Try this using a CTE:
-- Point each renewed record at its original record.
-- A renewal is a later row with the same customer_name AND the same usage, so
-- both columns identify the group; matching on the name alone would link
-- unrelated records of one customer.
;WITH data AS
(
    -- lowest id per (customer_name, usage) = the original record
    SELECT
        customer_name,
        usage,
        MIN(id) AS OldID
    FROM #table1
    GROUP BY customer_name, usage
)
UPDATE t
SET OldID = data.OldID
FROM #table1 AS t
INNER JOIN data
    ON  data.customer_name = t.customer_name
    AND data.usage         = t.usage
WHERE
    t.id <> data.OldID;   -- the original record keeps OldID = NULL

SELECT id, customer_name, usage, oldID
FROM #table1;
The Data CTE basically just determines the minimum ID for each customer, and if that customer's ID isn't that minimum ID, then OldID is set to that ID value.
When I run this, I get a resulting output:
id customer_name usage oldID
1 ABC 46.5 NULL
2 ABC 46.5 1
3 DEF 36.8 NULL
4 XYZ 50.1 NULL
5 DEF 36.8 3
6 XYZ 50.1 4
With cte, without subquerys, updating only customers with several rows:
-- Same update, restricted to groups that actually have more than one row.
-- Groups are formed by customer_name AND usage, because a renewal is defined
-- as a later record with the same name and usage.
;with cte as (
    select customer_name, usage, min( id ) as id
    from #table1
    group by customer_name, usage
    having count(*) > 1
)
update t
set oldID = cte.id
from #table1 as t
inner join cte
    on  t.customer_name = cte.customer_name
    and t.usage         = cte.usage
where t.id != cte.id;   -- the first record of each group keeps oldID = NULL