PARTITION BY multiple column while inserting data from another table - sql

How can I skip unique constraint error in SQL Server?
This is my source table:
-- Source table for the example; several rows can share the same (Nam, Gender).
CREATE TABLE source
(
    RollNo INTEGER,
    Nam    VARCHAR(6),
    Gender VARCHAR(1),
    Score  INTEGER
);

-- RollNo and Score are INTEGER columns, so their values are written as numeric
-- literals; quoting them would force an implicit string-to-integer conversion.
INSERT INTO source (RollNo, Nam, Gender, Score)
VALUES (101,  'John',   'M', 85),
       (102,  'Tracy',  'F', 79),
       (103,  'Jake',   'M', 92),
       (104,  'Edgar',  'M', NULL),
       (105,  'Monica', 'F', 25),
       (106,  'Monica', 'F', 50),
       (1070, 'Yash',   'M', 68),
       (107,  'Yash',   'M', 70),
       (108,  'SFS',    'M', 68),
       (18,   'SFS77',  'F', 65);
I want to populate the dest table from the source table, where (Nam, Gender) is a unique key and SeqNo should increment automatically:
dest table description:
-- Target table. SeqNo is an identity column seeded at 1000 with increment 1,
-- so its value is generated automatically for each inserted row.
CREATE TABLE dest (
    SeqNo  BIGINT IDENTITY(1000, 1) PRIMARY KEY,
    RollNo INTEGER,
    Nam    VARCHAR(6),
    Gender VARCHAR(1),
    Score  INTEGER
);
Here's what I tried:
Attempt #1:
-- Attempt #1: FIRST_VALUE is a window function, so it does not collapse rows.
-- Every qualifying source row still produces one output row, which means the
-- same (Nam, Gender) values are emitted once per row of their partition.
INSERT INTO dest (RollNo, Nam, Gender, Score)
SELECT
    FIRST_VALUE(s.RollNo) OVER (PARTITION BY s.Nam, s.Gender ORDER BY s.Score DESC),
    FIRST_VALUE(s.Nam)    OVER (PARTITION BY s.Nam, s.Gender ORDER BY s.Score DESC),
    FIRST_VALUE(s.Gender) OVER (PARTITION BY s.Nam, s.Gender ORDER BY s.Score DESC),
    FIRST_VALUE(s.Score)  OVER (PARTITION BY s.Nam, s.Gender ORDER BY s.Score DESC)
FROM source AS s
WHERE s.Gender IS NOT NULL
  AND s.Nam IS NOT NULL;
ERROR: Violation of UNIQUE KEY constraint
Attempt #2:
-- Attempt #2: one row per (Nam, Gender), but MAX(RollNo) and MAX(Score) are
-- evaluated independently, so the two values may come from different rows.
INSERT INTO dest (RollNo, Nam, Gender, Score)
SELECT
    MAX(s.RollNo),
    s.Nam,
    s.Gender,
    MAX(s.Score)
FROM source AS s
GROUP BY
    s.Nam,
    s.Gender
ORDER BY MAX(s.Score) DESC;
Output:
| SeqNo | RollNo | Nam | Gender | Score |
|-------|--------|--------|--------|--------|
| 1000 | 103 | Jake | M | 92 |
| 1001 | 101 | John | M | 85 |
| 1002 | 102 | Tracy | F | 79 |
| 1003 | 1070 | Yash | M | 70 |
| 1004 | 108 | SFS | M | 68 |
| 1005 | 18 | SFS77 | F | 65 |
| 1006 | 106 | Monica | F | 50 |
| 1007 | 104 | Edgar | M | (null) |
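If you look at the row for Yash, it takes the maximum RollNo and the maximum Score independently, which is wrong: RollNo 1070 has a score of 68, not 70. I want it to take the values from the first row (the one with the highest score), but I don't know how to do it.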
Is there any other way to solve this besides the two methods above?

You can identify the row that has the highest score for each Nam/Gender tuple with ROW_NUMBER(), and use that information to filter the source data:
-- Insert exactly one row per (Nam, Gender): the row with the highest Score.
-- SQL Server sorts NULL as the lowest value, so with DESC a NULL Score is only
-- chosen when the group has no scored row. RollNo is a tiebreaker so the pick
-- is deterministic when two rows of a group share the same Score.
INSERT INTO dest (RollNo, Nam, Gender, Score)
SELECT
    ranked.RollNo,
    ranked.Nam,
    ranked.Gender,
    ranked.Score
FROM (
    SELECT
        s.RollNo,
        s.Nam,
        s.Gender,
        s.Score,
        ROW_NUMBER() OVER (
            PARTITION BY s.Nam, s.Gender
            ORDER BY s.Score DESC, s.RollNo
        ) AS rn
    FROM source AS s
) AS ranked
WHERE ranked.rn = 1;
Side note: I would recommend putting a unique constraint on Nam/Gender tuples in the target table, so potential duplicates are always rejected at insert time:
-- Target table with the natural key enforced: a second row with the same
-- (Nam, Gender) pair is rejected at insert time. The column is called Nam,
-- matching the INSERT statements that populate this table.
CREATE TABLE dest (
    SeqNo  BIGINT IDENTITY(1000, 1) PRIMARY KEY,
    RollNo INTEGER,
    Nam    VARCHAR(6),
    Gender VARCHAR(1),
    Score  INTEGER,
    CONSTRAINT dest_nam_gender_uq UNIQUE (Nam, Gender)
);
Side note #2: don't put single quotes around column names; they stand for literal strings in standard SQL.

Related

Get records having the same value in 2 columns but a different value in a 3rd column

I am having trouble writing a query that will return all records where 2 columns have the same value but a different value in a 3rd column. I am looking for the records where the Item_Type and Location_ID are the same, but the Sub_Location_ID is different.
The table looks like this:
+---------+-----------+-------------+-----------------+
| Item_ID | Item_Type | Location_ID | Sub_Location_ID |
+---------+-----------+-------------+-----------------+
| 1 | 00001 | 20 | 78 |
| 2 | 00001 | 110 | 124 |
| 3 | 00001 | 110 | 124 |
| 4 | 00002 | 3 | 18 |
| 5 | 00002 | 3 | 25 |
+---------+-----------+-------------+-----------------+
The result I am trying to get would look like this:
+---------+-----------+-------------+-----------------+
| Item_ID | Item_Type | Location_ID | Sub_Location_ID |
+---------+-----------+-------------+-----------------+
| 4 | 00002 | 3 | 18 |
| 5 | 00002 | 3 | 25 |
+---------+-----------+-------------+-----------------+
I have been trying to use the following query:
-- Asker's attempt: keeps every row whose Item_Type has more than one distinct
-- Sub_Location_ID. Location_ID takes no part in the grouping.
SELECT *
FROM Table1 AS candidate
WHERE candidate.Item_Type IN (
    SELECT grouped.Item_Type
    FROM Table1 AS grouped
    GROUP BY grouped.Item_Type
    HAVING COUNT(DISTINCT grouped.Sub_Location_ID) > 1
);
But it returns all records with the same Item_Type and a different Sub_Location_ID, not all records with the same Item_Type AND Location_ID but a different Sub_Location_ID.
This should do the trick...
-- Test fixture: recreate the temp table from scratch so the script can be re-run.
IF OBJECT_ID('tempdb..#TestData', 'U') IS NOT NULL
    DROP TABLE #TestData;

CREATE TABLE #TestData (
    Item_ID         INT     NOT NULL PRIMARY KEY,
    Item_Type       CHAR(5) NOT NULL,
    Location_ID     INT     NOT NULL,
    Sub_Location_ID INT     NOT NULL
);

INSERT INTO #TestData (Item_ID, Item_Type, Location_ID, Sub_Location_ID)
VALUES
    (1, '00001', 20, 78),
    (2, '00001', 110, 124),
    (3, '00001', 110, 124),
    (4, '00002', 3, 18),
    (5, '00002', 3, 25);

-- Index whose key order matches the window partitions used by the query;
-- per the original author it is meant to remove the sort from the plan.
CREATE NONCLUSTERED INDEX ix_indexname
    ON #TestData (Item_Type, Location_ID, Sub_Location_ID, Item_ID);
-- Solution: for every row, count the rows sharing its (Item_Type, Location_ID)
-- and the rows that additionally share its Sub_Location_ID. When the narrower
-- count is smaller than the wider one, the pair contains another sub-location.
WITH cte_count_group AS (
    SELECT
        td.Item_ID,
        td.Item_Type,
        td.Location_ID,
        td.Sub_Location_ID,
        COUNT(*) OVER (PARTITION BY td.Item_Type, td.Location_ID) AS cnt_grp_2,
        COUNT(*) OVER (PARTITION BY td.Item_Type, td.Location_ID, td.Sub_Location_ID) AS cnt_grp_3
    FROM #TestData AS td
)
SELECT
    cg.Item_ID,
    cg.Item_Type,
    cg.Location_ID,
    cg.Sub_Location_ID
FROM cte_count_group AS cg
WHERE cg.cnt_grp_3 < cg.cnt_grp_2
  AND cg.cnt_grp_2 > 1;
You can use exists :
-- Return each row for which another row exists with the same Item_Type and
-- Location_ID but a different Sub_Location_ID. The table is referenced as
-- Table1 (as in the question); TABLE itself is a reserved word.
SELECT t.*
FROM Table1 AS t
WHERE EXISTS (
    SELECT 1
    FROM Table1 AS t1
    WHERE t1.Item_Type = t.Item_Type
      AND t1.Location_ID = t.Location_ID
      AND t1.Sub_Location_ID <> t.Sub_Location_ID
);
Sql server has no vector IN so you can emulate it with a little trick. Assuming '#' is illegal char for Item_Type
-- Emulates a two-column IN by gluing Item_Type and Location_ID into one string
-- key, using '#' as the separator, and comparing the keys.
SELECT *
FROM Table1 AS candidate
WHERE candidate.Item_Type + '#' + CAST(candidate.Location_ID AS varchar(20)) IN (
    SELECT grouped.Item_Type + '#' + CAST(grouped.Location_ID AS varchar(20))
    FROM Table1 AS grouped
    GROUP BY
        grouped.Item_Type,
        grouped.Location_ID
    HAVING COUNT(DISTINCT grouped.Sub_Location_ID) > 1
);
The downside is that the expression in the WHERE clause is non-sargable.
I think you can use exists:
-- Keep a row when some sibling row matches it on Item_Type and Location_ID
-- yet carries a different Sub_Location_ID.
SELECT t1.*
FROM table1 AS t1
WHERE EXISTS (
    SELECT 1
    FROM table1 AS sibling
    WHERE sibling.Item_Type = t1.Item_Type
      AND sibling.Location_ID = t1.Location_ID
      AND sibling.Sub_Location_ID <> t1.Sub_Location_ID
);

SQL - selecting only certain rows if they exist

I have a table that contains Home addresses and Mailing addresses. It looks like this:
ID Name StNum StName City State Zip Type
-- ---- ----- ------ ---- ----- --- ----
1 Joe 1234 Main St Waco TX 76767 HOM
1 Joe 2345 High St Waco TX 76763 MLG
2 Amy 3456 Broad St Athens GA 34622 HOM
3 Mel 987 Front St Cary NC 65331 HOM
3 Mel 1111 Main Ave Hilo HI 99779 MLG
I need to write an SQL statement that will only return the Mailing address (MLG record) if it exists, and if not, will return the Home address (HOM record).
The expected results from this table would be:
ID Name StNum StName City State Zip Type
-- ---- ----- ------ ---- ----- --- ----
1 Joe 2345 High St Waco TX 76763 MLG
2 Amy 3456 Broad St Athens GA 34622 HOM
3 Mel 1111 Main Ave Hilo HI 99779 MLG
Any help that you can provide would be much appreciated! Thanks!
use correlated subquery
-- For each ID, keep the rows whose MLG flag equals the highest flag present for
-- that ID: the MLG row when one exists, otherwise the ID's non-MLG rows.
-- Correlation is on ID (the key in the sample data) rather than Name, and the
-- helper flag is not returned so the output matches the requested columns.
SELECT
    a.ID,
    a.Name,
    a.StNum,
    a.StName,
    a.City,
    a.State,
    a.Zip,
    a.Type
FROM (
    SELECT
        t.*,
        CASE WHEN t.Type = 'MLG' THEN 1 ELSE 0 END AS typeval
    FROM tablename AS t
) AS a
WHERE a.typeval = (
    SELECT MAX(CASE WHEN b.Type = 'MLG' THEN 1 ELSE 0 END)
    FROM tablename AS b
    WHERE b.ID = a.ID
);
Or, if your database supports ROW_NUMBER(), you can try the following:
-- Number each ID's rows with MLG first and keep row 1. The window column is
-- aliased as rn, which the outer filter needs in order to reference it.
SELECT
    a.ID,
    a.Name,
    a.StNum,
    a.StName,
    a.City,
    a.State,
    a.Zip,
    a.Type
FROM (
    SELECT
        t.*,
        ROW_NUMBER() OVER (
            PARTITION BY t.ID
            ORDER BY CASE WHEN t.Type = 'MLG' THEN 1 ELSE 0 END DESC
        ) AS rn
    FROM tablename AS t
) AS a
WHERE a.rn = 1;
If you are using SQL Server, I would solve it with the ROW_NUMBER function.
-- Rank each ID's rows by Type descending and keep the first one; 'MLG' sorts
-- after 'HOM', so the mailing row wins whenever it exists.
-- SQL Server requires an alias on a derived table, hence "ranked".
SELECT ID, Name, StNum, StName, City, State, Zip, Type
FROM (
    SELECT
        *,
        ROW_NUMBER() OVER (PARTITION BY ID ORDER BY Type DESC) AS Rn
    FROM yourtable
) AS ranked
WHERE Rn = 1;
This can be done using a WHERE clause that excludes the HOM rows of users who also have an MLG row.
Schema (MySQL v5.7)
-- Sample schema and rows for the address example (MySQL syntax).
CREATE TABLE test (
  `ID` INTEGER,
  `Name` VARCHAR(3),
  `StNum` INTEGER,
  `StName` VARCHAR(8),
  `City` VARCHAR(6),
  `State` VARCHAR(2),
  `Zip` INTEGER,
  `Type` VARCHAR(3)
);

-- ID, StNum and Zip are INTEGER columns, so they receive numeric literals
-- instead of quoted strings that would have to be converted implicitly.
INSERT INTO test
  (`ID`, `Name`, `StNum`, `StName`, `City`, `State`, `Zip`, `Type`)
VALUES
  (1, 'Joe', 1234, 'Main St', 'Waco', 'TX', 76767, 'HOM'),
  (1, 'Joe', 2345, 'High St', 'Waco', 'TX', 76763, 'MLG'),
  (2, 'Amy', 3456, 'Broad St', 'Athens', 'GA', 34622, 'HOM'),
  (3, 'Mel', 987, 'Front St', 'Cary', 'NC', 65331, 'HOM'),
  (3, 'Mel', 1111, 'Main Ave', 'Hilo', 'HI', 99779, 'MLG');
Query #1
-- Return every MLG row, plus the rows of ids that have no MLG row at all.
-- NOT EXISTS is used instead of NOT IN because `ID` is nullable: a NULL id in
-- the NOT IN list would make that predicate unknown for every row.
SELECT t1.id,
       t1.name,
       t1.StNum,
       t1.StName,
       t1.City,
       t1.State,
       t1.Zip,
       t1.Type
FROM test t1
WHERE t1.`Type` = 'MLG'
   OR NOT EXISTS
      (
        SELECT 1
        FROM test t2
        WHERE t2.id = t1.id
          AND t2.`Type` = 'MLG'
      );
Output :
| id | name | StNum | StName | City | State | Zip | Type |
| --- | ---- | ----- | -------- | ------ | ----- | ----- | ---- |
| 1 | Joe | 2345 | High St | Waco | TX | 76763 | MLG |
| 2 | Amy | 3456 | Broad St | Athens | GA | 34622 | HOM |
| 3 | Mel | 1111 | Main Ave | Hilo | HI | 99779 | MLG |
View on DB Fiddle
Or, my first dumb version :
This can be done using UNION
Schema (MySQL v5.7)
-- Sample schema and rows for the address example (MySQL syntax).
CREATE TABLE test (
  `ID` INTEGER,
  `Name` VARCHAR(3),
  `StNum` INTEGER,
  `StName` VARCHAR(8),
  `City` VARCHAR(6),
  `State` VARCHAR(2),
  `Zip` INTEGER,
  `Type` VARCHAR(3)
);

-- ID, StNum and Zip are INTEGER columns, so they receive numeric literals
-- instead of quoted strings that would have to be converted implicitly.
INSERT INTO test
  (`ID`, `Name`, `StNum`, `StName`, `City`, `State`, `Zip`, `Type`)
VALUES
  (1, 'Joe', 1234, 'Main St', 'Waco', 'TX', 76767, 'HOM'),
  (1, 'Joe', 2345, 'High St', 'Waco', 'TX', 76763, 'MLG'),
  (2, 'Amy', 3456, 'Broad St', 'Athens', 'GA', 34622, 'HOM'),
  (3, 'Mel', 987, 'Front St', 'Cary', 'NC', 65331, 'HOM'),
  (3, 'Mel', 1111, 'Main Ave', 'Hilo', 'HI', 99779, 'MLG');
Query #1
-- First branch: every MLG row. Second branch: rows of ids with no MLG row.
-- The branches are disjoint, so UNION ALL is sufficient. NOT EXISTS replaces
-- NOT IN because `ID` is nullable and a NULL in the list would empty the
-- second branch.
SELECT t1.id,
       t1.name,
       t1.StNum,
       t1.StName,
       t1.City,
       t1.State,
       t1.Zip,
       t1.Type
FROM test t1
WHERE t1.`Type` = 'MLG'
UNION ALL
SELECT t2.id,
       t2.name,
       t2.StNum,
       t2.StName,
       t2.City,
       t2.State,
       t2.Zip,
       t2.Type
FROM test t2
WHERE NOT EXISTS (
        SELECT 1
        FROM test t3
        WHERE t3.id = t2.id
          AND t3.`Type` = 'MLG'
      )
ORDER BY id;
Output
| id | name | StNum | StName | City | State | Zip | Type |
| --- | ---- | ----- | -------- | ------ | ----- | ----- | ---- |
| 1 | Joe | 2345 | High St | Waco | TX | 76763 | MLG |
| 2 | Amy | 3456 | Broad St | Athens | GA | 34622 | HOM |
| 3 | Mel | 1111 | Main Ave | Hilo | HI | 99779 | MLG |
View on DB Fiddle
This is a prioritization query. With two values, often the simplest method is union all with not exists (or not in).
That does not generalize well, though. For more values, using row_number() with case is convenient:
-- Number each id's rows by priority (MLG = 1, anything else = 2) and keep the
-- top-priority row per id.
SELECT ranked.*
FROM (
    SELECT
        t.*,
        ROW_NUMBER() OVER (
            PARTITION BY id
            ORDER BY (CASE WHEN type = 'MLG' THEN 1 ELSE 2 END)
        ) AS seqnum
    FROM t
) ranked
WHERE seqnum = 1;
In your particular case, you could use order by type desc, because the two types happen to be prioritized in reverse alphabetical ordering. However, I recommend using case because the intention is more explicit.

Merge where definition exists

I have 3 tables I'm working with here. ATTRIBUTE_MAP, GROUP_DEFINITIONS, and GROUP_MAP.
ATTRIBUTE_MAP contains the CUST_ID and the associated ATTRIBUTE_ID.
GROUP_DEFINITIONS defines a group. Its columns are GROUP_ID, ATTRIBUTE_1, VALUE_1, ATTRIBUTE_2, VALUE_2, ATTRIBUTE_3, VALUE_3 A group consists of 1 to 3 attributes with values. For example, an attribute could be 'State' with its value being 'New York'. Values can also be null for boolean values like 'Owns Car'.
GROUP_MAP simply maps the CUST_ID to a GROUP_ID.
Now, I'm trying to write a script that will look at the ATTRIBUTE_MAP and see if a customer falls into one of the defined groups in GROUP_DEFINITIONS. If he (the customer) does, then insert/update a row into GROUP_MAP with the CUST_ID and GROUP_ID. The part I'm having trouble with, is matching the attribute values.
Here is what I have so far:
-- Asker's draft: the source pairs every ATTRIBUTE_MAP row with every
-- GROUP_DEFINITIONS row (no join condition), so no attribute values are
-- compared yet.
MERGE GROUP_MAP AS gm
USING (
    SELECT
        am.CUST_ID,
        am.ATTRIBUTE_ID,
        am.START_DATE,
        gd.GROUP_ID,
        gd.ATTRIBUTE_1,
        gd.VALUE_1,
        gd.ATTRIBUTE_2,
        gd.VALUE_2,
        gd.ATTRIBUTE_3,
        gd.VALUE_3
    FROM ATTRIBUTE_MAP AS am
    CROSS JOIN GROUP_DEFINITIONS AS gd
) AS src
    ON gm.GROUP_ID = src.GROUP_ID
   AND gm.CUST_ID = src.CUST_ID
WHEN NOT MATCHED THEN -- create association in GROUP_MAP
    INSERT (CUST_ID, GROUP_ID, FROM_DATE)
    VALUES (src.CUST_ID, src.GROUP_ID, src.START_DATE);
Am I approaching this correctly? I'm guessing I just need to improve the nested select statement in my merge so that it properly joins ATTRIBUTE_MAP and GROUP_DEFINITIONS, and then go from there. Any help/suggestions would be appreciated.
Here's an example for reference:
ATTRIBUTE_MAP:
+---------+--------------+------------+
| CUST_ID | ATTRIBUTE_ID | VALUE |
+---------+--------------+------------+
| 50 | 1 | 'New York' |
+---------+--------------+------------+
| 50 | 2 | |
+---------+--------------+------------+
GROUP_DEFINITIONS:
+----------+-------------+------------+-------------+---------+-------------+---------+
| GROUP_ID | ATTRIBUTE_1 | VALUE_1 | ATTRIBUTE_2 | VALUE_2 | ATTRIBUTE_3 | VALUE_3 |
+----------+-------------+------------+-------------+---------+-------------+---------+
| 10 | 1 | 'New York' | 2 | | | |
+----------+-------------+------------+-------------+---------+-------------+---------+
| 20 | 2 | | | | | |
+----------+-------------+------------+-------------+---------+-------------+---------+
and so the script should generate (in GROUP_MAP):
+---------+----------+--------+
| CUST_ID | GROUP_ID | DATE |
+---------+----------+--------+
| 50 | 10 | *date* |
+---------+----------+--------+
| 50 | 20 | *date* |
+---------+----------+--------+
I could be totally off, but it looks like your inner select needs to be something like this. If I understand what you are trying to do, this will return a unique list of CUST_ID, GROUP_ID, START_DATE where all of the customer attributes match all of the group attributes. Just wrote this fast, so might have some errors, but it might get you going the right direction.
-- gd: GROUP_DEFINITIONS unpivoted to one (GROUP_ID, ATTRIBUTE_ID, VALUE) row per
--     filled attribute slot; empty slots (NULL attribute) are dropped.
-- gc: number of attributes each group requires.
WITH gd AS (
    SELECT GROUP_ID, ATTRIBUTE_1 AS ATTRIBUTE_ID, VALUE_1 AS VALUE
    FROM GROUP_DEFINITIONS
    WHERE ATTRIBUTE_1 IS NOT NULL
    UNION
    SELECT GROUP_ID, ATTRIBUTE_2, VALUE_2
    FROM GROUP_DEFINITIONS
    WHERE ATTRIBUTE_2 IS NOT NULL
    UNION
    SELECT GROUP_ID, ATTRIBUTE_3, VALUE_3
    FROM GROUP_DEFINITIONS
    WHERE ATTRIBUTE_3 IS NOT NULL
),
gc AS (
    SELECT GROUP_ID, COUNT(*) AS ATTR_COUNT
    FROM gd
    GROUP BY GROUP_ID
)
MERGE GROUP_MAP AS gm
USING (
    -- One row per (customer, group) where the customer holds every attribute
    -- the group requires. A NULL value and '' are treated as the same value.
    -- Grouping must not include START_DATE: attributes acquired on different
    -- dates would otherwise land in separate groups and never reach ATTR_COUNT.
    -- COUNT(DISTINCT ...) keeps duplicate ATTRIBUTE_MAP rows from inflating the
    -- match count.
    SELECT
        am.CUST_ID,
        gd.GROUP_ID,
        -- NOTE(review): uses the latest START_DATE among the matched attributes,
        -- assuming membership begins once the last required attribute is held.
        MAX(am.START_DATE) AS START_DATE
    FROM ATTRIBUTE_MAP AS am
    INNER JOIN gd
        ON am.ATTRIBUTE_ID = gd.ATTRIBUTE_ID
       AND COALESCE(am.VALUE, '') = COALESCE(gd.VALUE, '')
    INNER JOIN gc
        ON gc.GROUP_ID = gd.GROUP_ID
    GROUP BY am.CUST_ID, gd.GROUP_ID
    HAVING COUNT(DISTINCT am.ATTRIBUTE_ID) = MAX(gc.ATTR_COUNT)
) AS src
    ON gm.GROUP_ID = src.GROUP_ID
   AND gm.CUST_ID = src.CUST_ID
WHEN NOT MATCHED
    THEN -- create association in GROUP_MAP
    INSERT (CUST_ID, GROUP_ID, FROM_DATE)
    VALUES (src.CUST_ID, src.GROUP_ID, src.START_DATE);
If I understood the problem correctly this should do it:
Please note I have used GETDATE() as I do not have the field [START_DATE] but you will need to substitute this within the code
SAMPLE DATA:
-- Temp-table fixtures mirroring the example from the question.
CREATE TABLE #ATTRIBUTE_MAP (
    CUST_ID      INT,
    ATTRIBUTE_ID INT,
    VALUE        VARCHAR(20)
);

INSERT INTO #ATTRIBUTE_MAP (CUST_ID, ATTRIBUTE_ID, VALUE)
VALUES
    (50, 1, 'New York'),
    (50, 2, NULL);

CREATE TABLE #GROUP_DEFINITIONS (
    GROUP_ID    INT,
    ATTRIBUTE_1 INT,
    VALUE_1     VARCHAR(20),
    ATTRIBUTE_2 INT,
    VALUE_2     VARCHAR(20),
    ATTRIBUTE_3 INT,
    VALUE_3     VARCHAR(20)
);

INSERT INTO #GROUP_DEFINITIONS
    (GROUP_ID, ATTRIBUTE_1, VALUE_1, ATTRIBUTE_2, VALUE_2, ATTRIBUTE_3, VALUE_3)
VALUES
    (10, 1, 'New York', 2, NULL, NULL, NULL),
    (20, 2, NULL, NULL, NULL, NULL, NULL);

-- Starts empty; the MERGE fills it.
CREATE TABLE #GROUP_MAP (
    CUST_ID     INT,
    GROUP_ID    INT,
    [FROM_DATE] DATE
);
QUERY:
-- group_attributes: #GROUP_DEFINITIONS unpivoted to one row per attribute slot.
-- NOTE(review): the join below compares attribute ids only. Values are not
-- compared and a single matching attribute is enough to associate the group.
WITH group_attributes AS (
    SELECT GROUP_ID, ATTRIBUTE_1 AS ATTRIBUTE_ID, VALUE_1
    FROM #GROUP_DEFINITIONS
    UNION ALL
    SELECT GROUP_ID, ATTRIBUTE_2, VALUE_2
    FROM #GROUP_DEFINITIONS
    UNION ALL
    SELECT GROUP_ID, ATTRIBUTE_3, VALUE_3
    FROM #GROUP_DEFINITIONS
)
MERGE #GROUP_MAP AS gm
USING (
    SELECT DISTINCT
        am.CUST_ID,
        CAST(GETDATE() AS DATE) AS [START_DATE], --<-- you will need to change this
        gd.GROUP_ID
    FROM #ATTRIBUTE_MAP AS am
    INNER JOIN group_attributes AS gd
        ON gd.ATTRIBUTE_ID = am.ATTRIBUTE_ID
) AS src
    ON gm.GROUP_ID = src.GROUP_ID
   AND gm.CUST_ID = src.CUST_ID
WHEN NOT MATCHED
    THEN -- create association in GROUP_MAP
    INSERT (CUST_ID, GROUP_ID, FROM_DATE)
    VALUES (src.CUST_ID, src.GROUP_ID, src.START_DATE);
VERIFY RESULT:
-- List the customer/group associations currently stored in #GROUP_MAP.
SELECT CUST_ID , GROUP_ID , FROM_DATE
FROM #GROUP_MAP;
RESULT:

Select rows into columns and show a flag in the column

Trying to get an output like the below:
| UserFullName | JAVA | DOTNET | C | HTML5 |
|--------------|--------|--------|--------|--------|
| Anne San | | | | |
| John Khruf | 1 | 1 | | 1 |
| Mary Jane | 1 | | | 1 |
| George Mich | | | | |
This shows the roles of a person. A person could have 0 or N roles. When a person has a role, I am showing a flag, like '1'.
Actually I have 2 blocks of code:
Block #1: The tables and a simple output which generates more than 1 rows per person.
SQL Fiddle
MS SQL Server 2008 Schema Setup:
-- Catalogue of roles; Description supplies the pivoted column names.
CREATE TABLE AvailableRoles (
    id          int identity primary key,
    CodeID      varchar(5),
    Description varchar(500)
);

INSERT INTO AvailableRoles (CodeID, Description)
VALUES
    ('1', 'JAVA'),
    ('2', 'DOTNET'),
    ('3', 'C'),
    ('4', 'HTML5');

-- Role assignments: one row per (user, role).
CREATE TABLE PersonalRoles (
    id     int identity primary key,
    UserID varchar(100),
    RoleID varchar(5)
);

INSERT INTO PersonalRoles (UserID, RoleID)
VALUES
    ('John.Khruf', '1'),
    ('John.Khruf', '2'),
    ('Mary.Jane', '1'),
    ('Mary.Jane', '4'),
    ('John.Khruf', '4');

-- People; two of them have no role at all.
CREATE TABLE Users (
    UserID         varchar(20),
    EmployeeType   varchar(1),
    EmployeeStatus varchar(1),
    UserFullName   varchar(500)
);

INSERT INTO Users (UserID, EmployeeType, EmployeeStatus, UserFullName)
VALUES
    ('John.Khruf', 'E', 'A', 'John Khruf'),
    ('Mary.Jane', 'E', 'A', 'Mary Jane'),
    ('Anne.San', 'E', 'A', 'Anne San'),
    ('George.Mich', 'T', 'A', 'George Mich');
Query 1:
-- One row per (active user, role); users without roles appear once with a
-- NULL RoleID because of the LEFT JOIN.
SELECT
    usr.UserFullName,
    pr.RoleID
FROM Users AS usr
LEFT JOIN PersonalRoles AS pr
    ON pr.UserID = usr.UserID
WHERE usr.EmployeeStatus = 'A'
ORDER BY
    usr.EmployeeType ASC,
    usr.UserFullName ASC
Results:
| UserFullName | RoleID |
|--------------|--------|
| Anne San | (null) |
| John Khruf | 1 |
| John Khruf | 2 |
| John Khruf | 4 |
| Mary Jane | 1 |
| Mary Jane | 4 |
| George Mich | (null) |
Block #2: An attempt to convert the rows into columns to be used in the final result
SQL Fiddle
MS SQL Server 2008 Schema Setup:
-- Catalogue of roles; Description supplies the pivoted column names.
CREATE TABLE AvailableRoles (
    id          int identity primary key,
    CodeID      varchar(5),
    Description varchar(500)
);

INSERT INTO AvailableRoles (CodeID, Description)
VALUES
    ('1', 'JAVA'),
    ('2', 'DOTNET'),
    ('3', 'C'),
    ('4', 'HTML5');
Query 1:
-- Turn each role Description into a column holding that role's CodeID.
SELECT *
FROM (
    SELECT
        ar.CodeID,
        ar.Description
    FROM AvailableRoles AS ar
) AS d
PIVOT (
    MAX(CodeID)
    FOR Description IN (Java, DOTNET, C, HTML5)
) AS piv
Results:
| Java | DOTNET | C | HTML5 |
|--------|--------|-------|--------|
| 1 | 2 | 3 | 4 |
Any help in mixing both blocks to show the top output will be welcome. Thanks.
Another option without PIVOT operator is:
-- One row per active user with a '1' flag under each role the user holds and
-- an empty string otherwise (MAX picks '1' over '' when any matching row exists).
-- Grouping includes UserID so two people with the same full name stay separate.
-- The status filter and the EmployeeType/UserFullName ordering follow the
-- asker's own query, which gives the row order shown in the desired output.
SELECT
    u.UserFullName,
    MAX(CASE WHEN a.Description = 'JAVA'   THEN '1' ELSE '' END) AS JAVA,
    MAX(CASE WHEN a.Description = 'DOTNET' THEN '1' ELSE '' END) AS DOTNET,
    MAX(CASE WHEN a.Description = 'C'      THEN '1' ELSE '' END) AS C,
    MAX(CASE WHEN a.Description = 'HTML5'  THEN '1' ELSE '' END) AS HTML5
FROM Users AS u
LEFT JOIN PersonalRoles AS p
    ON p.UserID = u.UserID
LEFT JOIN AvailableRoles AS a
    ON a.CodeID = p.RoleID
WHERE u.EmployeeStatus = 'A'
GROUP BY
    u.UserID,
    u.EmployeeType,
    u.UserFullName
ORDER BY
    u.EmployeeType ASC,
    u.UserFullName ASC;
SQLFiddle: http://sqlfiddle.com/#!3/630c3/19
You can try this.
-- Inner query: one row per (user, role) with roleid reduced to a 1 flag
-- (NULL when the user has no role). PIVOT then spreads the role descriptions
-- into columns, leaving one row per userfullname.
SELECT *
FROM (
    SELECT
        u.userfullname,
        CASE WHEN p.roleid IS NOT NULL THEN 1 END AS roleid,
        a.description
    FROM users AS u
    LEFT JOIN personalroles AS p
        ON p.userid = u.userid
    LEFT JOIN availableroles AS a
        ON a.codeid = p.roleid
) AS d
PIVOT (
    MAX(roleID)
    FOR Description IN (Java, DOTNET, C, HTML5)
) AS piv
Fiddle

Find Min Value and value of a corresponding column for that result

I have a table of user data in my SQL Server database and I am attempting to summarize the data. Basically, I need some min, max, and sum values and to group by some columns
Here is a sample table:
Member ID | Name | DateJoined | DateQuit | PointsEarned | Address
00001 | Leyth | 1/1/2013 | 9/30/2013 | 57 | 123 FirstAddress Way
00002 | James | 2/1/2013 | 7/21/2013 | 34 | 4 street road
00001 | Leyth | 2/1/2013 | 10/15/2013| 32 | 456 LastAddress Way
00003 | Eric | 2/23/2013 | 4/14/2013 | 15 | 5 street road
I'd like the summarized table to show the results like this:
Member ID | Name | DateJoined | DateQuit | PointsEarned | Address
00001 | Leyth | 1/1/2013 | 10/15/2013 | 89 | 123 FirstAddress Way
00002 | James | 2/1/2013 | 7/21/2013 | 34 | 4 street road
00003 | Eric | 2/23/2013 | 4/14/2013 | 15 | 5 street road
Here is my query so far:
-- Summary per member. Name is grouped alongside MemberID because every
-- non-aggregated column in the SELECT list must appear in GROUP BY.
-- MIN(Address) is the alphabetically smallest address, which is not
-- necessarily the address of the earliest DateJoined row.
-- "Table" is a reserved word, so the placeholder table name is bracketed.
SELECT
    MemberID,
    Name,
    MIN(DateJoined)   AS DateJoined,
    MAX(DateQuit)     AS DateQuit,
    SUM(PointsEarned) AS PointsEarned,
    MIN(Address)      AS Address
FROM [Table]
GROUP BY MemberID, Name
The Min(Address) works this time, it retrieves the address that corresponds to the earliest DateJoined. However, if we swapped the two addresses in the original table, we would retrieve "123 FirstAddress Way" which would not correspond to the 1/1/2013 date joined.
For almost everything you can use a simple GROUP BY, but needing "the address from the row that has the minimum DateJoined" is a little bit trickier. You can solve it in several ways; one is a subquery that looks up the address for each member:
-- X aggregates per member; the scalar subquery then fetches the address of the
-- member's earliest DateJoined row.
-- TOP (1) with an ORDER BY keeps the subquery single-valued (and deterministic)
-- when a member has several rows sharing that earliest date; without it SQL
-- Server raises an error because a scalar subquery returned more than one row.
SELECT
    X.*,
    (SELECT TOP (1) t2.Address
     FROM #tmp t2
     WHERE t2.MemberID = X.MemberID
       AND t2.DateJoined = (SELECT MIN(t3.DateJoined)
                            FROM #tmp t3
                            WHERE t3.MemberID = X.MemberID)
     ORDER BY t2.Address
    ) AS Address
FROM
    (SELECT MemberID,
            Name,
            MIN(DateJoined)   AS DateJoined,
            MAX(DateQuit)     AS DateQuit,
            SUM(PointsEarned) AS PointEarned
     FROM #tmp t1
     GROUP BY MemberID, Name
    ) AS X
Another way is to join the aggregated subquery back to the table:
-- Aggregate per member, then join back to the detail rows on the earliest
-- DateJoined to pick up the address stored on that row.
SELECT
    agg.*,
    first_row.Address
FROM (
    SELECT
        src.MemberID,
        src.Name,
        MIN(src.DateJoined)   AS DateJoined,
        MAX(src.DateQuit)     AS DateQuit,
        SUM(src.PointsEarned) AS PointEarned
    FROM #tmp AS src
    GROUP BY src.MemberID, src.Name
) AS agg
JOIN #tmp AS first_row
    ON first_row.MemberID = agg.MemberID
   AND first_row.DateJoined = agg.DateJoined
You could rank your rows according to the date, and select the minimal one:
-- t:    per-member aggregates.
-- addr: every address ranked by date_joined within its member; rk = 1 is the
--       earliest. RANK gives tied earliest rows the same rank, so a member with
--       ties yields one output row per tied address.
SELECT
    t.member_id,
    t.name,
    t.date_joined,
    t.date_quit,
    t.points_earned,
    addr.address AS address
FROM (
    SELECT
        member_id,
        name,
        MIN(date_joined)   AS date_joined,
        MAX(date_quit)     AS date_quit,
        SUM(points_earned) AS points_earned
    FROM my_table
    GROUP BY member_id, name
) t
JOIN (
    SELECT
        member_id,
        address,
        RANK() OVER (PARTITION BY member_id ORDER BY date_joined) AS rk
    FROM my_table
) addr
    ON addr.member_id = t.member_id
   AND addr.rk = 1
SELECT DISTINCT st.memberid, st.name, m1.datejoined, m2.datequit, SUM(st.pointsearned), m1.Address
from SAMPLEtable st
LEFT JOIN ( SELECT memberid
, name
, MIN(datejoined)
, datequit
FROM sampletable
) m1 ON st.memberid = m1.memberid
LEFT JOIN ( SELECT memberid
, name
, datejoined
, MAX(datequit)
FROM sampletable
) m2 ON m1.memberid = m2.memberid