SQL Group By ignore shared fields by group - sql

I have product variants that I want to get as a group and ignore any fields that are shared for the one aggregate record that is returned. It is unclear how many or what fields the various variants will share. This is an example table
-- Sample catalogue: one row per product variant; rows sharing a variantId belong to the same product.
CREATE TABLE IF NOT EXISTS `products` (
  `id` int(6) unsigned NOT NULL,
  `name` varchar(200),
  `variantId` int(3),
  `size` varchar(200),   -- nullable: the RefereeCard rows below carry no size
  `color` varchar(200),
  PRIMARY KEY (`id`)
) DEFAULT CHARSET=utf8;

INSERT INTO `products` (`id`, `name`, `variantId`, `size`, `color`) VALUES
  ('1', 'SoccerBalls', '1', '3', 'Black-white'),
  ('2', 'SoccerBalls', '1', '4', 'Black-white'),
  ('3', 'SoccerBalls', '1', '5', 'Black-white'),
  ('4', 'RefereeCard', '2', null, 'yellow'),
  -- size is a real NULL here (the stray quote after null made the statement unparsable)
  ('5', 'RefereeCard', '2', null, 'red'),
  ('6', 'Jersey', '3', 'L', 'Tottenham'),
  ('7', 'Jersey', '3', 'M', 'Chelsea'),
  ('8', 'Jersey', '3', 'S', 'Arsenal');
What I want as output is something like:
null, SoccerBalls, null, null, Black-white
null, RefereeCard, null, null, null,
null, Jersey, null, null, null

I think you want:
-- One row per (variantid, name). A column keeps its value only when the
-- smallest and largest value in the group coincide; otherwise it is NULL.
SELECT
    CASE WHEN MIN(p.id)    <> MAX(p.id)    THEN NULL ELSE MIN(p.id)    END AS id,
    p.name,
    p.variantid,
    CASE WHEN MIN(p.size)  <> MAX(p.size)  THEN NULL ELSE MIN(p.size)  END AS size,
    CASE WHEN MIN(p.color) <> MAX(p.color) THEN NULL ELSE MIN(p.color) END AS color
FROM products AS p
GROUP BY
    p.variantid,
    p.name;
If the values can be NULL, then you need an additional condition -- if you want to treat NULL as yet-another-value:
-- Same grouping as the simpler form, but a NULL in the group counts as a
-- differing value: COUNT(col) skips NULLs, so COUNT(col) < COUNT(*) means
-- at least one row in the group has no value for that column.
SELECT
    CASE WHEN MIN(p.id) <> MAX(p.id) THEN NULL ELSE MIN(p.id) END AS id,
    p.name,
    p.variantid,
    CASE
        WHEN COUNT(p.size) < COUNT(*) OR MIN(p.size) <> MAX(p.size) THEN NULL
        ELSE MIN(p.size)
    END AS size,
    CASE
        WHEN COUNT(p.color) < COUNT(*) OR MIN(p.color) <> MAX(p.color) THEN NULL
        ELSE MIN(p.color)
    END AS color
FROM products AS p
GROUP BY
    p.variantid,
    p.name;
Obviously, you don't need this check on id, because it is the primary key.

Related

SQL Server Insert Value that match foreign key

I use SQL Server SGBD and I have the following scenario with 2 tables :
-- Client lookup table; ID_CLIENT is the key that F_FACT rows reference.
CREATE TABLE D_CLIENT
(
    ID_CLIENT  VARCHAR(10) NOT NULL PRIMARY KEY,
    NOM_CLIENT VARCHAR(10) NULL
);
-- Fact table keyed by (ANNEE, DOCUMENT, NUM_DOC, NUM_LIGNE_DOC).
-- Every row must point at an existing D_CLIENT.ID_CLIENT.
CREATE TABLE F_FACT
(
    ANNEE         VARCHAR(10)   NOT NULL,
    DOCUMENT      VARCHAR(10)   NOT NULL,
    NUM_DOC       VARCHAR(10)   NOT NULL,
    NUM_LIGNE_DOC VARCHAR(10)   NOT NULL,
    ID_CLIENT     VARCHAR(10)   NOT NULL
        CONSTRAINT FK_FactClient FOREIGN KEY REFERENCES D_CLIENT (ID_CLIENT),
    ID_REP        VARCHAR(10)   NOT NULL,
    CA            DECIMAL(10,2) NULL,
    PRIMARY KEY (ANNEE, DOCUMENT, NUM_DOC, NUM_LIGNE_DOC)
);
-- Four clients, IDs '1' through '4'.
INSERT INTO D_CLIENT (ID_CLIENT, NOM_CLIENT)
VALUES
    ('1', 'A'),
    ('2', 'B'),
    ('3', 'C'),
    ('4', 'D');

-- Single multi-row insert: the last row references client '5', which is not
-- in D_CLIENT, so the foreign key rejects the statement as a whole.
INSERT INTO F_FACT (ANNEE, DOCUMENT, NUM_DOC, NUM_LIGNE_DOC, ID_CLIENT, ID_REP, CA)
VALUES
    ('2022', 'FAC', '1', '1', '1', '1', 100),
    ('2022', 'FAC', '1', '2', '1', '1', 100),
    ('2022', 'FAC', '2', '1', '5', '1', 100);
I have a foreign key on ID_CLIENT for the integrity of data, so if I try to insert a row into F_FACT with an ID_CLIENT which doesn't exist in D_CLIENT, it will fail and it's normal because of foreign key constraint.
So when I execute the INSERT query, I get an error message because the value '5' doesn't exist in the table D_CLIENT, but the first 2 rows are not inserted either, even though their ID_CLIENT does exist in the D_CLIENT table.
My question: is it possible, with a query, to insert only the correct rows (that is, the first 2 rows) and **reject only** the third row?
Thanks for your help
Join the source with the lookup table to reject missing values
-- Stage the candidate rows, then keep only those whose ID_CLIENT has a
-- matching row in D_CLIENT; unmatched rows are dropped by the inner join.
WITH incoming AS (
    SELECT
        t.ANNEE,
        t.DOCUMENT,
        t.NUM_DOC,
        t.NUM_LIGNE_DOC,
        t.ID_CLIENT,
        t.ID_REP,
        t.CA
    FROM (
        VALUES
            ('2022', 'FAC', '1', '1', '1', '1', 100),
            ('2022', 'FAC', '1', '2', '1', '1', 100),
            ('2022', 'FAC', '2', '1', '5', '1', 100)
    ) AS t (ANNEE, DOCUMENT, NUM_DOC, NUM_LIGNE_DOC, ID_CLIENT, ID_REP, CA)
)
INSERT INTO F_FACT (ANNEE, DOCUMENT, NUM_DOC, NUM_LIGNE_DOC, ID_CLIENT, ID_REP, CA)
SELECT
    i.ANNEE,
    i.DOCUMENT,
    i.NUM_DOC,
    i.NUM_LIGNE_DOC,
    i.ID_CLIENT,
    i.ID_REP,
    i.CA
FROM incoming AS i
INNER JOIN D_CLIENT AS c
    ON c.ID_CLIENT = i.ID_CLIENT;
db<>fiddle
This is something I would use an exists check for:
-- Insert only the literal rows whose ID_CLIENT is found in D_CLIENT.
INSERT INTO F_FACT (ANNEE, DOCUMENT, NUM_DOC, NUM_LIGNE_DOC, ID_CLIENT, ID_REP, CA)
SELECT
    cand.ANNEE,
    cand.DOCUMENT,
    cand.NUM_DOC,
    cand.NUM_LIGNE_DOC,
    cand.ID_CLIENT,
    cand.ID_REP,
    cand.CA
FROM (
    VALUES
        ('2022', 'FAC', '1', '1', '1', '1', 100),
        ('2022', 'FAC', '1', '2', '1', '1', 100),
        ('2022', 'FAC', '2', '1', '5', '1', 100)
) AS cand (ANNEE, DOCUMENT, NUM_DOC, NUM_LIGNE_DOC, ID_CLIENT, ID_REP, CA)
WHERE EXISTS (
    SELECT 1
    FROM D_CLIENT AS cli
    WHERE cli.ID_CLIENT = cand.ID_CLIENT
);

get only records which have similar values in column A and different values in column B

My dataset has 2 tables:
animals with animal_id and animal_type
owners with animal_id and owner_name
I want to get records only for those animals (plus their owner's name) whose owner has a cat and at least one other, different pet.
Here is my schema:
-- One row per animal, with its type.
CREATE TABLE IF NOT EXISTS `animals` (
    `animal_id`   INT(6) UNSIGNED NOT NULL PRIMARY KEY,
    `animal_type` VARCHAR(200)    NOT NULL
) DEFAULT CHARSET=utf8;

-- Ten animals: five cats, four dogs and one hamster.
INSERT INTO `animals` (`animal_id`, `animal_type`)
VALUES
    ('1',  'cat'),
    ('2',  'dog'),
    ('3',  'cat'),
    ('4',  'cat'),
    ('5',  'dog'),
    ('6',  'dog'),
    ('7',  'cat'),
    ('8',  'dog'),
    ('9',  'cat'),
    ('10', 'hamster');
-- Maps each animal to the name of its owner; an owner with several animals
-- appears on several rows.
CREATE TABLE IF NOT EXISTS `owners` (
    `animal_id`  INT(6) UNSIGNED NOT NULL PRIMARY KEY,
    `owner_name` VARCHAR(200)    NOT NULL
) DEFAULT CHARSET=utf8;

INSERT INTO `owners` (`animal_id`, `owner_name`)
VALUES
    ('1',  'CatOwner'),
    ('2',  'DogOwner'),
    ('3',  'CatsOwner'),
    ('4',  'CatsOwner'),
    ('5',  'DogsOwner'),
    ('6',  'DogsOwner'),
    ('7',  'CatDogOwner'),
    ('8',  'CatDogOwner'),
    ('9',  'CatHamsterOwner'),
    ('10', 'CatHamsterOwner');
I can filter and show only the records for owners who have more than one pet:
-- Every animal/owner pair whose owner name appears on at least two animals.
SELECT *
FROM animals AS pet
INNER JOIN owners AS keeper
    ON pet.animal_id = keeper.animal_id
WHERE keeper.owner_name IN (
    -- owner names attached to two or more animals
    SELECT k.owner_name
    FROM animals AS p
    INNER JOIN owners AS k
        ON p.animal_id = k.animal_id
    GROUP BY k.owner_name
    HAVING COUNT(k.owner_name) >= 2
)
Please tell me how I can narrow this down to owners who have a cat and at least one other, different pet:
I would suggest window functions:
-- Attach two per-owner counts to every animal row, then keep the rows of
-- owners who have at least one cat and at least two animals in total.
WITH owner_pets AS (
    SELECT
        a.*,
        o.owner_name,
        SUM(CASE WHEN a.animal_type = 'cat' THEN 1 ELSE 0 END)
            OVER (PARTITION BY o.owner_name) AS num_cats,
        COUNT(*)
            OVER (PARTITION BY o.owner_name) AS num_animals
    FROM owners AS o
    INNER JOIN animals AS a
        ON a.animal_id = o.animal_id
)
SELECT owner_pets.*
FROM owner_pets
WHERE num_cats > 0
  AND num_animals > 1;
Note: I'm not clear if the condition is for more than one animal or an animal that is not a cat. If the latter, then use:
-- Attach two per-owner counts to every animal row, then keep the rows of
-- owners who have at least one cat and at least one animal that is not a cat
-- (total animals differs from the number of cats).
WITH owner_pets AS (
    SELECT
        a.*,
        o.owner_name,
        SUM(CASE WHEN a.animal_type = 'cat' THEN 1 ELSE 0 END)
            OVER (PARTITION BY o.owner_name) AS num_cats,
        COUNT(*)
            OVER (PARTITION BY o.owner_name) AS num_animals
    FROM owners AS o
    INNER JOIN animals AS a
        ON a.animal_id = o.animal_id
)
SELECT owner_pets.*
FROM owner_pets
WHERE num_cats > 0
  AND num_cats <> num_animals;

ORDER BY a subqueries which has JOIN, LEFT JOIN and UNION

My question is: How do I order the subquery by PositionAssetId and then follow by its related PhysicalAssetId based on table TrxAssetPool?
I need LEFT JOIN because not all Position and Physical were linked together. Some of Position/Physical were standalone. A Physical might exist in PhysicalAsset and TrxPhysicalAsset but not exist in TrxAssetPool because it was not linked to any Position; and vice versa. These data also needed to be displayed.
-- Position assets: Id, Code, Desc.
CREATE TABLE `PositionAssets` (
    `Id`   INT(5) UNSIGNED NOT NULL PRIMARY KEY,
    `Code` VARCHAR(50)     NOT NULL,
    `Desc` VARCHAR(200)    NOT NULL
);

-- Physical assets: same shape as PositionAssets.
CREATE TABLE `PhysicalAssets` (
    `Id`   INT(5) UNSIGNED NOT NULL PRIMARY KEY,
    `Code` VARCHAR(50)     NOT NULL,
    `Desc` VARCHAR(200)    NOT NULL
);

-- Position assets recorded under a maintenance transaction (MaintTrxId).
CREATE TABLE `TrxPositionAssets` (
    `Id`              INT(5) UNSIGNED NOT NULL,
    `MaintTrxId`      INT(5) UNSIGNED NOT NULL,
    `PositionAssetId` INT(5)          NOT NULL,
    PRIMARY KEY (`Id`, `MaintTrxId`)
);

-- Physical assets recorded under a maintenance transaction (MaintTrxId).
CREATE TABLE `TrxPhysicalAssets` (
    `Id`              INT(5) UNSIGNED NOT NULL,
    `MaintTrxId`      INT(5) UNSIGNED NOT NULL,
    `PhysicalAssetId` INT(5)          NOT NULL,
    PRIMARY KEY (`Id`, `MaintTrxId`)
);

-- Pairs a position asset with a physical asset within one transaction.
CREATE TABLE `TrxAssetPool` (
    `Id`              INT(5) UNSIGNED NOT NULL PRIMARY KEY,
    `MaintTrxId`      INT(5)          NOT NULL,
    `PositionAssetId` INT(5)          NOT NULL,
    `PhysicalAssetId` INT(5)          NOT NULL
);
-- Three positions.
INSERT INTO `PositionAssets` (`Id`, `Code`, `Desc`)
VALUES
    ('1', 'PositionC', 'Air conditioner'),
    ('2', 'PositionB', 'Laptop'),
    ('3', 'PositionA', 'Mobile Phone');

-- Four physicals.
INSERT INTO `PhysicalAssets` (`Id`, `Code`, `Desc`)
VALUES
    ('1', 'PhysicalD',  'Dunlop Car Tyre'),
    ('2', 'PhysicalA1', 'Samsung'),
    ('3', 'PhysicalB2', 'Acer'),
    ('4', 'PhysicalB1', 'Lenovo');

-- All three positions are recorded under transaction 1.
INSERT INTO `TrxPositionAssets` (`Id`, `MaintTrxId`, `PositionAssetId`)
VALUES
    ('1', '1', '2'),
    ('2', '1', '3'),
    ('3', '1', '1');

-- All four physicals are recorded under transaction 1.
INSERT INTO `TrxPhysicalAssets` (`Id`, `MaintTrxId`, `PhysicalAssetId`)
VALUES
    ('1', '1', '2'),
    ('2', '1', '3'),
    ('3', '1', '1'),
    ('4', '1', '4');

-- Pairings: position 3 with physical 2; position 2 with physicals 4 and 3.
-- Position 1 and physical 1 appear in no pairing.
INSERT INTO `TrxAssetPool` (`Id`, `MaintTrxId`, `PositionAssetId`, `PhysicalAssetId`)
VALUES
    ('1', '1', '3', '2'),
    ('2', '1', '2', '4'),
    ('3', '1', '2', '3');
-- Positions and physicals recorded under transaction 1, returned as one list.
-- UNION (not UNION ALL) collapses the duplicate rows the pool joins produce
-- when an asset has several pairings.
SELECT
    DataPool.DataType,
    DataPool.DataCode,
    DataPool.DataDesc
FROM (
    SELECT
        'Position' AS DataType,
        p.Code     AS DataCode,
        p.Desc     AS DataDesc
    FROM TrxPositionAssets AS tp
    INNER JOIN PositionAssets AS p
        ON p.Id = tp.PositionAssetId
    LEFT JOIN TrxAssetPool AS pool
        ON pool.PositionAssetId = tp.PositionAssetId
       AND pool.MaintTrxId = tp.MaintTrxId
    WHERE tp.MaintTrxId = 1

    UNION

    SELECT
        'Physical' AS DataType,
        f.Code     AS DataCode,
        f.Desc     AS DataDesc
    FROM TrxPhysicalAssets AS tf
    INNER JOIN PhysicalAssets AS f
        ON f.Id = tf.PhysicalAssetId
    LEFT JOIN TrxAssetPool AS pool
        ON pool.PhysicalAssetId = tf.PhysicalAssetId
       AND pool.MaintTrxId = tf.MaintTrxId
    WHERE tf.MaintTrxId = 1
) AS DataPool
Sample at sqlfiddle.com
Current result:
DataType DataCode DataDesc
Position PositionA Mobile Phone
Position PositionB Laptop
Position PositionC Air conditioner
Physical PhysicalA1 Samsung
Physical PhysicalB1 Lenovo
Physical PhysicalB2 Acer
Physical PhysicalD Dunlop Car Tyre
Expected Result:
DataType DataCode DataDesc
Position PositionA Mobile Phone
Physical PhysicalA1 Samsung
Position PositionB Laptop
Physical PhysicalB1 Lenovo
Physical PhysicalB2 Acer
Position PositionC Air conditioner
Physical PhysicalD Dunlop Car Tyre
Air conditioner is not related to any Physical. Dunlop Car Tyre is not related to any Position.
At the end of the query, add:
-- Trailing clause only, not a complete statement.
-- NOTE(review): it resolves only if the outer query exposes a DataId column under an alias named DATA — confirm against the query it is appended to.
ORDER BY DATA.DataId ASC;
You need to select the information you want in the subquery. Also, the LEFT JOINs are not necessary, because they are undone by the WHERE and you probably want UNION ALL:
-- Lists position and physical assets for one transaction, ordered so that each
-- position (ord = 1) is followed by the physicals (ord = 2) paired with it.
-- Both branches carry tap.PositionAssetId and ord purely as sort keys.
-- NOTE(review): the {Table} / [Column] / #TrxId placeholders and the TrxId column
-- name are kept as written in this answer; the sample schema names the column
-- MaintTrxId — confirm before running.
-- NOTE(review): the inner joins to TrxAssetPool return only assets that have a
-- pairing; unpaired positions/physicals would need a LEFT JOIN — confirm intent.
SELECT Data.[DataId], Data.[TrxnDataId], Data.[Type]
FROM ((SELECT pa.[Id] AS DataId, tpa.[Id] AS TrxnDataId, 'Position' AS Type,
              tap.PositionAssetId, 1 AS ord
       FROM {TrxPositionAssets} tpa JOIN
            {PositionAssets} pa
            ON pa.[Id] = tpa.[PositionAssetId] JOIN
            {TrxAssetPool} tap
            ON tap.[PositionAssetId] = pa.[Id] AND tap.[TrxId] = tpa.[TrxId]
       WHERE tpa.[TrxId] = #TrxId
      ) UNION ALL
      (SELECT pa.[Id] AS DataId, tpa.[Id] AS TrxnDataId, 'Physical' AS Type,
              tap.PositionAssetId, 2 AS ord
       FROM {TrxPhysicalAssets} tpa JOIN
            {PhysicalAssets} pa
            ON pa.[Id] = tpa.[PhysicalAssetId] JOIN
            {TrxAssetPool} tap
            ON tap.[PhysicalAssetId] = pa.[Id] AND tap.[TrxId] = tpa.[TrxId]
       WHERE tpa.[TrxId] = #TrxId
      )
     ) Data
ORDER BY PositionAssetId, ord, DataId;
To solve your problem you have to simplify it and solve it step by step, it will be easier to find a solution.
E.g. Simply join two table
-- Each order with the name of the customer who placed it.
SELECT
    o.OrderID,
    c.CustomerName,
    o.OrderDate
FROM Customers AS c
INNER JOIN Orders AS o
    ON o.CustomerID = c.CustomerID;

how to select the min value using having key word

I have created the table stu_dept_cs
-- One row per student: identity columns, three marks, their total and average.
CREATE TABLE `stu_dept_cs` (
    `s_d_id`   INT(10) UNSIGNED NOT NULL AUTO_INCREMENT,
    `stu_name` VARCHAR(15),
    `gender`   VARCHAR(15),
    `address`  VARCHAR(15),
    `reg_no`   INT(10),
    `ex_no`    VARCHAR(10),
    `mark1`    VARCHAR(10),
    `mark2`    VARCHAR(15),
    `mark3`    VARCHAR(15),
    `total`    VARCHAR(15),
    `avg`      DOUBLE(2,0),
    PRIMARY KEY (`s_d_id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=DYNAMIC AUTO_INCREMENT=8;
then Inserted the values
-- Four students; the lowest avg in this data is 76 (parv).
INSERT INTO `stu_dept_cs`
    (`s_d_id`, `stu_name`, `gender`, `address`, `reg_no`, `ex_no`, `mark1`, `mark2`, `mark3`, `total`, `avg`)
VALUES
    (1, 'alex',  'm', 'chennai',  5001, 's1', '70', '90', '95', '255', 85),
    (2, 'peter', 'm', 'chennai',  5002, 's1', '80', '70', '90', '240', 80),
    (6, 'parv',  'f', 'mumbai',   5003, 's1', '88', '60', '80', '228', 76),
    (7, 'basu',  'm', 'kolkatta', 5004, 's1', '85', '95', '56', '236', 79);
I want to select the min(avg) using having keyword and I have used the following sql statement
-- The asker's attempt, kept verbatim. There is no GROUP BY, and the HAVING clause is a bare
-- aggregate (min(avg)) rather than a comparison between a row's avg and the minimum.
SELECT * FROM stu_dept_cs s having min(avg)
Is this correct? If not, please show the correct query.
-- Rows whose avg equals the table-wide minimum (ties all returned).
-- somecolumn1 / somecolumn2 are placeholders for the columns you want back.
-- Grouping by avg and testing HAVING avg = MIN(avg) cannot work: each group
-- then holds a single avg value, so the test is true for every group.
-- Compare against the minimum computed once in a scalar subquery instead.
SELECT
    somecolumn1,
    somecolumn2
FROM stu_dept_cs
WHERE avg = (SELECT MIN(avg) FROM stu_dept_cs);
or
-- Lowest-avg row within each (somecolumn1, somecolumn2) group; placeholders
-- stand for the real grouping columns. Rows are numbered inside each group by
-- ascending avg and only number 1 is kept (one arbitrary row on ties).
-- Requires an engine with window functions and CTEs (e.g. MySQL 8.0+).
WITH ranked AS (
    SELECT
        s.*,
        ROW_NUMBER() OVER (
            PARTITION BY s.somecolumn1, s.somecolumn2
            ORDER BY s.avg ASC
        ) AS rn
    FROM stu_dept_cs AS s
)
SELECT *
FROM ranked
WHERE rn = 1;
-- Anti-join: a row has the minimum avg when no other row has a smaller avg.
-- The "no match" test uses the primary key s_d_id, which is never NULL on a
-- real match; stu_name is nullable and could make a matched row look unmatched.
-- Rows with a NULL avg never compare as greater, so they are excluded
-- explicitly instead of being reported as the minimum.
SELECT cur.*
FROM stu_dept_cs AS cur
LEFT JOIN stu_dept_cs AS lower_row
    ON cur.avg > lower_row.avg
WHERE lower_row.s_d_id IS NULL
  AND cur.avg IS NOT NULL;

Best way to write this query

I have two MySql tables as shown below with the data shown:
-- Parent table: status starts out NULL in every row.
CREATE TABLE `A` (
  `id` int(12) NOT NULL AUTO_INCREMENT,
  `status` varchar(50) DEFAULT NULL,
  `another_field` varchar(50) DEFAULT NULL,
  -- MySQL requires an AUTO_INCREMENT column to be defined as a key.
  PRIMARY KEY (`id`)
);
INSERT INTO `A` (`id`, `status`, `another_field`) VALUES ('1', null, 'a');
INSERT INTO `A` (`id`, `status`, `another_field`) VALUES ('2', null, 'b');
INSERT INTO `A` (`id`, `status`, `another_field`) VALUES ('3', null, 'c');
-- Child table: tableA_id holds the id of the related row in A.
CREATE TABLE `B` (
  `id` int(12) NOT NULL AUTO_INCREMENT,
  `status` varchar(50) DEFAULT NULL,
  `tableA_id` int(12) DEFAULT NULL,
  PRIMARY KEY (`id`)
);
-- A.id 1 and 2 have a single status ('aa'); A.id 3 has two ('aa' and 'bb').
INSERT INTO `B` (`id`, `status`, `tableA_id`) VALUES ('1', 'aa', '1');
INSERT INTO `B` (`id`, `status`, `tableA_id`) VALUES ('2', 'aa', '1');
INSERT INTO `B` (`id`, `status`, `tableA_id`) VALUES ('3', 'aa', '2');
INSERT INTO `B` (`id`, `status`, `tableA_id`) VALUES ('4', 'aa', '3');
INSERT INTO `B` (`id`, `status`, `tableA_id`) VALUES ('5', 'bb', '3');
I want to know if it is possible to update A.status if all B.status are the same when A.id = B.tableA_id using a single query?
This is what I want my table A to look like:
('1', 'aa', 'a') - Status is updated to 'aa' as B.id 1 & 2 have the same status and same B.tableA_id value.
('2', 'aa', 'b') - Status is updated to 'aa' as B.id 3 has the same status.
('3', null, 'c') - This is not updated because B.id 4 & 5 have different statuses and the same B.tableA_id value.
Thanks
-- Intentionally touches every row of A (no WHERE clause).
-- The subquery yields a value only when the smallest and largest B.status for
-- this A.id coincide; otherwise it yields nothing and COALESCE falls back to
-- the current A.status, leaving the row unchanged.
UPDATE A
SET status = COALESCE(
    (
        SELECT MIN(B.status)
        FROM B
        WHERE B.tableA_id = A.id
        HAVING MIN(B.status) = MAX(B.status)
    ),
    A.status
)
(Note: I added a correction: you need the COALESCE(..., A.status), otherwise the status will be set to NULL when there are multiple statuses in B.)
Not sure about MySql but in MSSQL you could write something like:
-- SQL Server form: set A.status from B only when all B rows for that A.id
-- share one status. The derived table keeps a tableA_id only if its smallest
-- and largest status coincide, so A rows with mixed statuses are not joined
-- and stay untouched. The status is taken from B rather than hard-coded.
-- Note: MIN/MAX skip NULLs, so NULL statuses in B are ignored by this test.
UPDATE A
SET A.status = uniform.status
FROM A
INNER JOIN (
    SELECT
        B.tableA_id,
        MIN(B.status) AS status
    FROM B
    GROUP BY B.tableA_id
    HAVING MIN(B.status) = MAX(B.status)
) AS uniform
    ON uniform.tableA_id = A.id;
It should be similar in MySQL, but I'm not sure whether the language supports joins in an UPDATE. Still, I hope it helps.
-- MySQL form: update only the A rows whose B rows carry exactly one distinct
-- status, and copy that status across.
-- Table names are written as declared (`A`, `B`): MySQL table names can be
-- case-sensitive depending on the server's lower_case_table_names setting.
-- MIN() replaces an unordered LIMIT pick, so the copied value is always the
-- group's single non-NULL status, even if some B rows have a NULL status.
UPDATE A
SET status = (
    SELECT MIN(B.status)
    FROM B
    WHERE B.tableA_id = A.id
)
WHERE id IN (
    SELECT tableA_id
    FROM B
    GROUP BY tableA_id
    HAVING COUNT(DISTINCT status) = 1
);
Update: Roland was right; I have updated the query and it now yields the correct results.
-- Parent table: status starts out NULL in every row.
CREATE TABLE `A` (
  `id` int(12) NOT NULL AUTO_INCREMENT,
  `status` varchar(50) DEFAULT NULL,
  `another_field` varchar(50) DEFAULT NULL,
  -- MySQL requires an AUTO_INCREMENT column to be defined as a key.
  PRIMARY KEY (`id`)
);
INSERT INTO `A` (`id`, `status`, `another_field`) VALUES ('1', null, 'a');
INSERT INTO `A` (`id`, `status`, `another_field`) VALUES ('2', null, 'b');
INSERT INTO `A` (`id`, `status`, `another_field`) VALUES ('3', null, 'c');
-- Child table: tableA_id holds the id of the related row in A.
CREATE TABLE `B` (
  `id` int(12) NOT NULL AUTO_INCREMENT,
  `status` varchar(50) DEFAULT NULL,
  `tableA_id` int(12) DEFAULT NULL,
  PRIMARY KEY (`id`)
);
-- A.id 1 and 2 have a single status ('aa'); A.id 3 has two ('aa' and 'bb').
INSERT INTO `B` (`id`, `status`, `tableA_id`) VALUES ('1', 'aa', '1');
INSERT INTO `B` (`id`, `status`, `tableA_id`) VALUES ('2', 'aa', '1');
INSERT INTO `B` (`id`, `status`, `tableA_id`) VALUES ('3', 'aa', '2');
INSERT INTO `B` (`id`, `status`, `tableA_id`) VALUES ('4', 'aa', '3');
INSERT INTO `B` (`id`, `status`, `tableA_id`) VALUES ('5', 'bb', '3');