SQL Server 2008 R2: Pivot table query performance - sql

Table: Product
-- Sample table used by the pivot queries in this thread.
CREATE TABLE Product
(
    productID        INT,
    productName      VARCHAR(20),
    productsalesdate DATETIME,
    producttype      VARCHAR(20)
);
Insertion:
-- Seven sample rows: three 'Electronic' rows under productID 1, two 'books'
-- rows under productID 2 and two 'Vehicle' rows under productID 3.
INSERT INTO product (productID, productName, productsalesdate, producttype)
VALUES
    (1, 'PenDrive', '2010-01-01', 'Electronic'),
    (1, 'Computer', '2016-01-01', 'Electronic'),
    (1, 'Laptop',   '2011-02-02', 'Electronic'),
    (2, 'textbook', '2014-02-02', 'books'),
    (2, 'notebook', '2016-01-01', 'books'),
    (3, 'Car',      '2016-01-01', 'Vehicle'),
    (3, 'Bike',     '2016-01-07', 'Vehicle');
First try: this query returns the wrong count for each productType.
-- First attempt: one output row per product with its first/last sale date and
-- a count column for each product type.
SELECT productID, FirstSale,LastSale, [Electronic],[books],[Vehicle]
FROM
(
-- The derived table groups by productType as well as productID, so every
-- (productID, productType) pair is already collapsed to a single row here.
SELECT
productID,
MIN(ProductSalesdate) as FirstSale,
MAX(ProductSalesdate) as LastSale,
productType
FROM
Product
Group by productID,productType
) a
PIVOT
(
-- COUNT therefore sees at most one row per type, which is why the counts
-- come out wrong.
COUNT(productType)
FOR productType IN ( [Electronic],[books],[Vehicle] )
) AS pvt;
Second try: this query fixes the count problem, but it takes too long to execute on a large number of records.
-- Second attempt: compute the first/last sale date per productID once, join
-- that back to every Product row, and let PIVOT count the rows of each type.
SELECT
    pvt.productID,
    pvt.FirstSale,
    pvt.LastSale,
    pvt.[Electronic],
    pvt.[books],
    pvt.[Vehicle]
FROM
(
    SELECT
        sale_range.ProductID,
        sale_range.FirstSale,
        sale_range.LastSale,
        detail.ProductType
    FROM Product AS detail
    INNER JOIN
    (
        -- One row per productID with its earliest and latest sale date.
        SELECT
            productID,
            MIN(ProductSalesdate) AS FirstSale,
            MAX(ProductSalesdate) AS LastSale
        FROM Product
        GROUP BY productID
    ) AS sale_range
        ON sale_range.ProductID = detail.ProductID
) AS ab
PIVOT
(
    COUNT(productType)
    FOR productType IN ([Electronic], [books], [Vehicle])
) AS pvt;
Note: The second query works correctly, but its performance is poor because I join the Product table to itself
in order to get the count of each productType in the pivot query.
Question: How can I optimize the second query (my second try)?

The following uses a temporary table to store the derived table ab. My guess is it will improve the execution plan of the second query.
-- Step 1: materialise the joined rows (per-product sale range + product type)
-- into the temporary table #ab.
SELECT
    sale_range.ProductID,
    sale_range.FirstSale,
    sale_range.LastSale,
    detail.ProductType
INTO #ab
FROM Product AS detail
INNER JOIN
(
    SELECT
        productID,
        MIN(ProductSalesdate) AS FirstSale,
        MAX(ProductSalesdate) AS LastSale
    FROM Product
    GROUP BY productID
) AS sale_range
    ON sale_range.ProductID = detail.ProductID;

-- Step 2: pivot the materialised rows into one count column per product type.
SELECT
    pvt.productID,
    pvt.FirstSale,
    pvt.LastSale,
    pvt.[Electronic],
    pvt.[books],
    pvt.[Vehicle]
FROM #ab AS ab
PIVOT
(
    COUNT(productType)
    FOR productType IN ([Electronic], [books], [Vehicle])
) AS pvt;

-- Step 3: remove the temporary table.
DROP TABLE #ab;
EDIT: Just for fun, I wrote the following script, which loads 15k rows into #product. The whole script executes in about 1 second, so I still don't understand how your query can take 5.5 minutes. Here it is:
-- Self-contained timing demo: fills #product with 15,000 random rows, then
-- runs the temp-table + PIVOT approach against it and cleans up.
SET NOCOUNT ON;

CREATE TABLE #product (
    product_id INT,
    product_name VARCHAR(20),
    product_sales_date DATE,
    product_type VARCHAR(20)
);

-- Local variables take the @ sigil; a leading # is only valid for temporary
-- object names, so "DECLARE #cnt" does not parse.
DECLARE @cnt INT = 0;

WHILE @cnt < 15000
BEGIN
    INSERT INTO #product (
        product_id,
        product_name,
        product_sales_date,
        product_type
    )
    SELECT
        -- Random id in the range 0..20.
        product_id = ROUND(20 * RAND(), 0),
        product_name = LEFT(NEWID(), 20),
        -- Random date within 10 days either side of today.
        product_sales_date = DATEADD(DAY, ROUND((-10 + 20 * RAND()), 0), GETDATE()),
        product_type =
            CASE ROUND(2 * RAND(), 0)
                WHEN 0 THEN 'Electronic'
                WHEN 1 THEN 'books'
                ELSE 'Vehicle'
            END;

    SET @cnt = @cnt + 1;
END

-- Materialise the per-product sale range joined back to every row.
SELECT
    a.product_id,
    a.first_sale,
    a.last_sale,
    b.product_type
INTO #ab
FROM #product AS b
INNER JOIN
(
    SELECT
        product_id,
        MIN(product_sales_date) AS first_sale,
        MAX(product_sales_date) AS last_sale
    FROM #product
    GROUP BY product_id
) AS a
    ON a.product_id = b.product_id;

-- Pivot the product types into one count column each.
SELECT product_id, first_sale, last_sale, [Electronic], [books], [Vehicle]
FROM #ab AS ab
PIVOT
(
    COUNT(product_type)
    FOR product_type IN ([Electronic], [books], [Vehicle])
) AS pvt;

DROP TABLE #ab;
DROP TABLE #product;

Seems like you're trying to do something like this.. Not sure why you'd need extra joins or temp tables..
-- Single-pass alternative: aggregate once per (productID, productType) and
-- pivot the per-type counts, with no self-join or temporary table.
SELECT
    p.productID,
    p.FirstSale,
    p.LastSale,
    p.[Electronic],
    p.[books],
    p.[Vehicle]
FROM
(
    SELECT
        productID,
        productType,
        -- The sale range must be per productID, not per (productID, productType):
        -- PIVOT groups by every non-pivoted column, so per-type dates would split
        -- a product that has several types into several output rows. The windowed
        -- MIN/MAX over the grouped MIN/MAX spreads the product-wide range to
        -- every type row.
        MIN(MIN(ProductSalesdate)) OVER (PARTITION BY productID) AS FirstSale,
        MAX(MAX(ProductSalesdate)) OVER (PARTITION BY productID) AS LastSale,
        COUNT(productType) AS ProductCount
    FROM Product
    GROUP BY productID, productType
) t
PIVOT
(
    -- A type with no rows for a product yields NULL here, not 0.
    SUM(ProductCount)
    FOR productType IN ([Electronic], [books], [Vehicle])
) p
You'll get NULLs for the zero counts, but you can easily convert those to 0, e.g. COALESCE([Electronic], 0) in the outer SELECT list.

Related

Iterate through SQL select query to get more related data

I would like to generate an Excel report based on a SQL stored procedure.
To do that I have to fill a few columns of the report. Some are related to (let's say) the Order table, others to the Product table, and so on.
The Excel report needs to show the columns: Order no, Order value, Product 1 name, Product 2 name, Product 1 value, Product 2 value (there are at most 2 products on each order).
The Product table is linked to the Order table.
I know this kind of layout is a bit confusing, but that is what I want to get.
At the moment I have written this query:
-- Asker's current query: list each order together with its product columns.
-- NOTE(review): ORDER is a reserved word in SQL, so as a table name or alias it
-- normally has to be quoted; presumably the real object names differ. Confirm.
SELECT
order.OrderNo,
order.OrderValue,
product.Name,
product.Value
From ORDER AS order
-- Orders are kept even when they have no product row (outer join); product
-- columns come back as one result row per matching product, not side by side.
lEFT outer join PRODUCT as product on order.OrderId = product.OrderId
The query works, but I only get data for the first product in each order. Is there a way to select data from all the products belonging to each order?
You can use row_number() and conditional aggregation:
-- Put the first two products of every order side by side on one row.
-- Products are numbered within each order by descending Value, then the
-- numbered rows are folded back to one row per order with conditional MAX().
SELECT
    ranked.OrderNo,
    ranked.OrderValue,
    MAX(CASE WHEN product_rank = 1 THEN Name END)  AS Name_1,
    MAX(CASE WHEN product_rank = 1 THEN Value END) AS Value_1,
    MAX(CASE WHEN product_rank = 2 THEN Name END)  AS Name_2,
    MAX(CASE WHEN product_rank = 2 THEN Value END) AS Value_2
FROM (
    SELECT
        o.OrderNo,
        o.OrderValue,
        p.Name,
        p.Value,
        ROW_NUMBER() OVER (
            PARTITION BY o.OrderNo
            ORDER BY p.Value DESC
        ) AS product_rank
    FROM ORDER o
    LEFT JOIN PRODUCT p
        ON o.OrderId = p.OrderId
) ranked
GROUP BY
    ranked.OrderNo,
    ranked.OrderValue;
If your DBMS supports it, you can use a PIVOT for this, for example:
-- Self-contained PIVOT demo using table variables.
-- Table variables are declared with the @ sigil ("declare #order table" does
-- not parse; # names temporary tables, which are created with CREATE TABLE).
declare @order table(
    OrderId int,
    OrderNo varchar(10),
    OrderValue decimal(10,2)
)

declare @product table(
    OrderId int,
    [Name] varchar(20),
    [Value] decimal(10,2)
)

insert into @order values
    (1,'ORD001',436.45),
    (2,'ORD002',964.33),
    (3,'ORD003',1265.98)

insert into @product values
    (1,'Widget',195.45),
    (1,'Doohickey',241.00),
    (2,'Widget',195.45),
    (2,'Thingy',397.99),
    (2,'Doofer',370.89),
    (3,'Widget',195.45),
    (3,'Thingy',397.99),
    (3,'Foobar',415.78),
    (3,'Whatchamacallit',256.76)

-- One row per order; each product name becomes a column holding the summed
-- Value of that product on the order (NULL when the order lacks the product).
select
    OrderNo,
    OrderValue,
    [Widget],
    [Doohickey],
    [Thingy],
    [Doofer],
    [Foobar],
    [Whatchamacallit]
from
(
    select
        o.OrderNo,
        o.OrderValue,
        p.[Name],
        p.[Value]
    from @order o
    left join @product p on o.OrderId = p.OrderId
) SourceTable
pivot
(
    sum([Value])
    for [Name] in ([Widget], [Doohickey], [Thingy], [Doofer], [Foobar], [Whatchamacallit])
) as PivotTable
gives you this result:
However, this does require you to know all the distinct values of Product.Name which could appear and to include them in your query. The upside is you can perform further aggregation on the resultant values, per Product.

How to run distinct and Sum in one query in sql server 2008 R2

I have a table (#1), as shown in the attached image. First I want to sum the quantity for each distinct id. Then I want to show the number of ids that have the same total quantity.
Use SUM and COUNT:
-- Inner query: total quantity per id.
-- Outer query: how many ids share each total.
SELECT
    COUNT(*) AS totalId,
    per_id.qty
FROM (
    SELECT
        id,
        SUM(qty) AS qty
    FROM tbl
    GROUP BY id
) AS per_id
GROUP BY per_id.qty
ONLINE DEMO
Try this one after creating a temporary table
-- Stage the per-id totals in a temporary table.
CREATE TABLE #Temp
(
    id  INT,
    qty INT
);

INSERT INTO #Temp (id, qty)
SELECT
    id,
    SUM(qty)
FROM yourTable
GROUP BY id;

-- Show the staged totals.
SELECT id, qty
FROM #Temp;

-- Count how many ids share each total, largest total first.
SELECT
    COUNT(id),
    qty
FROM #Temp
GROUP BY qty
ORDER BY qty DESC;
to show the sum of all quantities of all distinct id:
-- Total quantity per id ("table" stands in for the real table name).
SELECT
    id,
    SUM(qty)
FROM table
GROUP BY id;
to show number of id that have same quantity
-- Number of ids sharing each total quantity ("table" stands in for the real
-- table name). SQL Server requires every derived table to carry an alias,
-- hence "AS per_id"; without it the statement fails to parse.
SELECT
    COUNT(id),
    quantity
FROM (
    SELECT
        id,
        SUM(qty) AS quantity
    FROM table
    GROUP BY id
) AS per_id
GROUP BY quantity

VLookup in SQL? - Joining to only pick out the top row

I am trying to get just the first row from a JOIN in SQL, something similar to VLOOKUP in Excel.
I have the following tables
-- Lookup from a customer's product name to the supplier's name for it.
CREATE TABLE customer_lookup (
    customer_product VARCHAR(50),
    supplier_product VARCHAR(50),
    customer_code    VARCHAR(10)
);

CREATE TABLE supplier (
    part_number VARCHAR(50)
);

-- CONTAINER deliberately appears twice (CUST01 and CUST02) to reproduce the
-- duplicate-row problem in the join.
INSERT INTO customer_lookup (customer_product, supplier_product, customer_code)
VALUES
    ('CONTAINER', 'BOX',      'CUST01'),
    ('CONTAINER', 'BOX',      'CUST02'),
    ('FABRIC',    'MATERIAL', 'CUST01');

-- PAINT has no row in customer_lookup.
INSERT INTO supplier (part_number)
VALUES
    ('FABRIC'),
    ('CONTAINER'),
    ('PAINT');
and my query is
-- Every supplier part with all of its matching lookup rows; parts without a
-- match are kept with NULL lookup columns.
SELECT
    s.part_number,
    c.supplier_product,
    c.customer_code
FROM supplier AS s
LEFT JOIN customer_lookup AS c
    ON s.part_number = c.customer_product
http://sqlfiddle.com/#!6/716b5/1
The result I am trying to get is
part_number supplier_product customer_code
FABRIC MATERIAL CUST01
CONTAINER BOX CUST01
PAINT (null) (null)
but the above SQL query produces
part_number supplier_product customer_code
FABRIC MATERIAL CUST01
CONTAINER BOX CUST01
CONTAINER BOX CUST02
PAINT (null) (null)
I don't care that the CONTAINER row for customer_code CUST02 is missing. I just need the top one.
I have tried
-- Attempt with TOP 1: does not produce one lookup row per part.
SELECT
s.part_number, c.supplier_product, c.customer_code
FROM
supplier s
LEFT JOIN
(
-- TOP 1 is applied once to the whole customer_lookup table (the subquery is
-- not correlated to s), so c holds a single row and only a part_number equal
-- to that row's customer_product gets lookup values.
SELECT TOP 1 * FROM customer_lookup t
) c
ON s.part_number = c.customer_product
but this just nulls out both FABRIC and PAINT rows
Any help would be appreciated
You can use GROUP BY and MAX to achieve what you're looking for
-- Collapse duplicate lookup rows: one row per (part_number, supplier_product),
-- keeping the highest customer_code of the group.
SELECT
    s.part_number,
    c.supplier_product,
    MAX(c.customer_code)
FROM supplier AS s
LEFT JOIN customer_lookup AS c
    ON s.part_number = c.customer_product
GROUP BY
    s.part_number,
    c.supplier_product
For every part_number and supplier_product unique identifying combination, you want the highest customer_code value.
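If you don't care which row qualifies as the top row, as long as at most one row is returned per part, you can use the row_number window function with an arbitrary ordering. (On SQL Server, write the arbitrary ordering as ORDER BY (SELECT NULL), because a constant such as NULL is not accepted in a window ORDER BY.)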
-- Keep at most one (arbitrary) customer_lookup row per customer_product and
-- join only that row to each supplier part.
SELECT
    s.part_number,
    c.supplier_product,
    c.customer_code
FROM supplier s
LEFT JOIN (
    SELECT
        customer_product,
        supplier_product,
        customer_code,
        -- SQL Server rejects a constant such as NULL in a window ORDER BY;
        -- the (SELECT NULL) subquery is the accepted way to say "no particular
        -- order". Replace it with real columns if the choice of row matters.
        ROW_NUMBER() OVER (
            PARTITION BY customer_product
            ORDER BY (SELECT NULL)
        ) AS rn
    FROM customer_lookup
) c
    ON s.part_number = c.customer_product
    -- Filtering on rn in the ON clause (not WHERE) keeps unmatched parts.
    AND c.rn = 1
If you do care which row gets picked, then just modify the order by clause accordingly.
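You can simply use APPLY to get your results: OUTER APPLY if supplier rows without a lookup match (such as PAINT) must be kept, CROSS APPLY if they should be dropped. The main benefit here is that you are not using aggregation (GROUP BY).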
-- For each supplier part, fetch the single lookup row with the lowest
-- customer_code.
SELECT
    s.part_number,
    c.supplier_product,
    c.customer_code
FROM supplier s
-- OUTER APPLY rather than CROSS APPLY: CROSS APPLY drops supplier rows whose
-- subquery returns nothing, which would lose PAINT from the expected result;
-- OUTER APPLY keeps them with NULL lookup columns.
OUTER APPLY
(
    SELECT TOP 1
        t.supplier_product,
        t.customer_code
    FROM customer_lookup t
    WHERE s.part_number = t.customer_product
    ORDER BY t.customer_code
) c
You should also add an ORDER BY inside the subquery so that the row TOP 1 picks is the one you want (I have added this for you).
You should also list the columns you are using rather than using an asterisk (*), but that's up to you.
http://sqlfiddle.com/#!6/716b5/17
If you're wanting the results you showed and in the order you showed them
-- One row per (part_number, supplier_product) with the lowest customer_code,
-- sorted by supplier_product in descending order.
SELECT
    s.part_number,
    c.supplier_product,
    MIN(c.customer_code)
FROM supplier AS s
LEFT JOIN customer_lookup AS c
    ON s.part_number = c.customer_product
GROUP BY
    s.part_number,
    c.supplier_product
ORDER BY c.supplier_product DESC

SQL Group BY SUM one column and select of first row of grouped items

I have a part table with 5 fields. I want to sum the QTY per MFGPN while showing the first returned row for the other 3 fields (Manufacturer, DateCode, Description). I initially thought of using the MIN function as follows, but that doesn't really help me because the data is not an int data type. How would I go about doing this? Right now I'm stuck at the following query:
-- Asker's stuck attempt: total QTY per MFGPN plus one representative value
-- for each of the other columns.
SELECT SUM([QTY]) AS QTY
,[MFGPN]
,MIN([MANUFACTURER]) AS MANUFACTURER
,MIN([DATECODE]) AS DateCode
,MIN([DESCRIPTION]) AS DESCRIPTION
-- NOTE(review): no FROM clause is visible in this statement; presumably the
-- source table was lost when the question was posted. Confirm.
INTO part
-- Grouping by all four columns makes every distinct combination its own
-- group, so QTY is not summed per MFGPN alone.
GROUP BY MFGPN, MANUFACTURER, DATECODE, description
ORDER BY mfgpn ASC
Would CROSS APPLY work for you?
-- Total QTY per MFGPN, shown with the descriptive columns of one
-- representative row of that MFGPN.
SELECT
    SUM(a.[QTY]) AS QTY
    ,a.[MFGPN]
    ,c.[MANUFACTURER]
    ,c.[DATECODE]
    ,c.[DESCRIPTION]
FROM part a
CROSS APPLY (
    SELECT TOP 1
        b.[MANUFACTURER]
        ,b.[DATECODE]
        ,b.[DESCRIPTION]
    FROM part b
    WHERE a.[MFGPN] = b.[MFGPN]
    -- TOP 1 without ORDER BY returns an arbitrary row, so the "first" row
    -- could change between runs. Order explicitly; adjust the columns to
    -- whatever should define "first".
    ORDER BY b.[DATECODE], b.[MANUFACTURER]
) c
GROUP BY
    a.[MFGPN]
    ,c.[MANUFACTURER]
    ,c.[DATECODE]
    ,c.[DESCRIPTION]
Tested with the following:
-- Self-contained check of the CROSS APPLY query against a table variable.
-- Table variables take the @ sigil ("DECLARE #T1 AS TABLE" does not parse).
DECLARE @T1 AS TABLE (
    [QTY] int
    ,[MFGPN] NVARCHAR(50)
    ,[MANUFACTURER] NVARCHAR(50)
    ,[DATECODE] DATE
    ,[DESCRIPTION] NVARCHAR(50));

INSERT @T1 VALUES
    (2, 'MFGPN-1', 'MANUFACTURER-A', '20120101', 'A-1'),
    (4, 'MFGPN-1', 'MANUFACTURER-B', '20120102', 'B-1'),
    (3, 'MFGPN-1', 'MANUFACTURER-C', '20120103', 'C-1'),
    (1, 'MFGPN-2', 'MANUFACTURER-A', '20120101', 'A-2'),
    (5, 'MFGPN-2', 'MANUFACTURER-B', '20120101', 'B-2')

SELECT
    SUM(a.[QTY]) AS QTY
    ,a.[MFGPN]
    ,c.[MANUFACTURER]
    ,c.[DATECODE]
    ,c.[DESCRIPTION]
FROM @T1 a
CROSS APPLY (
    SELECT TOP 1
        b.[MANUFACTURER]
        ,b.[DATECODE]
        ,b.[DESCRIPTION]
    FROM @T1 b
    WHERE a.[MFGPN] = b.[MFGPN]
    -- Explicit ordering makes the representative row deterministic; the two
    -- MFGPN-2 rows share a DATECODE, so MANUFACTURER breaks the tie.
    ORDER BY b.[DATECODE], b.[MANUFACTURER]
) c
GROUP BY
    a.[MFGPN]
    ,c.[MANUFACTURER]
    ,c.[DATECODE]
    ,c.[DESCRIPTION]
Produces
QTY MFGPN MANUFACTURER DATECODE DESCRIPTION
9 MFGPN-1 MANUFACTURER-A 2012-01-01 A-1
6 MFGPN-2 MANUFACTURER-A 2012-01-01 A-2
This can be easily managed with a windowed SUM():
-- For every row, compute the total QTY of its MFGPN group and the row's
-- position within the group, then keep only the first row of each group.
-- ("atable" stands in for the source table.)
WITH summed_and_ranked AS (
    SELECT
        MFGPN,
        MANUFACTURER,
        DATECODE,
        DESCRIPTION,
        QTY = SUM(QTY) OVER (PARTITION BY MFGPN),
        RNK = ROW_NUMBER() OVER (
            PARTITION BY MFGPN
            ORDER BY DATECODE  -- or which column should define the order?
        )
    FROM atable
)
SELECT
    MFGPN,
    MANUFACTURER,
    DATECODE,
    DESCRIPTION,
    QTY  -- no trailing comma: a comma directly before INTO is a syntax error
INTO parts
FROM summed_and_ranked
WHERE RNK = 1
;
For every row, the total group quantity and the ranking within the group is calculated. When actually getting rows for inserting into the new table (the main SELECT), only rows with RNK values of 1 are pulled. Thus you get a result set containing group totals as well as details of certain rows.

select latest 2 records of each product id in single table

this is my table structure,
-- Articles table; each article row carries the ProductID it belongs to.
CREATE TABLE ArticleTbl
(
    ArticleID   BIGINT IDENTITY(1,1),
    ProductID   INT,
    ArticleName VARCHAR(100),
    PubDate     DATETIME,
    AuthorName  VARCHAR(50),
    AuthorImage BIT,
    HtmlValues  NVARCHAR(MAX)
)
Here the ProductID values are:
1 = credit card, 2 = prepaid card, 3 = savings account, ...
Each ProductID has multiple rows.
I want to select the latest 2 records of each ProductID in one query instead of going to the database once per product.
My procedure currently looks like this:
-- Returns the two most recent articles of one product.
-- Parameters take the @ sigil ("#ProductID" does not parse).
create proc USP_GetArticle_ByProduct(@ProductID int) as
select top(2)
    ArticleID,
    ProductID,
    ArticleName,
    PubDate,
    AuthorName,
    AuthorImage,
    HtmlValues
from ArticleTbl
where ProductID = @ProductID
-- TOP without ORDER BY returns arbitrary rows; "latest" needs an explicit
-- ordering. ArticleID breaks ties between equal PubDate values.
order by PubDate desc, ArticleID desc
If I use this procedure, I have to go to the database once for each ProductID.
How can I get the latest 2 records of every product in a single query?
-- Latest two articles of every product in a single query.
SELECT
    foo.Ranking,
    foo.ArticleID,
    foo.ProductID,
    foo.ArticleName,
    foo.PubDate,
    foo.AuthorName,
    foo.AuthorImage,
    foo.HtmlValues
FROM
(
    SELECT
        -- Number each product's articles from newest to oldest. Ordering by
        -- NEWID() would return two random articles rather than the latest
        -- two; ArticleID breaks ties between equal PubDate values.
        ROW_NUMBER() OVER (
            PARTITION BY ProductID
            ORDER BY PubDate DESC, ArticleID DESC
        ) AS Ranking,
        ArticleID,
        ProductID,
        ArticleName,
        PubDate,
        AuthorName,
        AuthorImage,
        HtmlValues
    FROM ArticleTbl
) foo
WHERE
    foo.Ranking <= 2
I assume this is on SQL Server?
If so, you could do this:
-- Keep an article only when its articleid is among the two highest
-- articleid values of the same product; results are sorted by ProductID.
SELECT
    a1.*
FROM Articletbl AS a1
WHERE a1.articleid IN (
    SELECT TOP 2
        a2.articleid
    FROM ArticleTbl AS a2
    WHERE a2.productid = a1.productid
    ORDER BY a2.articleid DESC
)
ORDER BY a1.ProductID
order by a1.ProductID