Inner join in postgreSQL getting duplicate rows - sql

I have 2 SQL query.
query 1
select file_number_fk,sent_date as submitted_date from fl_file_movement
where sent_by_post_fk='735'
and file_number_fk='98223'
query 2
select file_number_fk,received_date as received_date from fl_file_movement
where recipient_post_fk='735'
and file_number_fk='98223'
each query return a table with 7 rows
when i try to join them i getting 49 rows
select distinct a.file_number_fk,
a.received_date,
b.submitted_date from(
select file_number_fk,received_date as received_date from fl_file_movement
where recipient_post_fk='735'
and file_number_fk='98223')a LEFT JOIN (
select file_number_fk,sent_date as submitted_date from fl_file_movement
where sent_by_post_fk='735'
and file_number_fk='98223')b ON a.file_number_fk=b.file_number_fk
i want a joined table with 7 rows. how to do this

I think your JOIN condition is not specific enough, as both of your query have equal on two fields and your JOIN has equal on only one of them, thus multiplying the result:
select distinct a.file_number_fk, a.received_date, b.submitted_date
from (select file_number_fk, received_date as received_date
from fl_file_movement
where recipient_post_fk='735' and file_number_fk='98223') a
LEFT JOIN (
select file_number_fk,sent_date as submitted_date
from fl_file_movement
where sent_by_post_fk='735'
and file_number_fk='98223') b
ON a.file_number_fk=b.file_number_fk AND a.recipient_post_fk = b.file_number_fk
The above query is basically what you have provided + extra JOIN condition + improved readability. I think that DISTINCT can be removed in this case.
Also, you can also solve this with SELF JOIN. Something like this:
SELECT src.file_number_fk, src.received_date, dest.submitted_date
FROM fl_file_movement src
JOIN fl_file_movement dest ON dest.recipient_post_fk = src.sent_by_post_fk and src.file_number_fk = dest.file_number_fk
WHERE dest.recipient_post_fk = '735' AND src.file_number_fk = '98223'

Related

Postgres, how to limit number of rows returned from joined tables

I have the following query that return the data I want, however for the joined tables, I want to limit the number of rows returned and preferrably be able to specify for each joined table.
I tried using limit with the select itself, but doesn't seem to be supported.
Is this possible? I am using Postgres 11.
select array_to_json(array_agg(t)) from (
select
tbl_327.field_43,tbl_327.field_1,tbl_327.field_2,
jsonb_agg(distinct jsonb_build_object('id',tbl_332.id,'data',tbl_332.fullname)) as field_7,
jsonb_agg(distinct jsonb_build_object('id',tbl_312.id,'data',tbl_312.fullname)) as field_33
from schema_1.tbl_327 tbl_327
left join schema_1.tbl_327_to_tbl_332_field_7 field_7 on field_7.tbl_327_id=tbl_327.id
left join schema_1.tbl_332_customid tbl_332 on tbl_332.id = field_7.tbl_332_id
left join schema_1.tbl_327_to_tbl_312_field_33 field_33 on field_33.tbl_327_id=tbl_327.id
left join schema_1.tbl_312_customid tbl_312 on tbl_312.id = field_33.tbl_312_id
group by tbl_327.field_43,tbl_327.field_1,tbl_327.field_2
) t
UPDATED
here is my new query. I simplified it, but the issue is it's no longer returning correct data. For the field_4 field, it's returing rows/data that isn't associated with the record. Do I have something wrong?
select array_to_json(array_agg(t)) from (
select
tbl_342.field_1,tbl_342.field_2,tbl_342.id,
jsonb_agg(distinct jsonb_build_object('id',tbl_312.id,'data',tbl_312.fullname)) as field_4
from schema_1.tbl_342 tbl_342
left join lateral (
select distinct field_4.*
from schema_1.tbl_342_to_tbl_312_field_4 field_4
where field_4.tbl_342_id=tbl_342.id
limit 50) field_4 on true
left join lateral (
select distinct tbl_312.*
from schema_1.tbl_312_customid tbl_312
where tbl_312_id = field_4.tbl_312_id
limit 5
) tbl_312 on true
group by tbl_342.field_1,tbl_342.field_2,tbl_342.id
) t
One approach is to turn each left join to a lateral join; you can then set the limit within each subquery:
select array_to_json(array_agg(t)) from (
select
tbl_327.field_43,tbl_327.field_1,tbl_327.field_2,
jsonb_agg(distinct jsonb_build_object('id',tbl_332.id,'data',tbl_332.fullname)) as field_7,
jsonb_agg(distinct jsonb_build_object('id',tbl_312.id,'data',tbl_312.fullname)) as field_33
from schema_1.tbl_327 tbl_327
left join lateral (
select field_7.*
from schema_1.tbl_327_to_tbl_332_field_7 field_7
where field_7.tbl_327_id=tbl_327.id
order by ...
limit 5
) field_7 on true
left join lateral (
select tbl_332.*
from schema_1.tbl_332_customid tbl_332
where tbl_332.id = field_7.tbl_332_id
order by ??
limit 5
) tbl_332 on true
left join lateral ...
group by tbl_327.field_43,tbl_327.field_1,tbl_327.field_2
) t
Note that you need an order by to go along with limit in order to get stable results - you can replace the question marks in the query with the revelant columns or set of columns.

How to join 100 random rows from table 1 multiple other tables in oracle

I have scrapped my previous question as I did not do a good job explaining. Maybe this will be simpler.
I have the following query.
Select * from comp_eval_hdr, comp_eval_pi_xref, core_pi, comp_eval_dtl
where comp_eval_hdr.START_DATE between TO_DATE('01-JAN-16' , 'DD-MON-YY')
and TO_DATE('12-DEC-17' , 'DD-MON-YY')
and comp_eval_hdr.COMP_EVAL_ID = comp_eval_dtl.COMP_EVAL_ID
and comp_eval_hdr.COMP_EVAL_ID = comp_eval_pi_xref.COMP_EVAL_ID
and core_pi.PI_ID = comp_eval_pi_xref.PI_ID
and core_pi.PROGRAM_CODE = 'PS'
Now if I only want a random 100 rows from the comp_eval_hdr table to join with the other tables how would I go about it? If it makes it easier you can disregard the comp_eval_dtl table.
I think you are pretty much there. You just need subqueries, table aliases, and JOIN conditions:
SELECT . . .
FROM (SELECT a.*
FROM (SELECT a.*
FROM a
WHERE a.START_DATE BEWTWEEN DATE '2016-01-01' AND DATE '2017-12-12'
ORDER BY DBMS_RANDOM.VALUE
) a
WHERE ROWNUM <= 100
) a JOIN
mapping m
ON a.? = m.? JOIN
b
ON m.? = b.?;
The ? is just a placeholder for the join columns.
It's a bit of a stretch to know what you want with the question as written but here's my attempt.
WITH rand_list AS
(SELECT * FROM comp_eval_hdr
WHERE comp_eval_hdr.START_DATE BEWTWEEN TO_DATE('01-JAN-16' , 'DD-MON-YY') AND TO_DATE('12-DEC-17' , 'DD-MON-YY')
ORDER BY DBMS_RANDOM.VALUE)
first_100 AS
(SELECT *
FROM rand_list
WHERE ROWNUM <=100)
SELECT md.col_1, t3.col_a
FROM first_100 md
INNER JOIN
table2 t2 ON md.id_column = t2.fk_comp_eval_hdr_id
INNER JOIN
table3 t3 ON t3.id_column = t2.fk_table3_id
You haven't given any indication how they join or the table names and obviously I haven't run this against any mock tables.
You've got a list of randomised records with RAND_LIST which you could, if you wanted, combine with the FIRST_100 query (your choice).
The main query then just joins that through your mapping table (T2) to your 'multiples' table (T3).
how does table 2 look like?...Let me put one example as person table and order table?
select * from (
select * from person ps , order order where ps.city = 'mumbai' and ps.id = order.purchasedby ) porder where porder.rownum <= 100
I did not tested it but it will look something like this.

Display Y/N column if record found in detail table

I'm trying to create a query so that I can have a column show Y/N if a particular item was ordered for a group of orders. The item I'm looking for would be OLI.id = '538'.
So my results would be:
Order#, Customer#, FreightPaid
12345, 00112233, Y
12346, 00112233, N
I cannot figure out if I need to use a subquery or the where exists function ?
Here's my current query:
SELECT distinct
OrderID,
Accountuid as Customerno
FROM [SMILEWEB_live].[dbo].[OrderLog] OL
inner join Orderlog_item OLI on OLI.orderlogkey = OL.[key]
inner join Account A on A.uid = OL.Accountuid
where A.GroupId = 'X9955'
and OL.CreateDate >= GETDATE() - 60
I would suggest an exists clause instead of a join:
select ol.OrderID, ol.Accountuid as Customerno,
(case when exists (select 1
from Orderlog_item OLI join
Account A
on A.uid = OL.Accountuid
where OLI.orderlogkey = OL.[key] and A.GroupId = 'X9955'
)
then 1 else 0
end) as flag
from [SMILEWEB_live].[dbo].[OrderLog] OL
where OL.CreateDate >= GETDATE() - 60;
This prevents a couple of problems. First, duplicate rows which are caused when there are multiple matching rows (and select distinct add unnecessary overhead). Second, missing rows, which happen when you use inner join instead of an outer join.

How to use multiple count and where condition sql server 2008?

I have this two query
1.
select CL_Clients.cl_id,CL_Clients].cl_name,COUNT(*) AS number_of_orders
from CL_Clients,CLOI_ClientOrderItems
where CL_Clients.cl_id=CLOI_ClientOrderItems.cl_id
group by CL_Clients.cl_name,CL_Clients.cl_id
2.
select CL_Clients.cl_id,count(cloi_current_status) as dis
from CLOI_ClientOrderItems,CL_Clients
where cloi_current_status]='12'
and CL_Clients.cl_id=CLOI_ClientOrderItems.cl_id
group by CL_Clients.cl_name,CL_Clients.cl_id,CLOI_ClientOrderItems.cloi_current_status
i have this column i need to put count function and where condition
[cloi_current_status]
166
30
30
30
150
150
150
150
150
150
150
Quite simple, you just encapsulate the queries and give their result sets an alias and then do a JOIN between their aliases on the column that is common. (In the query below I assume you'll be joining by client id)
SELECT *
FROM (
SELECT CL_Clients.cl_id,
CL_Clients].cl_name,
COUNT(*) AS number_of_orders
FROM CL_Clients,
CLOI_ClientOrderItems
WHERE CL_Clients.cl_id = CLOI_ClientOrderItems.cl_id
GROUP BY CL_Clients.cl_name,
CL_Clients.cl_id
) A
INNER JOIN (
SELECT CL_Clients.cl_id,
count(cloi_current_status) AS dis
FROM CLOI_ClientOrderItems,
CL_Clients
WHERE cloi_current_status] = '12'
AND CL_Clients.cl_id = CLOI_ClientOrderItems.cl_id
GROUP BY CL_Clients.cl_name,
CL_Clients.cl_id,
CLOI_ClientOrderItems.cloi_current_status
) B
ON A.cl_id = B.cl_id
WHERE ...
GROUP BY ...
This will be treated as a separate result set, so you can also filter results with a WHERE or just a GROUP BY, just like in a normal SELECT.
UPDATE:
To answer the question in your comments, when you join two tables that have a column with the same value and use
SELECT * FROM A INNER JOIN B the * will show all columns returned by the join, meaning all columns from A and all columns from B, this is why you have duplicate columns.
If you want to filter the columns returned you can specifiy which columns you want returned. So, in your case, the top SELECT * can be replaced with
SELECT A.cl_id, A.cl_name, A.number_of_orders, B.dis so, your query becomes:
SELECT A.cl_id, A.cl_name, A.number_of_orders, B.dis
FROM (
SELECT CL_Clients.cl_id,
CL_Clients].cl_name,
COUNT(*) AS number_of_orders
FROM CL_Clients,
CLOI_ClientOrderItems
WHERE CL_Clients.cl_id = CLOI_ClientOrderItems.cl_id
GROUP BY CL_Clients.cl_name,
CL_Clients.cl_id
) A
INNER JOIN (
SELECT CL_Clients.cl_id,
count(cloi_current_status) AS dis
FROM CLOI_ClientOrderItems,
CL_Clients
WHERE cloi_current_status] = '12'
AND CL_Clients.cl_id = CLOI_ClientOrderItems.cl_id
GROUP BY CL_Clients.cl_name,
CL_Clients.cl_id,
CLOI_ClientOrderItems.cloi_current_status
) B
ON A.cl_id = B.cl_id
UPDATE #2:
For your last question, you need to GROUP BY at the end of the big query and use a HAVING condtion, like this:
GROUP BY A.cl_id, A.cl_name, A.number_of_orders, B.dis
HAVING COUNT(cloi_current_status) > 100
All depends on what data you are trying to get, but you can go about it like this.
SELECT Column_x, Column_y, etc..
FROM ClL_Clients a
JOIN (select CL_Clients.cl_id,CL_Clients].cl_name,COUNT(*) AS number_of_orders
from CL_Clients,CLOI_ClientOrderItems
where CL_Clients.cl_id=CLOI_ClientOrderItems.cl_id
group by CL_Clients.cl_name,CL_Clients.cl_id) b
on a.cl_id = b.cl_id
JOIN (select CL_Clients.cl_id,count(cloi_current_status) as dis
from CLOI_ClientOrderItems,CL_Clients
where cloi_current_status]='12'
and CL_Clients.cl_id=CLOI_ClientOrderItems.cl_id
group by CL_Clients.cl_name,CL_Clients.cl_id,CLOI_ClientOrderItems.cloi_current_status) c
on a.cl_id = c.cl_id
Group by BLAH BLAH
Hope this gets you in the right direction.

Limit join to one row

I have the following query:
SELECT sum((select count(*) as itemCount) * "SalesOrderItems"."price") as amount, 'rma' as
"creditType", "Clients"."company" as "client", "Clients".id as "ClientId", "Rmas".*
FROM "Rmas" JOIN "EsnsRmas" on("EsnsRmas"."RmaId" = "Rmas"."id")
JOIN "Esns" on ("Esns".id = "EsnsRmas"."EsnId")
JOIN "EsnsSalesOrderItems" on("EsnsSalesOrderItems"."EsnId" = "Esns"."id" )
JOIN "SalesOrderItems" on("SalesOrderItems"."id" = "EsnsSalesOrderItems"."SalesOrderItemId")
JOIN "Clients" on("Clients"."id" = "Rmas"."ClientId" )
WHERE "Rmas"."credited"=false AND "Rmas"."verifyStatus" IS NOT null
GROUP BY "Clients".id, "Rmas".id;
The problem is that the table "EsnsSalesOrderItems" can have the same EsnId in different entries. I want to restrict the query to only pull the last entry in "EsnsSalesOrderItems" that has the same "EsnId".
By "last" entry I mean the following:
The one that appears last in the table "EsnsSalesOrderItems". So for example if "EsnsSalesOrderItems" has two entries with "EsnId" = 6 and "createdAt" = '2012-06-19' and '2012-07-19' respectively it should only give me the entry from '2012-07-19'.
SELECT (count(*) * sum(s."price")) AS amount
, 'rma' AS "creditType"
, c."company" AS "client"
, c.id AS "ClientId"
, r.*
FROM "Rmas" r
JOIN "EsnsRmas" er ON er."RmaId" = r."id"
JOIN "Esns" e ON e.id = er."EsnId"
JOIN (
SELECT DISTINCT ON ("EsnId") *
FROM "EsnsSalesOrderItems"
ORDER BY "EsnId", "createdAt" DESC
) es ON es."EsnId" = e."id"
JOIN "SalesOrderItems" s ON s."id" = es."SalesOrderItemId"
JOIN "Clients" c ON c."id" = r."ClientId"
WHERE r."credited" = FALSE
AND r."verifyStatus" IS NOT NULL
GROUP BY c.id, r.id;
Your query in the question has an illegal aggregate over another aggregate:
sum((select count(*) as itemCount) * "SalesOrderItems"."price") as amount
Simplified and converted to legal syntax:
(count(*) * sum(s."price")) AS amount
But do you really want to multiply with the count per group?
I retrieve the the single row per group in "EsnsSalesOrderItems" with DISTINCT ON. Detailed explanation:
Select first row in each GROUP BY group?
I also added table aliases and formatting to make the query easier to parse for human eyes. If you could avoid camel case you could get rid of all the double quotes clouding the view.
Something like:
join (
select "EsnId",
row_number() over (partition by "EsnId" order by "createdAt" desc) as rn
from "EsnsSalesOrderItems"
) t ON t."EsnId" = "Esns"."id" and rn = 1
this will select the latest "EsnId" from "EsnsSalesOrderItems" based on the column creation_date. As you didn't post the structure of your tables, I had to "invent" a column name. You can use any column that allows you to define an order on the rows that suits you.
But remember the concept of the "last row" is only valid if you specifiy an order or the rows. A table as such is not ordered, nor is the result of a query unless you specify an order by
Necromancing because the answers are outdated.
Take advantage of the LATERAL keyword introduced in PG 9.3
left | right | inner JOIN LATERAL
I'll explain with an example:
Assuming you have a table "Contacts".
Now contacts have organisational units.
They can have one OU at a point in time, but N OUs at N points in time.
Now, if you have to query contacts and OU in a time period (not a reporting date, but a date range), you could N-fold increase the record count if you just did a left join.
So, to display the OU, you need to just join the first OU for each contact (where what shall be first is an arbitrary criterion - when taking the last value, for example, that is just another way of saying the first value when sorted by descending date order).
In SQL-server, you would use cross-apply (or rather OUTER APPLY since we need a left join), which will invoke a table-valued function on each row it has to join.
SELECT * FROM T_Contacts
--LEFT JOIN T_MAP_Contacts_Ref_OrganisationalUnit ON MAP_CTCOU_CT_UID = T_Contacts.CT_UID AND MAP_CTCOU_SoftDeleteStatus = 1
--WHERE T_MAP_Contacts_Ref_OrganisationalUnit.MAP_CTCOU_UID IS NULL -- 989
-- CROSS APPLY -- = INNER JOIN
OUTER APPLY -- = LEFT JOIN
(
SELECT TOP 1
--MAP_CTCOU_UID
MAP_CTCOU_CT_UID
,MAP_CTCOU_COU_UID
,MAP_CTCOU_DateFrom
,MAP_CTCOU_DateTo
FROM T_MAP_Contacts_Ref_OrganisationalUnit
WHERE MAP_CTCOU_SoftDeleteStatus = 1
AND MAP_CTCOU_CT_UID = T_Contacts.CT_UID
/*
AND
(
(#in_DateFrom <= T_MAP_Contacts_Ref_OrganisationalUnit.MAP_KTKOE_DateTo)
AND
(#in_DateTo >= T_MAP_Contacts_Ref_OrganisationalUnit.MAP_KTKOE_DateFrom)
)
*/
ORDER BY MAP_CTCOU_DateFrom
) AS FirstOE
In PostgreSQL, starting from version 9.3, you can do that, too - just use the LATERAL keyword to achieve the same:
SELECT * FROM T_Contacts
--LEFT JOIN T_MAP_Contacts_Ref_OrganisationalUnit ON MAP_CTCOU_CT_UID = T_Contacts.CT_UID AND MAP_CTCOU_SoftDeleteStatus = 1
--WHERE T_MAP_Contacts_Ref_OrganisationalUnit.MAP_CTCOU_UID IS NULL -- 989
LEFT JOIN LATERAL
(
SELECT
--MAP_CTCOU_UID
MAP_CTCOU_CT_UID
,MAP_CTCOU_COU_UID
,MAP_CTCOU_DateFrom
,MAP_CTCOU_DateTo
FROM T_MAP_Contacts_Ref_OrganisationalUnit
WHERE MAP_CTCOU_SoftDeleteStatus = 1
AND MAP_CTCOU_CT_UID = T_Contacts.CT_UID
/*
AND
(
(__in_DateFrom <= T_MAP_Contacts_Ref_OrganisationalUnit.MAP_KTKOE_DateTo)
AND
(__in_DateTo >= T_MAP_Contacts_Ref_OrganisationalUnit.MAP_KTKOE_DateFrom)
)
*/
ORDER BY MAP_CTCOU_DateFrom
LIMIT 1
) AS FirstOE
Try using a subquery in your ON clause. An abstract example:
SELECT
*
FROM table1
JOIN table2 ON table2.id = (
SELECT id FROM table2 WHERE table2.table1_id = table1.id LIMIT 1
)
WHERE
...