How to get combination of value from single column? - sql

I'm trying to get all distinct possible combinations of the values in a single column in BigQuery.
Suppose I have this table:
+---------------------------------------------+
| date |type |payment |customer_no|status|
+---------------------------------------------+
|2019-01-02|Shirt |Cashless| 101|Cancel|
|2019-01-02|Jeans |Cashless| 133|OK |
|2019-01-02|Jeans |Cash | 102|OK |
|2019-01-02|Cap |Cash | 144|OK |
|2019-01-02|Shirt |Cash | 132|OK |
|2019-01-01|Jeans |Cash | 111|Cancel|
|2019-01-01|Cap |Cash | 141|OK |
|2019-01-01|Shirt |Cash | 101|OK |
|2019-01-01|Jeans |Cash | 105|OK |
I want the result to follow these rules:
Only rows with status = 'OK'
No repeated combinations: Shirt, Jeans and Jeans, Shirt are the same combination, so only one of them is acceptable
Grouped by each payment method and their combination (Cash, Cashless, Cash & Cashless)
With this code:
#standardSQL
-- Counts the customers with successful ('OK') orders per date, item type and
-- payment method.
SELECT
  date,
  type,
  COUNT(customer_no) AS total_customer_per_order_type,
  -- The table's column is `payment`; expose it under the intended output name
  -- so the selected column matches the GROUP BY key.
  payment AS order_payment
FROM `blabla.order`
WHERE status = 'OK'
GROUP BY date, type, payment
ORDER BY date DESC, payment ASC
With this query I only get the total number of customers for each single type.
How can I get a table like this one:
http://imgur.com/7aECjpSl.png

Below is for BigQuery Standard SQL and answers just the exact question in the title of your post which is:
How to get combination of value from single column?
#standardSQL
-- Returns every non-empty combination (subset) of the elements of `a`, each
-- rendered as a comma-separated string, e.g. [1, 2, 3] ->
-- ['1', '2', '3', '1,2', '1,3', '2,3', '1,2,3'].
-- Smaller combinations come first and the elements inside a combination keep
-- their input order. An empty input yields an empty array.
CREATE TEMP FUNCTION test(a ARRAY<INT64>)
RETURNS ARRAY<STRING>
LANGUAGE js AS '''
  // Collects into `all` every way of picking `n` more elements from `src`,
  // appended to the elements already chosen in `got`.
  var fn = function(n, src, got, all) {
    if (n == 0) {
      if (got.length > 0) {
        // Join explicitly so each result element really is a string,
        // matching the declared ARRAY<STRING> return type.
        all.push(got.join(','));
      }
      return;
    }
    for (var j = 0; j < src.length; j++) {
      // Only the elements after position j stay eligible, so the same
      // combination is never produced twice in a different order.
      fn(n - 1, src.slice(j + 1), got.concat([src[j]]), all);
    }
  };
  var all = [];
  // Sizes 1..length: the full set comes out of the same code path, and an
  // empty input produces no combination at all.
  for (var i = 1; i <= a.length; i++) {
    fn(i, a, [], all);
  }
  return all;
''';
-- Maps every distinct type that has at least one 'OK' order to a 1-based
-- position (alphabetical order). The position is kept as a string so it can be
-- compared with the pieces produced by SPLIT below.
WITH types AS (
  SELECT DISTINCT
    type,
    CAST(DENSE_RANK() OVER (ORDER BY type) AS STRING) AS type_num
  FROM `project.dataset.order`
  WHERE status = 'OK'
)
-- One row per combination of positions, translated back to the type names.
SELECT
  items,
  -- Order by the name itself: positions were assigned in name order, whereas
  -- sorting the string-typed type_num would put '10' before '2'.
  STRING_AGG(type ORDER BY type) AS types
FROM UNNEST(test(GENERATE_ARRAY(1, (SELECT COUNT(1) FROM types)))) AS items,
  UNNEST(SPLIT(items)) AS pos
JOIN types ON pos = type_num
GROUP BY items
You can test and play with the above using the sample data from your question, as in the example below:
#standardSQL
-- Returns every non-empty combination (subset) of the elements of `a`, each
-- rendered as a comma-separated string, e.g. [1, 2, 3] ->
-- ['1', '2', '3', '1,2', '1,3', '2,3', '1,2,3'].
-- Smaller combinations come first and the elements inside a combination keep
-- their input order. An empty input yields an empty array.
CREATE TEMP FUNCTION test(a ARRAY<INT64>)
RETURNS ARRAY<STRING>
LANGUAGE js AS '''
  // Collects into `all` every way of picking `n` more elements from `src`,
  // appended to the elements already chosen in `got`.
  var fn = function(n, src, got, all) {
    if (n == 0) {
      if (got.length > 0) {
        // Join explicitly so each result element really is a string,
        // matching the declared ARRAY<STRING> return type.
        all.push(got.join(','));
      }
      return;
    }
    for (var j = 0; j < src.length; j++) {
      // Only the elements after position j stay eligible, so the same
      // combination is never produced twice in a different order.
      fn(n - 1, src.slice(j + 1), got.concat([src[j]]), all);
    }
  };
  var all = [];
  // Sizes 1..length: the full set comes out of the same code path, and an
  // empty input produces no combination at all.
  for (var i = 1; i <= a.length; i++) {
    fn(i, a, [], all);
  }
  return all;
''';
-- Inline sample data standing in for the real `project.dataset.order` table.
WITH `project.dataset.order` AS (
  SELECT '2019-01-02' dt, 'Shirt' type, 'Cashless' payment, 101 customer_no, 'Cancel' status UNION ALL
  SELECT '2019-01-02', 'Jeans', 'Cashless', 133, 'OK' UNION ALL
  SELECT '2019-01-02', 'Jeans', 'Cash', 102, 'OK' UNION ALL
  SELECT '2019-01-02', 'Cap', 'Cash', 144, 'OK' UNION ALL
  SELECT '2019-01-02', 'Shirt', 'Cash', 132, 'OK' UNION ALL
  SELECT '2019-01-01', 'Jeans', 'Cash', 111, 'Cancel' UNION ALL
  SELECT '2019-01-01', 'Cap', 'Cash', 141, 'OK' UNION ALL
  SELECT '2019-01-01', 'Shirt', 'Cash', 101, 'OK' UNION ALL
  SELECT '2019-01-01', 'Jeans', 'Cash', 105, 'OK'
),
-- Maps every distinct type that has at least one 'OK' order to a 1-based
-- position (alphabetical order). The position is kept as a string so it can be
-- compared with the pieces produced by SPLIT below.
types AS (
  SELECT DISTINCT
    type,
    CAST(DENSE_RANK() OVER (ORDER BY type) AS STRING) AS type_num
  FROM `project.dataset.order`
  WHERE status = 'OK'
)
-- One row per combination of positions, translated back to the type names.
SELECT
  items,
  -- Order by the name itself: positions were assigned in name order, whereas
  -- sorting the string-typed type_num would put '10' before '2'.
  STRING_AGG(type ORDER BY type) AS types
FROM UNNEST(test(GENERATE_ARRAY(1, (SELECT COUNT(1) FROM types)))) AS items,
  UNNEST(SPLIT(items)) AS pos
JOIN types ON pos = type_num
GROUP BY items
with result
Row items types
1 1 Cap
2 2 Jeans
3 3 Shirt
4 1,2 Cap,Jeans
5 1,3 Cap,Shirt
6 2,3 Jeans,Shirt
7 1,2,3 Cap,Jeans,Shirt

Related

All possible unique combination of a single column value partitioned by groups

I have a table like the following in Google BigQuery. I am trying to get all possible unique combination(all subsets except the null subset) of the Item column partitioned on Group.
Group Item
1 A
1 B
1 C
2 X
2 Y
2 Z
I am looking for an output like the following:
Group Item
1 A
1 B
1 C
1 A,B
1 B,C
1 A,C
1 A,B,C
2 X
2 Y
2 Z
2 X,Y
2 Y,Z
2 X,Z
2 X,Y,Z
I have tried to use this accepted answer to incorporate Group to no avail:
How to get combination of value from single column?
Consider below approach
-- Returns every non-empty combination (subset) of the elements of `a`, each
-- rendered as a comma-separated string, e.g. ['A', 'B', 'C'] ->
-- ['A', 'B', 'C', 'A,B', 'A,C', 'B,C', 'A,B,C'].
-- Smaller combinations come first and the elements inside a combination keep
-- their input order. An empty input yields an empty array.
CREATE TEMP FUNCTION generate_combinations(a ARRAY<STRING>)
RETURNS ARRAY<STRING>
LANGUAGE js AS '''
  // Collects into `all` every way of picking `n` more elements from `src`,
  // appended to the elements already chosen in `got`.
  var fn = function(n, src, got, all) {
    if (n == 0) {
      if (got.length > 0) {
        // Join explicitly so each result element really is a string,
        // matching the declared ARRAY<STRING> return type.
        all.push(got.join(','));
      }
      return;
    }
    for (var j = 0; j < src.length; j++) {
      // Only the elements after position j stay eligible, so the same
      // combination is never produced twice in a different order.
      fn(n - 1, src.slice(j + 1), got.concat([src[j]]), all);
    }
  };
  var all = [];
  // Sizes 1..length: the full set comes out of the same code path, and an
  // empty input produces no combination at all.
  for (var i = 1; i <= a.length; i++) {
    fn(i, a, [], all);
  }
  return all;
''';
-- Sample input: two groups with three items each.
with your_table as (
  select 1 as _Group, 'A' as Item
  union all select 1, 'B'
  union all select 1, 'C'
  union all select 2, 'X'
  union all select 2, 'Y'
  union all select 2, 'Z'
),
-- One row per group, carrying the array of all item combinations of that group.
group_combinations as (
  select
    _group,
    generate_combinations(array_agg(item)) as items
  from your_table
  group by _group
)
-- Flatten the array: one output row per (group, combination).
select
  _group,
  item
from group_combinations
cross join unnest(items) as item
with output
Try this
-- All non-empty item combinations per group, for any number of items.
-- (Unioning singles, pairs and running prefixes only happens to be complete
-- when a group has at most three items.)
with recursive _data as
(
  select 1 as _Group, 'A' as Item union all
  select 1 as _Group, 'B' as Item union all
  select 1 as _Group, 'C' as Item union all
  select 2 as _Group, 'X' as Item union all
  select 2 as _Group, 'Y' as Item union all
  select 2 as _Group, 'Z' as Item
),
-- Distinct, non-null items per group: the seed rows and the pool every
-- combination grows from.
items as (
  select distinct _Group, Item
  from _data
  where Item is not null
),
-- Grow each combination by one item per step. Only items greater than the last
-- one added are appended, so every subset is produced exactly once, in
-- ascending item order.
combos as (
  select _Group, Item as last_item, Item as combo
  from items
  union all
  select c._Group, i.Item, concat(c.combo, ',', i.Item)
  from combos as c
  inner join items as i
    on i._Group = c._Group
   and i.Item > c.last_item
)
select _Group, combo as Item
from combos
order by _Group

How to query Oracle grouping?

I have a problem that I don't know how to solve; can you help me?
The query returns the result shown in the screenshot, and I want it to be shown in one line instead of many, based on the age type.
https://imgur.com/a/OA6CBpa
-- Net accounted and entered balance (credits minus debits) and latest GL date
-- per entity and source application, for trial-balance rows up to the cutoff.
with trial as (
    select
        nvl(tr.applied_to_entity_id, tr.source_entity_id) as entity_id,
        tr.source_application_id as application_id,
        sum(nvl(tr.acctd_unrounded_cr, 0)) - sum(nvl(tr.acctd_unrounded_dr, 0)) as acctd_amount,
        sum(nvl(tr.entered_unrounded_cr, 0)) - sum(nvl(tr.entered_unrounded_dr, 0)) as entered_amount,
        max(tr.gl_date) as gl_date
    from xla.xla_trial_balances tr
    where tr.definition_code = 'AP_200_1001'
      and tr.source_application_id = 200
      and tr.gl_date <= fnd_date.canonical_to_date('2019-12-13') -- GL date cutoff (original note: "Data KG")
    group by
        nvl(tr.applied_to_entity_id, tr.source_entity_id),
        tr.source_application_id
),
-- Invoice 3568325 joined to its accounting entity, trial balance and payment
-- schedule rows; dni_opoznienia (Polish: "days of delay") is GL date minus due date.
x as (
    select
        ai.invoice_id,
        ai.invoice_num,
        ai.invoice_amount,
        ai.amount_paid,
        trial.entity_id,
        trial.acctd_amount,
        trial.entered_amount,
        trial.gl_date,
        aps.amount_remaining,
        aps.gross_amount,
        aps.due_date,
        aps.payment_status_flag,
        trial.gl_date - aps.due_date as dni_opoznienia
    from ap_invoices_all ai
    inner join xla.xla_transaction_entities xte
        on nvl(xte.source_id_int_1, -99) = ai.invoice_id
    inner join trial
        on trial.entity_id = xte.entity_id
       and trial.application_id = xte.application_id
    inner join ap_payment_schedules_all aps
        on aps.invoice_id = ai.invoice_id
    where ai.invoice_id = 3568325
      and xte.ledger_id = 1001
      and xte.entity_code = 'AP_INVOICES'
)
-- Every row of x is paired with every line of the chosen aging period;
-- przedzial (Polish: "interval") carries the amount only on the line whose day
-- range contains the delay, and 0 on all other lines.
select
    x.invoice_id,
    x.invoice_num,
    x.entity_id,
    x.acctd_amount,
    x.gl_date,
    x.amount_remaining,
    x.gross_amount,
    x.due_date,
    x.payment_status_flag,
    x.dni_opoznienia,
    aapl.days_start,
    aapl.days_to,
    case
        when x.dni_opoznienia between aapl.days_start and aapl.days_to then x.acctd_amount
        else 0
    end as przedzial
from x
cross join ap_aging_periods aap
inner join ap_aging_period_lines aapl
    on aapl.aging_period_id = aap.aging_period_id
where aap.period_name = 'TEST 5 okresow'
Based on your comment I guess you need the below
-- Expects a CTE named x to be in scope. Pairs each row of x with every line of
-- the chosen aging period, then keeps only the rows whose bucket amount
-- (przedzial) is positive.
select bucketed.*
from (
    select
        x.invoice_id,
        x.invoice_num,
        x.entity_id,
        x.acctd_amount,
        x.gl_date,
        x.amount_remaining,
        x.gross_amount,
        x.due_date,
        x.payment_status_flag,
        x.dni_opoznienia,
        aapl.days_start,
        aapl.days_to,
        case
            when x.dni_opoznienia between aapl.days_start and aapl.days_to then x.acctd_amount
            else 0
        end as przedzial
    from x
    cross join ap_aging_periods aap
    inner join ap_aging_period_lines aapl
        on aapl.aging_period_id = aap.aging_period_id
    where aap.period_name = 'TEST 5 okresow'
) bucketed
where bucketed.przedzial > 0;

Using GROUP BY/CASE with WHEN or IF

note: edited query below.
I am looking to segment a data set according to two criteria:
If a customer has more or less than 4 txns at a specific restaurant
If a customer has more or less than 24 txns at all the other restaurants in that data set.
I am using a conjunction of GROUP BY, CASE and WHEN or IF. I am not sure which approach is best, if either?
-- Attempt 1: segment customers by grouping on a CASE expression built from COUNT().
-- NOTE(review): not valid SQL as written; see the notes on WHERE and GROUP BY.
SELECT
COUNT(Customer) AS number_of_customers,
-- Prices starting with '-' or '0' become NULL, which AVG ignores.
AVG (CASE WHEN ItemPrice LIKE '-%' THEN NULL
WHEN ItemPrice LIKE '0%' THEN NULL
ELSE CAST (ItemPrice AS FLOAT) END) AS avg_item_price,
COUNT(DISTINCT(ReceiptIDDesc)) AS number_of_orders,
-- Unlike avg_item_price, this sum includes prices starting with '-' or '0'.
SUM(CAST(ItemPrice AS FLOAT)) AS total_spend
FROM Tacos
-- NOTE(review): NOT (d > A OR d > B) reduces to d <= A AND d <= B; confirm
-- whether a range between the two timestamps was intended.
WHERE NOT (PurchaseDate > '01/01/2016 12:00' OR '03/01/2016 12:00'<
PurchaseDate)
-- NOTE(review): aggregate calls are nested inside the GROUP BY expression, but
-- aggregates are computed per group, i.e. after grouping. 'MerchantFamily' is
-- quoted as a string literal and %TacoTruck% is an unquoted pattern; presumably
-- a MerchantFamily column compared against 'TacoTruck' was intended.
-- NOTE(review): the parentheses in each WHEN line are unbalanced, and the code
-- uses a threshold of 2 where the description above mentions 4 transactions.
GROUP BY
CASE
WHEN (COUNT('MerchantFamily' = %TacoTruck%)> 2) AND COUNT('MerchantFamily' != %TacoTruck%) >24)
THEN 'Fanatic'
WHEN (COUNT('MerchantFamily' = %TacoTruck%)> 2) AND COUNT('MerchantFamily' != %TacoTruck%) <24)
THEN 'Loyalist'
WHEN (COUNT('MerchantFamily' = %TacoTruck%)< 2) AND COUNT('MerchantFamily' != %TacoTruck%) <24)
THEN 'Seldom'
ELSE
'Potential'
END
OR
-- Attempt 2: the same segmentation written with IF() instead of CASE ... WHEN.
-- NOTE(review): not valid SQL as written. The CASE has no WHEN branches, and
-- the aggregates sit inside GROUP BY.
-- NOTE(review): 'MerchantFamily' = 'TacoTruck' compares two string literals, and
-- COUNT(IF(cond, 1, 0)) counts every row because both 1 and 0 are non-NULL.
GROUP BY
CASE
IF(COUNT(IF( 'MerchantFamily' = 'TacoTruck', 1, 0 ) ) > 2, TRUE, FALSE)
AND
IF(COUNT(IF( 'MerchantFamily' != 'TacoTruck',1, 0) ) < 24, TRUE, FALSE), 'Loyalist', NULL )
IF(COUNT(IF( 'MerchantFamily' = 'TacoTruck', 1, 0 ) ) > 2, TRUE, FALSE)
AND
IF(COUNT(IF( 'MerchantFamily' != 'TacoTruck', 1, 0 ) ) > 24, TRUE, FALSE), 'Fanatic', NULL)
IF(COUNT(IF( 'MerchantFamily' = 'TacoTruck', 1, 0 ) ) < 2, TRUE, FALSE)
AND
IF(COUNT( IF( 'MerchantFamily' != 'TacoTruck', 1, 0 ) ) < 24, TRUE, FALSE), 'Seldom', NULL)
ELSE
'Potential'
END
Neither of those approaches will work: you need to group first and then consider the aggregated count values through a HAVING clause, or in a nested subquery (a "derived table").
A CASE expression only evaluates values on a per-row basis; it does not scan multiple rows.

multiple aggregates and subquery in slick 3.1

I am trying to translate this sql into a slick 3.1 style collection query (single call). This sql (postgres) returns what I am looking for:
-- Earliest "dateModified" among the items of order 1 that are in state 'new'.
with minDate as (
    select min("dateModified") as "min"
    from "Items"
    where "orderId" = 1
      and "state" = 'new'
)
select
    minDate."min" as lastModified,
    -- Number of items in order 1.
    (
        select count("id")
        from "Items"
        where "orderId" = 1
    ) as totalItemCount,
    -- Number of items in order 1 modified at or after that earliest date.
    (
        select count("id")
        from "Items"
        where "orderId" = 1
          and "dateModified" >= minDate."min"
    ) as addedCount
from minDate
Returns: for a specified set of Items (from orderId), returns:
date of item last modified
total number of items
number of items added since the lastModified
But after many attempts, I can't figure out how to translate this to a single slick-style query
The following code:
import scala.slick.driver.PostgresDriver
// Row type mapped to the "Items" table.
case class Item(id: Int, orderId: Int, state: String, dateModified: Int)
// Builds three aggregate queries over "Items" and combines them into a single query.
object SlickComplexQuery {
def main(args: Array[String]) = {
val driver = PostgresDriver
import driver.simple._
// Table definition: the column names match the quoted identifiers used in the SQL.
class ItemsTable(tag: Tag) extends Table[Item](tag, "Items"){
def id = column[Int]("id")
def orderId = column[Int]("orderId")
def state = column[String]("state")
def dateModified = column[Int]("dateModified")
// Default projection: maps the four columns to and from the Item case class.
def * = (id, orderId, state, dateModified) <> (Item.tupled, Item.unapply)
}
val items = TableQuery[ItemsTable]
// Smallest dateModified among the items of order 1 in state "new".
val query1 = items
.filter(i => i.orderId === 1 && i.state === "new")
.map(_.dateModified)
.min
// Number of items in order 1.
val query2 = items
.filter(_.orderId === 1)
.map(_.id)
.length
// Number of items in order 1 whose dateModified is at or after query1's minimum.
val query3 = items
.filter(i => i.orderId === 1 && i.dateModified >= query1)
.map(_.id)
.length
// Combine the three aggregates into one query.
val query = Query(query1, query2, query3)
results in such query:
-- Generated SQL: one row with three aggregates, each computed in its own derived table.
select x2.x3, x4.x5, x6.x7
-- x2: minimum "dateModified" among the items of order 1 in state 'new'.
from (select min(x8.x9) as x3
from (select x10."dateModified" as x9
from "Items" x10
where (x10."orderId" = 1) and (x10."state" = 'new')) x8) x2,
-- x4: number of items in order 1.
(select count(1) as x5
from (select x11."id" as x12
from "Items" x11
where x11."orderId" = 1) x13) x4,
-- x6: number of items in order 1 modified at or after that minimum; the
-- minimum is recomputed here as derived table x20.
(select count(1) as x7
from (select x14."id" as x15
from "Items" x14, (select min(x16.x17) as x18
from (select x19."dateModified" as x17
from "Items" x19
where (x19."orderId" = 1) and (x19."state" = 'new')) x16) x20
where (x14."orderId" = 1) and (x14."dateModified" >= x20.x18)) x21) x6
This query is very similar to yours; Slick 2.0 was used.

How to retrieve unique rows where multiple children that reference it exist for different types?

-- NOTE: each joined row carries a single Apple.Type value, so
-- "Type = 1 AND Type = 3" is never true and this query returns no rows.
SELECT * FROM Fruit
INNER JOIN Apple ON Fruit.Id = Apple.FruitId
WHERE Apple.Type = 1 AND Apple.Type = 3
I need to get unique rows of Fruit that have both Apples that are of type 1 AND 3. Apple.Type is considered unique, but I wouldn't think it matters though.
With these rows, this should return two rows with both Fruit #50 and #52. The most important part is the Fruit.Id, I don't need to return the Types, but just need to make sure every single Fruit returned has at least one Apple.Type = 1 and one Apple.Type = 3.
Apple { Id = 1, FruitId = 50, Type = 0 }
Apple { Id = 2, FruitId = 50, Type = 1 }
Apple { Id = 3, FruitId = 50, Type = 3 }
Apple { Id = 4, FruitId = 51, Type = 1 }
Apple { Id = 5, FruitId = 51, Type = 2 }
Apple { Id = 6, FruitId = 52, Type = 3 }
Apple { Id = 7, FruitId = 52, Type = 1 }
Apple { Id = 8, FruitId = 52, Type = 2 }
Fruit { Id = 50 }
Fruit { Id = 51 }
Fruit { Id = 52 }
I'm not quite sure how to use DISTINCT and/or GROUP BY in order to form this query.
Group your apples table by fruit id and pick the results that have both desired types. Use this to get your fruits.
-- Fruits that own at least one Apple of Type 1 and at least one of Type 3.
SELECT f.*
FROM Fruit f
INNER JOIN (
    -- Keep only the two wanted types, then require both to be present per fruit.
    SELECT a.FruitId
    FROM Apple a
    WHERE a.Type = 1 OR a.Type = 3
    GROUP BY a.FruitId
    HAVING COUNT(DISTINCT a.Type) = 2
) both_types
    ON both_types.FruitId = f.Id;
This would return the fruits with ID 50 and 52.
-- Fruits for which an Apple of Type 1 exists and an Apple of Type 3 exists.
SELECT f.*
FROM Fruit f
WHERE EXISTS (
        SELECT *
        FROM Apple a1
        WHERE a1.FruitId = f.Id
          AND a1.Type = 1
    )
  AND EXISTS (
        SELECT *
        FROM Apple a3
        WHERE a3.FruitId = f.Id
          AND a3.Type = 3
    )
This is not the most efficient way, but transposing those types into columns, so that each FruitId has one column per type, should do it.
-- Step 1: the Type 1 apples, one row per matching Apple row.
create table type_1 as
select FruitId, Type as Type1
from Apple
where Type = 1;

-- Step 2: the Type 3 apples, one row per matching Apple row.
create table type_3 as
select FruitId, Type as Type3
from Apple
where Type = 3;

-- Step 3: every fruit id that appears in Apple.
create table Fruits as
select distinct FruitId
from Apple;

-- Step 4: put the two types side by side per fruit (NULL where a type is missing).
create table Fruit_Agg as
select f.FruitId, t1.Type1, t3.Type3
from Fruits f
left join type_1 t1 on f.FruitId = t1.FruitId
left join type_3 t3 on f.FruitId = t3.FruitId;

-- Step 5: keep the fruits that have both types.
create table Types_1and_3 as
select FruitId
from Fruit_Agg
where Type1 = 1 and Type3 = 3;