perl - compare two column of two SQL query from 2 databases - sql

In Perl, I want to copy year and parti from database 2 into database 1 wherever the name in database 1 equals the name in database 2.
database 1 database 2
----------------------------------- ----------------------------------
table truc table truc2
----------------------------------- ----------------------------------
id name year parti id name year parti
----------------------------------- ----------------------------------
1 Lapin 14 Lapin 2014 MODEM
118 Koala 33 Murène 1347 EELV
14 Murène 2 Ragondin 4218 SP
3 Ragondin 3 Koala 1512 CPNT
i want the result:
database 1
-----------------------------------
table truc
-----------------------------------
id name year parti
-----------------------------------
1 Lapin 2014 MODEM
118 Koala 1512 CPNT
14 Murène 1347 EELV
3 Ragondin 4218 SP
thanks for any response,
my code of perl and sql are here

Read from one database and update in the second one:
# Read every row of truc2 from the second database ...
my $query2 = $db2->prepare('SELECT id, name, sex, year, parti FROM truc2');
# ... and copy its year and parti into the row of truc that has the same name.
my $query1 = $db1->prepare('UPDATE truc'
                           . ' SET year = ?, parti = ? WHERE name = ?');
$query2->execute;
# Column order follows the SELECT list: 0 id, 1 name, 2 sex, 3 year, 4 parti.
while (my @row = $query2->fetchrow_array) {
    $query1->execute($row[3], $row[4], $row[1]);
}
Tested with:
#!/usr/bin/perl
# Self-contained demo: builds two in-memory SQLite databases, copies year and
# parti from truc2 (database 2) into truc (database 1) for rows whose names
# match, then prints every row of truc.
use warnings;
use strict;
use utf8;
use feature qw{ say };
use open IO => ':encoding(UTF-8)', ':std';
use DBI;

# "Database 1": the table to be completed. RaiseError makes DBI die on any
# failed call instead of carrying on silently.
my $db1 = DBI->connect('dbi:SQLite:dbname=:memory:', "", "",
                       { sqlite_unicode => 1, RaiseError => 1 });
$db1->do('CREATE TABLE truc'
         . ' (id INT, name TEXT, sex VARCHAR, year INT, parti TEXT)');

# "Database 2": a separate connection, hence a separate in-memory database.
my $db2 = DBI->connect('dbi:SQLite:dbname=:memory:', "", "",
                       { sqlite_unicode => 1, RaiseError => 1 });
$db2->do('CREATE TABLE truc2'
         . ' (id INT, name TEXT, sex VARCHAR, year INT, parti TEXT)');

# Seed truc without year/parti; those columns are filled in by the update below.
my $insert1 = $db1->prepare('INSERT INTO truc (id, name, sex)'
                            . ' VALUES (?, ?, ?)');
$insert1->execute(@$_) for [  1, 'Lapin',    'M'],
                           [118, 'Koala',    'F'],
                           [ 14, 'Murène',   'A'],
                           [  3, 'Ragondin', 'F'];

# Seed truc2 with complete rows; ids deliberately differ from those in truc.
my $insert2 = $db2->prepare('INSERT INTO truc2 (id, name, sex, year, parti)'
                            . ' VALUES (?, ?, ?, ?, ?)');
$insert2->execute(@$_) for [14, 'Lapin',    'A', 2014, 'MODEM'],
                           [33, 'Murène',   'F', 1347, 'EELV'],
                           [ 2, 'Ragondin', 'M', 4218, 'SP'],
                           [ 3, 'Koala',    'F', 1512, 'CPNT'];

# Read from database 2 and update the row with the same name in database 1.
my $query2 = $db2->prepare('SELECT id, name, sex, year, parti FROM truc2');
my $query1 = $db1->prepare('UPDATE truc'
                           . ' SET year = ?, parti = ? WHERE name = ?');
$query2->execute;
# Column order follows the SELECT list: 0 id, 1 name, 2 sex, 3 year, 4 parti.
while (my @row = $query2->fetchrow_array) {
    $query1->execute($row[3], $row[4], $row[1]);
}

# Print the resulting contents of truc, one row per line.
my $verify = $db1->prepare('SELECT * from truc');
$verify->execute;
while (my @row = $verify->fetchrow_array) {
    say "@row";
}

-- Copy year and parti from database2.truc2 into database1.truc for every pair
-- of rows whose names match. Uses the multi-table UPDATE ... JOIN ... SET form
-- (MySQL-style syntax); both schemas are addressed in a single statement.
UPDATE database1.truc
JOIN database2.truc2 ON database1.truc.name = database2.truc2.name
SET database1.truc.year = database2.truc2.year,
    database1.truc.parti = database2.truc2.parti;
No binding, no copying data into arrays, no loss of id, etc.

Related

How to get combination of value from single column?

I'm trying to get distinct possible combination value from single column in BigQuery.
Suppose i have this table:
+---------------------------------------------+
| date |type |payment |customer_no|status|
+---------------------------------------------+
|2019-01-02|Shirt |Cashless| 101|Cancel|
|2019-01-02|Jeans |Cashless| 133|OK |
|2019-01-02|Jeans |Cash | 102|OK |
|2019-01-02|Cap |Cash | 144|OK |
|2019-01-02|Shirt |Cash | 132|OK |
|2019-01-01|Jeans |Cash | 111|Cancel|
|2019-01-01|Cap |Cash | 141|OK |
|2019-01-01|Shirt |Cash | 101|OK |
|2019-01-01|Jeans |Cash | 105|OK |
I want to select rows according to these rules:
Only status = 'OK'
No repetition in combination like Shirt, Jeans and Jeans, Shirt is unacceptable
Group for each payment and its combination (Cash, Cashless, Cash&Cashless)
With this code:
#standardSQL
-- Counts customers per (date, type, payment) group among rows whose status is 'OK',
-- newest date first and, within a date, by payment ascending.
-- NOTE(review): the SELECT list names order_payment while GROUP BY / ORDER BY use
-- payment -- confirm both refer to the same column.
SELECT date,
type,
COUNT(customer_no) as total_customer_per_order_type,
order_payment
FROM `blabla.order`
WHERE status = 'OK'
GROUP BY date, type , payment
ORDER BY date DESC, payment ASC
With this I only get the total number of customers for a single type.
How can I get a table like this:
http://imgur.com/7aECjpSl.png
Below is for BigQuery Standard SQL and answers just the exact question in the title of your post which is:
How to get combination of value from single column?
#standardSQL
-- JavaScript UDF that returns the combinations of the elements of the input array a.
--   fn(n, src, got, all): recursively appends to `all` every non-empty way of
--     extending `got` with n further elements picked in order from src.
--   combine(a): calls fn for sizes 1 .. a.length - 1, then appends the whole
--     input array as the final, full-size combination.
-- Declared to return ARRAY<STRING>; presumably each JS array is coerced to its
-- comma-joined string form -- confirm.
CREATE TEMP FUNCTION test(a ARRAY<INT64>)
RETURNS ARRAY<STRING>
LANGUAGE js AS '''
var combine = function(a) {
var fn = function(n, src, got, all) {
if (n == 0) {
if (got.length > 0) {
all[all.length] = got;
} return;
}
for (var j = 0; j < src.length; j++) {
fn(n - 1, src.slice(j + 1), got.concat([src[j]]), all);
} return;
}
var all = [];
for (var i = 1; i < a.length; i++) {
fn(i, a, [], all);
}
all.push(a);
return all;
}
return combine(a)
''';
-- types: the distinct type values among status = 'OK' rows, each numbered by
-- DENSE_RANK over type order; the rank is cast to STRING so it can be compared
-- with the pieces produced by SPLIT below.
WITH types AS (
SELECT DISTINCT type, CAST(DENSE_RANK() OVER(ORDER BY type) AS STRING) type_num
FROM `project.dataset.order`
WHERE status = 'OK'
)
-- Feed the rank numbers 1..COUNT(types) to test(), split every returned item back
-- into individual ranks, map each rank to its type name, and aggregate the
-- names per item.
SELECT items, STRING_AGG(type ORDER BY type_num) types
FROM UNNEST(test(GENERATE_ARRAY(1,(SELECT COUNT(1) FROM types)))) AS items,
UNNEST(SPLIT(items)) AS pos
JOIN types ON pos = type_num
GROUP BY items
You can test, play with above using sample data from your questions as in below
#standardSQL
-- JavaScript UDF that returns the combinations of the elements of the input array a:
-- combine(a) collects, via the recursive helper fn, every combination of size
-- 1 .. a.length - 1 and then appends the whole input array as the last entry.
CREATE TEMP FUNCTION test(a ARRAY<INT64>)
RETURNS ARRAY<STRING>
LANGUAGE js AS '''
var combine = function(a) {
var fn = function(n, src, got, all) {
if (n == 0) {
if (got.length > 0) {
all[all.length] = got;
} return;
}
for (var j = 0; j < src.length; j++) {
fn(n - 1, src.slice(j + 1), got.concat([src[j]]), all);
} return;
}
var all = [];
for (var i = 1; i < a.length; i++) {
fn(i, a, [], all);
}
all.push(a);
return all;
}
return combine(a)
''';
-- Inline sample rows standing in for the real table; the first SELECT fixes the
-- column names (dt, type, payment, customer_no, status) for the whole UNION ALL.
WITH `project.dataset.order` AS (
SELECT '2019-01-02' dt, 'Shirt' type, 'Cashless' payment, 101 customer_no, 'Cancel' status UNION ALL
SELECT '2019-01-02', 'Jeans', 'Cashless', 133, 'OK' UNION ALL
SELECT '2019-01-02', 'Jeans', 'Cash', 102, 'OK' UNION ALL
SELECT '2019-01-02', 'Cap', 'Cash', 144, 'OK' UNION ALL
SELECT '2019-01-02', 'Shirt', 'Cash', 132, 'OK' UNION ALL
SELECT '2019-01-01', 'Jeans', 'Cash', 111, 'Cancel' UNION ALL
SELECT '2019-01-01', 'Cap', 'Cash', 141, 'OK' UNION ALL
SELECT '2019-01-01', 'Shirt', 'Cash', 101, 'OK' UNION ALL
SELECT '2019-01-01', 'Jeans', 'Cash', 105, 'OK'
-- types: distinct type values among status = 'OK' rows, numbered by DENSE_RANK
-- over type order and cast to STRING for the join below.
), types AS (
SELECT DISTINCT type, CAST(DENSE_RANK() OVER(ORDER BY type) AS STRING) type_num
FROM `project.dataset.order`
WHERE status = 'OK'
)
-- Expand the rank numbers into combinations with test(), split each item into
-- ranks, map ranks back to type names, and aggregate the names per item.
SELECT items, STRING_AGG(type ORDER BY type_num) types
FROM UNNEST(test(GENERATE_ARRAY(1,(SELECT COUNT(1) FROM types)))) AS items,
UNNEST(SPLIT(items)) AS pos
JOIN types ON pos = type_num
GROUP BY items
with result
Row items types
1 1 Cap
2 2 Jeans
3 3 Shirt
4 1,2 Cap,Jeans
5 1,3 Cap,Shirt
6 2,3 Jeans,Shirt
7 1,2,3 Cap,Jeans,Shirt

Group By with 'HAVING' clause on slick+play

Imagine I have a SQL table grades which has amongst other fields, the name of the student and the result of the grade:
| student | grade |
|----------|:---------:|
| Harry | Good |
| Ron | Good |
| Harry | Average |
| Harry | Fail |
| Hermione | Excellent |
| Hermione | Excellent |
| Ron | Average |
| ..... | .... |
If I wanted to select all the students with at least two 'Excellent' and zero 'Fail' grades one could do:
-- Students with at least two 'Excellent' grades and no 'Fail' grade.
-- Each SUM counts the rows of the group that carry the given grade.
select student
from grades
group by student
having
    sum(case when grade = 'Excellent' then 1 else 0 end) >= 2 and
    sum(case when grade = 'Fail' then 1 else 0 end) = 0
How could I translate such a query into Slick?
On the documentation the 'Having' clause they give seems simpler.
gradesTables
.groupBy(._student)
.map{ case(student, group) => (student, ???)}
.filter(???)
.list
On a related note, why do I get an error with the following:
gradesTables
.groupBy(._student)
.map{ case(student, group) => (student, group.filter(_.grade == "Fail").length)}
.list
The error is:
slick.SlickTreeException: Cannot convert node to SQL Comprehension
The following code in Slick will generate the SQL you need:
// Per-student aggregate: (student, number of "Excellent" grades, number of "Fail" grades),
// restricted to students with at least two "Excellent" and zero "Fail".
val query: Query[(Rep[String], Rep[Option[Int]], Rep[Option[Int]]), (String, Option[Int], Option[Int]), Seq] =
grades.groupBy( _.student ).map{ case (student, group) =>
// Project the group down to its grade column before aggregating.
val groupList = group.map(_.grade)
// Sum of a 1/0 CASE expression = count of rows with that grade.
val gradeExcel = groupList.map( grade =>
Case.If(grade === "Excellent").Then(1).Else(0) ).sum
val gradeFail = groupList.map( grade =>
Case.If(grade === "Fail").Then(1).Else(0) ).sum
(student, gradeExcel, gradeFail)
}.
// Filtering on the aggregated columns is what becomes the HAVING clause in the SQL below.
filter( g => g._2 >= 2 && g._3 === 0 )
// ...
println("Generated SQL:\n" + query.result.statements)
// Generated SQL:
// List(
// select "STUDENT", sum((case when ("GRADE" = 'Excellent') then 1 else 0 end)),
// sum((case when ("GRADE" = 'Fail') then 1 else 0 end)) from "GRADES" group by "STUDENT"
// having (sum((case when ("GRADE" = 'Excellent') then 1 else 0 end)) >= 2) and
// (sum((case when ("GRADE" = 'Fail') then 1 else 0 end)) = 0)
// )
db.run(query.result.map(println))
// Vector((Hermione,Some(2),Some(0)))

multiple aggregates and subquery in slick 3.1

I am trying to translate this sql into a slick 3.1 style collection query (single call). This sql (postgres) returns what I am looking for:
-- Summary for the items of order 1, returned as a single row:
--   lastModified   = earliest "dateModified" among the order's items in state 'new'
--   totalItemCount = number of items in the order
--   addedCount     = number of the order's items with "dateModified" >= lastModified
-- minDate is a one-row derived table, so both scalar subqueries can refer to minDate.min.
select
minDate.min as lastModified,
(select count("id") from "Items" where "orderId" = 1) as totalItemCount,
(select count("id") from "Items" where "orderId" = 1 and "dateModified" >= minDate.min) as addedCount
from
(select min("dateModified") as "min" from "Items" where "orderId" = 1 and "state" = 'new') as minDate
Returns: for a specified set of Items (from orderId), returns:
date of item last modified
total number of items
number of items added since the lastModified
But after many attempts, I can't figure out how to translate this to a single slick-style query
This code:
import scala.slick.driver.PostgresDriver
case class Item(id: Int, orderId: Int, state: String, dateModified: Int)
object SlickComplexQuery {
def main(args: Array[String]) = {
val driver = PostgresDriver
import driver.simple._
class ItemsTable(tag: Tag) extends Table[Item](tag, "Items"){
def id = column[Int]("id")
def orderId = column[Int]("orderId")
def state = column[String]("state")
def dateModified = column[Int]("dateModified")
def * = (id, orderId, state, dateModified) <> (Item.tupled, Item.unapply)
}
val items = TableQuery[ItemsTable]
val query1 = items
.filter(i => i.orderId === 1 && i.state === "new")
.map(_.dateModified)
.min
val query2 = items
.filter(_.orderId === 1)
.map(_.id)
.length
val query3 = items
.filter(i => i.orderId === 1 && i.dateModified >= query1)
.map(_.id)
.length
val query = Query(query1, query2, query3)
results in such query:
select x2.x3, x4.x5, x6.x7
from (select min(x8.x9) as x3
from (select x10."dateModified" as x9
from "Items" x10
where (x10."orderId" = 1) and (x10."state" = 'new')) x8) x2,
(select count(1) as x5
from (select x11."id" as x12
from "Items" x11
where x11."orderId" = 1) x13) x4,
(select count(1) as x7
from (select x14."id" as x15
from "Items" x14, (select min(x16.x17) as x18
from (select x19."dateModified" as x17
from "Items" x19
where (x19."orderId" = 1) and (x19."state" = 'new')) x16) x20
where (x14."orderId" = 1) and (x14."dateModified" >= x20.x18)) x21) x6
This query is much like yours; Slick 2.0 was used.

How to retrieve unique rows where multiple children that reference it exist for different types?

SELECT * FROM Fruit
INNER JOIN Apple ON Fruit.Id = Apple.FruitId
WHERE Apple.Type = 1 AND Apple.Type = 3
I need to get unique rows of Fruit that have both Apples that are of type 1 AND 3. Apple.Type is considered unique, but I wouldn't think it matters though.
With these rows, this should return two rows with both Fruit #50 and #52. The most important part is the Fruit.Id, I don't need to return the Types, but just need to make sure every single Fruit returned has at least one Apple.Type = 1 and one Apple.Type = 3.
Apple { Id = 1, FruitId = 50, Type = 0 }
Apple { Id = 2, FruitId = 50, Type = 1 }
Apple { Id = 3, FruitId = 50, Type = 3 }
Apple { Id = 4, FruitId = 51, Type = 1 }
Apple { Id = 5, FruitId = 51, Type = 2 }
Apple { Id = 6, FruitId = 52, Type = 3 }
Apple { Id = 7, FruitId = 52, Type = 1 }
Apple { Id = 8, FruitId = 52, Type = 2 }
Fruit { Id = 50 }
Fruit { Id = 51 }
Fruit { Id = 52 }
I'm not quite sure how to use DISTINCT and/or GROUP BY in order to form this query.
Group your apples table by fruit id and pick the results that have both desired types. Use this to get your fruits.
-- Fruits that have both a type-1 and a type-3 apple.
-- The subquery keeps only Apple rows of type 1 or 3, groups them per fruit, and
-- requires two distinct types in the group, i.e. both 1 and 3 are present.
SELECT *
FROM Fruit
WHERE id IN
(
SELECT FruitId
FROM Apple
WHERE Type IN (1,3)
GROUP BY FruitId
HAVING COUNT(DISTINCT Type) = 2
);
This would return the fruits with ID 50 and 52.
-- Fruits that have both a type-1 and a type-3 apple, expressed as two
-- correlated EXISTS checks against Apple (one per required type).
SELECT *
FROM Fruit
WHERE EXISTS (
SELECT 1 FROM Apple
WHERE Type = 1 AND Apple.FruitId = Fruit.Id
) AND EXISTS (
SELECT 1 FROM Apple
WHERE Type = 3 AND Apple.FruitId = Fruit.Id
)
Not the most efficient way, but transposing those columns out so you have multiple types per fruitid should do it.
-- Step 1: one helper table per required type, holding the fruits that have an apple of that type.
create table type_1 as select FruitId, Type as Type1 from Apple where Type = 1;
create table type_3 as select FruitId, Type as Type3 from Apple where Type = 3;
-- Step 2: the distinct fruit ids that appear in Apple.
create table Fruits as select distinct FruitId from Apple;
-- Step 3: pivot both types onto one row per fruit; a missing type leaves its column NULL (left join).
create table Fruit_Agg as select a.FruitId, b.Type1, c.Type3 from Fruits a left join type_1 b on a.FruitId = b.FruitId left join type_3 c on a.FruitId = c.FruitId;
-- Step 4: keep only the fruits for which both type columns are filled in.
create table Types_1and_3 as select FruitId from Fruit_Agg where Type1 = 1 and Type3 = 3;

Confused about behavior of setResultsName in Pyparsing

I am trying to parse a few SQL statements. Here is a sample:
select
ms.member_sk a,
dd.date_sk b,
st.subscription_type,
(SELECT foo FROM zoo) e
from dim_member_subscription_all p,
dim_subs_type
where a in (select moo from t10)
I am interested in getting tables only at this time. So I would like to see
[zoo, dim_member_subscription_all, dim_subs_type] & [t10]
I have put together a small script looking at Paul McGuire's example
#!/usr/bin/env python
"""Collect the table names referenced by each SELECT found in a SQL text.

Builds a small pyparsing grammar for SELECT statements and prints, for every
SELECT that ``searchString`` locates in ``txt``, a dict holding the table names
captured under the ``tables`` results name.
"""
import sys
import pprint
from pyparsing import *

pp = pprint.PrettyPrinter(indent=4)

# Punctuation tokens.
semicolon = Combine(Literal(';') + lineEnd)
comma = Literal(',')
lparen = Literal('(')
rparen = Literal(')')

# Case-insensitive SQL keywords.
update_kw, volatile_kw, create_kw, table_kw, as_kw, from_kw, \
    where_kw, join_kw, left_kw, right_kw, cross_kw, outer_kw, \
    on_kw, insert_kw, into_kw = \
    map(lambda x: Keyword(x, caseless=True),
        ['UPDATE', 'VOLATILE', 'CREATE', 'TABLE', 'AS', 'FROM',
         'WHERE', 'JOIN', 'LEFT', 'RIGHT',
         'CROSS', 'OUTER', 'ON', 'INSERT', 'INTO'])
select_kw = Keyword('SELECT', caseless=True) | Keyword('SEL', caseless=True)

# Every keyword above is reserved, so none of them can be taken for an identifier
# (outer_kw included, so OUTER is never parsed as a table or alias name).
reserved_words = (update_kw | volatile_kw | create_kw | table_kw | as_kw |
                  select_kw | from_kw | where_kw | join_kw |
                  left_kw | right_kw | cross_kw | outer_kw | on_kw |
                  insert_kw | into_kw)

# Identifiers, optionally qualified names, and optional aliases.
ident = ~reserved_words + Word(alphas, alphanums + '_')
table = Combine(Optional(ident + Literal('.')) + ident)
column = Combine(Optional(ident + Literal('.')) + (ident | Literal('*')))
column_alias = Optional(Optional(as_kw).suppress() + ident)
# Table aliases are suppressed, so they never show up among the parsed tokens.
table_alias = Optional(Optional(as_kw).suppress() + ident).suppress()

# select_stmt is a Forward because a parenthesised sub-select may appear
# wherever a table or a column is expected.
select_stmt = Forward()
nested_table = lparen.suppress() + select_stmt + rparen.suppress() + table_alias
table_list = delimitedList((nested_table | table) + table_alias)
column_list = delimitedList((nested_table | column) + column_alias)

txt = """
select
ms.member_sk a,
dd.date_sk b,
st.subscription_type,
(SELECT foo FROM zoo) e
from dim_member_subscription_all p,
dim_subs_type
where a in (select moo from t10)
"""

# listAllMatches=True keeps every match of the FROM list (including the ones
# inside nested selects) under 'tables' instead of only the last one.
select_stmt << (select_kw.suppress() + column_list + from_kw.suppress() +
                table_list.setResultsName('tables', listAllMatches=True))

print(txt)
for token in select_stmt.searchString(txt):
    pp.pprint(token.asDict())
I am getting the following nested output. Can anybody please help me understand what I am doing wrong?
{ 'tables': ([(['zoo'], {}), (['dim_member_subscription_all', 'dim_subs_type'], {})], {})}
{ 'tables': ([(['t10'], {})], {})}
searchString will return a list of all matching ParseResults - you can see the tables value of each using:
# One ParseResults per SELECT found; show the table names captured for each.
for token in select_stmt.searchString(txt):
    print(token.tables)
Giving:
[['zoo'], ['dim_member_subscription_all', 'dim_subs_type']]
[['t10']]
So searchString found two SELECT statements.
Recent versions of pyparsing support summing this list into a single consolidated result using the Python builtin sum. Accessing the tables value of this consolidated result looks like this:
# Merge the per-SELECT results into one ParseResults, then read its tables value.
print(sum(select_stmt.searchString(txt)).tables)
[['zoo'], ['dim_member_subscription_all', 'dim_subs_type'], ['t10']]
I think the parser is doing all you want, you just need to figure out how to process the returned results.
For further debugging, you should start using the dump method on ParseResults to see what you are getting, which will print the nested list of returned tokens, and then a hierarchical tree of all named results. For your example:
# Dump each match: its token list followed by its named results.
for token in select_stmt.searchString(txt):
    print(token.dump())
    print("")  # blank separator line; works on both Python 2 and 3
prints:
['ms.member_sk', 'a', 'dd.date_sk', 'b', 'st.subscription_type', 'foo', 'zoo', 'dim_member_subscription_all', 'dim_subs_type']
- tables: [['zoo'], ['dim_member_subscription_all', 'dim_subs_type']]
['moo', 't10']
- tables: [['t10']]