How to do a case sensitive GROUP BY? - sql

If I execute the code below:
with temp as
(
select 'Test' as name
UNION ALL
select 'TEST'
UNION ALL
select 'test'
UNION ALL
select 'tester'
UNION ALL
select 'tester'
)
SELECT name, COUNT(name)
FROM temp
group by name
It returns the results:
TEST 3
tester 2
Is there a way to have the group by be case sensitive so that the results would be:
Test 1
TEST 1
test 1
tester 2

You need to cast the text as binary (or use a case-sensitive collation).
With temp as
(
select 'Test' as name
UNION ALL
select 'TEST'
UNION ALL
select 'test'
UNION ALL
select 'tester'
UNION ALL
select 'tester'
)
Select Name, COUNT(name)
From temp
Group By Name, Cast(name As varbinary(100))
Using a collation:
Select Name Collate SQL_Latin1_General_CP1_CS_AS, COUNT(name)
From temp
Group By Name Collate SQL_Latin1_General_CP1_CS_AS

You can use an case sensitive collation:
with temp as
(
select 'Test' COLLATE Latin1_General_CS_AS as name
UNION ALL
select 'TEST'
UNION ALL
select 'test'
UNION ALL
select 'tester'
UNION ALL
select 'tester'
)
SELECT name, COUNT(name)
FROM temp
group by name

Simply:
SELECT count(*), CAST(lastname as BINARY) AS lastname_cs
FROM names
GROUP BY lastname_cs;

In MySQL/MariaDB, if you don't want to use collations or casting to binary, just use:
SELECT MAX(name), COUNT(name)
FROM (
select 'Test' as name
UNION ALL
select 'TEST'
UNION ALL
select 'test'
UNION ALL
select 'test'
UNION ALL
select 'tester'
UNION ALL
select 'tester'
) as tmp
group by MD5(name)

This works on my case:
SELECT BINARY example FROM table GROUP BY BINARY example;

Related

Count distinct letters in a string in bigquery

I have a string column in Biquery like:
select 'A'
union all (select 'ab')
union all (select 'abc')
union all (select 'aa')
union all (select 'aab')
I would like to count the number of distinct characters in every row of the column, in this case the results would be:
1
2
3
1
2
Can this be done in BigQuery? How?
How about this (assuming you don't want to differentiate between uppercase and lowercase)...
WITH data AS (select 'A' AS `val`
union all (select 'ab')
union all (select 'abc')
union all (select 'aa')
union all (select 'aab'))
SELECT `val`, 26 - LENGTH(REGEXP_REPLACE('abcdefghijklmnopqrstuvwxyz', '['||LOWER(`val`)||']', ''))
FROM `data`;
A simple approach is to use the SPLIT to convert your string to an array and UNNEST to convert the resulting array to a table. You may then use COUNT and DISTINCT to determine the number of unique characters as shown below:
with my_data AS (
select 'A' as col
union all (select 'ab')
union all (select 'abc')
union all (select 'aa')
union all (select 'aab')
)
select col, (SELECT COUNT(*) FROM (
SELECT DISTINCT element FROM UNNEST(SPLIT(col,'')) as element
)) n from my_data;
or simply
WITH my_data AS (
SELECT 'A' as col UNION ALL
SELECT 'ab' UNION ALL
SELECT 'abc' UNION ALL
SELECT 'aa' UNION ALL
SELECT 'aab'
)
SELECT
col,
(
SELECT
COUNT(DISTINCT element)
FROM
UNNEST(SPLIT(col,'')) as element
) cnt
FROM
my_data;
Like previous but using COUNT with DISTINCT
with my_data AS (
select 'A' as col
union all (select 'ab')
union all (select 'abc')
union all (select 'aa')
union all (select 'aab')
)
select col, COUNT(DISTINCT element) FROM
my_data,UNNEST(SPLIT(col,'')) as element
GROUP BY col
If the data is not quite huge, I would rather go with the user-defined functions to ease up the string manipulation across different columns
CREATE TEMP FUNCTION
get_unique_char_count(x STRING)
RETURNS INT64
LANGUAGE js AS r"""
str_split = new Set(x.split(""));
return str_split.size;
""";
WITH
result AS (
SELECT
'A' AS val
UNION ALL (
SELECT
'ab')
UNION ALL (
SELECT
'abc')
UNION ALL (
SELECT
'aa')
UNION ALL (
SELECT
'aab') )
SELECT
val,
get_unique_char_count(val) unique_char_count
FROM
result
RESULT:

SQL Server - Sorting column based on string and number

I have a table with one column which contains combination of string and number like as shown below. I need to sort the name column in descending or ascending but the problem is when I use ORDER BY it is not sorting as expected
My query is like as shown below
SELECT * FROM test ORDER BY `name` ASC
My expected result is like as shown blow
employee1
employee2
employee3
employee6
employee6
employee10
employee11
employee12
employee17
employee82
employee100
employee111
employee129
employee299
Can anyone please help me on this
You can try below by only extracting the number part from the field
SELECT * FROM test
ORDER BY cast(replace(`name`,'employee','') as int) ASC
Please try the code below:
-- ASSUMES NUMBERS ARE IN THE LAST CHARACTER
-- WONT WORK IF NUMBER IS THERE IN THE MIDDLE
DROP TABLE IF EXISTS #data
select
Name = REPLACE(name,' ','')
,NameWithNum = REPLACE(name,' ','') + cast(object_id as varchar(100))
INTO #data
from sys.tables
SELECT
NameWithNum
,NameRemovedNumbers= SUBSTRING(NameWithNum, PATINDEX('%[0-9]%', NameWithNum), LEN(NameWithNum))
from #data
ORDER BY Name,SUBSTRING(NameWithNum, PATINDEX('%[0-9]%', NameWithNum), LEN(NameWithNum))
Since the number always in the end, you can do as
WITH CTE AS
(
SELECT 'employee1' Name
union all select 'employee2'
union all select 'employee3'
union all select 'employee6'
union all select 'employee6'
union all select 'employee10'
union all select 'employee11'
union all select 'employee12'
union all select 'employee17'
union all select 'employee82'
union all select 'employee100'
union all select 'employee111'
union all select 'employee129'
union all select 'employee299'
)
SELECT Name
FROM CTE
ORDER BY CAST(SUBSTRING(Name, PATINDEX('%[^a-z, '' '']%', Name), LEN(Name)) AS INT)
--OR PATINDEX('%[0-9]%', Name)
Or
WITH CTE AS
(
SELECT 'The First Employee 1' Name
union all select 'The Second One 2'
union all select 'Employee Number 4'
union all select 'Employee Number 3'
)
SELECT Name
FROM CTE
ORDER BY CAST(SUBSTRING(Name, PATINDEX('%[0-9]%', Name), LEN(Name)) AS INT)
Here is a Live Demo where you can change the strings and see the results.
Finnaly, I would note to the real problem, which is a table with just one column and no PK.
The "base" part of the name is the same. So, you can order by the length and then the name:
SELECT t.*
FROM test t
ORDER BY LEN(name), name;

SQL order by in union all query

I have following query which produces following below data.
All I want to show list of users in Alphabetically order and First Record should be All , All.
Query:
SELECT 'All' created_by,
'All' Prepby
FROM dual
UNION ALL
SELECT DISTINCT
to_char(d.created_by) AS created_by,
get_user_name(d.created_by) Prepby
FROM Hpml_Gp_dtl d
WHERE d.created_by IS NOT NULL
ORDER BY 2;
Use a CASE expression in ORDER BY.
Query
select t.* from (
select 'All' created_by, 'All' Prepby
from dual
union all
select distinct to_char(d.created_by) as created_by,
get_user_name(d.created_by) Prepby
from Hpml_Gp_dtl d
where d.created_by is not null
) t
order by case Prepby when 'All' then 1 else 2 end, Prepby;
Perform the ORDER BY in a sub-query:
SELECT 'All' AS created_by,
'All' AS Prepby
FROM DUAL
UNION ALL
SELECT *
FROM (
SELECT DISTINCT
to_char(created_by),
get_user_name(created_by)
FROM Hpml_Gp_dtl
WHERE created_by IS NOT null
ORDER BY 2
)

how to sort by case insensitive alphabetical order using COLLATE NOCASE

I am trying to sort alphabetically case insensitive using COLLATE NOCASE
but getting error
ORA - 00933 SQL command not properly ended.
below is the query I am firing:
SELECT LPN.LPN_ID,
LPN.TC_ORDER_ID,
ORDERS.D_NAME,
ORDERS.PURCHASE_ORDER_NUMBER AS ORDER_PURCHASE_ORDER_NUMBER,
ORDERS.D_NAME AS D_NAME_2, LPN.LPN_NBR_X_OF_Y
FROM ORDERS ORDERS,
LPN LPN
WHERE ORDERS.ORDER_ID=LPN.ORDER_ID
ORDER BY ORDERS.D_NAME COLLATE NOCASE DESC
I checked here to try this but still getting error
How to use SQL Order By statement to sort results case insensitive?
Any suggestions please ?
Oracle does not support COLLATE NOCASE option of the order by clause. To be able to perform case-insensitive ordering you have two options:
Set NLS_COMP='ANSI' and 'NLS_SORT=BINARY_CI', CI suffix means case-insensitive, session or system wide by using alter session or alter system statement:
alter session set nls_comp='ANSI';
alter session set nls_sort='BINARY_CI';
with t1(col) as(
select 'A' from dual union all
select 'a' from dual union all
select 'b' from dual union all
select 'B' from dual
)
select *
from t1
order by col
Result:
COL
---
A
a
b
B
Change case of the character literal by using either upper() or lower() function.
with t1(col) as(
select 'A' from dual union all
select 'a' from dual union all
select 'b' from dual union all
select 'B' from dual
)
select *
from t1
order by upper(col)
result:
COL
---
A
a
b
B
Edit
but i need the UpperCase to preceed any LowerCase eg. Alan, alan, Brian, brian, Cris
This is not the case-insensitive ordering, rather quite contrary in some sense. As one of the options you could do the following to produce desired result:
with t1(col) as(
select 'alan' from dual union all
select 'Alan' from dual union all
select 'brian' from dual union all
select 'Brian' from dual union all
select 'Cris' from dual
)
select col
from ( select col
, case
when row_number() over(partition by lower(col)
order by col) = 1
then 1
else 0
end as rn_grp
from t1
)
order by sum(rn_grp) over(order by lower(col))
Result:
COL
-----
Alan
alan
Brian
brian
Cris
COLLATE NOCASE does not work with Oracle, Try this:
SELECT LPN.LPN_ID,
LPN.TC_ORDER_ID,
ORDERS.D_NAME,
ORDERS.PURCHASE_ORDER_NUMBER AS ORDER_PURCHASE_ORDER_NUMBER,
ORDERS.D_NAME AS D_NAME_2,
LPN.LPN_NBR_X_OF_Y
FROM orders orders,
lpn lpn
where orders.order_id=lpn.order_id
ORDER BY lower(orders.d_name) DESC;
Since 10g there is a function NLSSORT which does pretty much what Nicholas Krasnov described but doesn't require altering the system or session.
so you can try something like this:
SELECT LPN.LPN_ID, LPN.TC_ORDER_ID, ORDERS.D_NAME, ORDERS.PURCHASE_ORDER_NUMBER
AS ORDER_PURCHASE_ORDER_NUMBER, ORDERS.D_NAME AS D_NAME_2, LPN.LPN_NBR_X_OF_Y
FROM ORDERS ORDERS, LPN LPN
WHERE ORDERS.ORDER_ID=LPN.ORDER_ID
ORDER BY nlssort(ORDERS.D_NAME, 'NLS_SORT = binary_ci') desc
Note you can't use this directly in a UNION or you'll get the following error:
ORA-01785: ORDER BY item must be the number of a SELECT-list
expression.
Instead, you need to wrap it:
SELECT * FROM (SELECT a, b FROM x, y UNION SELECT c, d FROM m, n)
ORDER BY nlssort(a, 'nls_sort=binary_ci') DESC

cross-dbms way to check if string is numeric

Ok, I have this field: code varchar(255). It contains some values used in our export routine like
DB84
DB34
3567
3568
I need to select only auto-generated (fully numeric) fields
WHERE is_numeric(table.code)
is_numeric() checks if code field contains only positive digits.
Can you propose anything that will work both under mysql 5.1 and oracle 10g?
Below are three separate implementations for each of SQL Server, MySQL and Oracle. None use (or can) the same approach, so there doesn't seem to be a cross DBMS way to do it.
For MySQL and Oracle, only the simple integer test is show; for SQL Server, the full numeric test is shown.
For SQL Server:
note that isnumeric('.') returns 1.. but it can not actually be converted to float. Some text like '1e6' cannot be converted to numeric directly, but you can pass through float, then numeric.
;with tmp(x) as (
select 'db01' union all select '1' union all select '1e2' union all
select '1234' union all select '' union all select null union all
select '1.2e4' union all select '1.e10' union all select '0' union all
select '1.2e+4' union all select '1.e-10' union all select '1e--5' union all
select '.' union all select '.123' union all select '1.1.23' union all
select '-.123' union all select '-1.123' union all select '--1' union all
select '---1.1' union all select '+1.123' union all select '++3' union all
select '-+1.123' union all select '1 1' union all select '1e1.3' union all
select '1.234' union all select 'e4' union all select '+.123' union all
select '1-' union all select '-3e-4' union all select '+3e-4' union all
select '+3e+4' union all select '-3.2e+4' union all select '1e1e1' union all
select '-1e-1-1')
select x, isnumeric(x),
case when x not like '%[^0-9]%' and x >'' then convert(int, x) end as SimpleInt,
case
when x is null or x = '' then null -- blanks
when x like '%[^0-9e.+-]%' then null -- non valid char found
when x like 'e%' or x like '%e%[e.]%' then null -- e cannot be first, and cannot be followed by e/.
when x like '%e%_%[+-]%' then null -- nothing must come between e and +/-
when x='.' or x like '%.%.%' then null -- no more than one decimal, and not the decimal alone
when x like '%[^e][+-]%' then null -- no more than one of either +/-, and it must be at the start
when x like '%[+-]%[+-]%' and not x like '%[+-]%e[+-]%' then null
else convert(float,x)
end
from tmp order by 2, 3
For MySQL
create table tmp(x varchar(100));
insert into tmp
select 'db01' union all select '1' union all select '1e2' union all
select '1234' union all select '' union all select null union all
select '1.2e4' union all select '1.e10' union all select '0' union all
select '1.2e+4' union all select '1.e-10' union all select '1e--5' union all
select '.' union all select '.123' union all select '1.1.23' union all
select '-.123' union all select '-1.123' union all select '--1' union all
select '---1.1' union all select '+1.123' union all select '++3' union all
select '-+1.123' union all select '1 1' union all select '1e1.3' union all
select '1.234' union all select 'e4' union all select '+.123' union all
select '1-' union all select '-3e-4' union all select '+3e-4' union all
select '+3e+4' union all select '-3.2e+4' union all select '1e1e1' union all
select '-1e-1-1';
select x,
case when x not regexp('[^0-9]') then x*1 end as SimpleInt
from tmp order by 2
For Oracle
case when REGEXP_LIKE(col, '[^0-9]') then col*1 end