Insert into rows based on percentage - sql

I have a table with employees. I need to populate the table with data based on 10 values from another table which contains 10 rows with columns ID and a name. I have to divide the employees in almost equal groups and add the IDs from the other table.
For example, 10% of employees should have first ID from the other table, other 10% of employees should have second ID from the same table etc.
Here is an example table of the employees
EmployeeID | Name | LevelID
----------------
1 | Name 1 | NULL
2 | Name 2 | NULL
3 | Name 3 | NULL
...
998 | Name 998 | NULL
999 | Name 999 | NULL
1000 | Name 1000 | NULL
Here is an example table of levels
LevelID | Level
----------------
1 | Level 1
2 | Level 2
3 | Level 3
...
8 | Level 8
9 | Level 9
10 | Level 10
I want my employee table to be updated like this:
EmployeeID | Name | LevelID
----------------
1 | Name 1 | 1
2 | Name 2 | 1
...
100 | Name 100 | 1
101 | Name 101 | 2
...
300 | Name 300 | 3
301 | Name 301 | 4
302 | Name 302 | 4
...
998 | Name 998 | 10
999 | Name 999 | 10
1000 | Name 1000 | 10
How can I achieve this with a query in SQL Server?
Kindly help me out.
Thanks

You can assign the values using window functions:
-- Split the employees into ten near-equal buckets and pair each bucket with one level.
-- Both window functions order by the key column so the pairing is repeatable:
-- the lowest EmployeeIDs fall into tile 1, which is matched with the lowest LevelID.
-- Columns are listed explicitly so the result carries a single LevelID column
-- (the assigned one) rather than one from each table.
select e.EmployeeID, e.Name, l.LevelID, l.[Level]
from (select t1.EmployeeID, t1.Name,
             ntile(10) over (order by t1.EmployeeID) as tile
      from table1 t1
     ) e join
     (select t2.LevelID, t2.[Level],
             row_number() over (order by t2.LevelID) as seqnum
      from table2 t2
     ) l
     on l.seqnum = e.tile;
If you want to do this as an update:
-- Bucket the employees into ten near-equal tiles, ordered by EmployeeID so that
-- the assignment is repeatable and the lowest IDs receive the first level.
with toupdate as (
      select t1.*, ntile(10) over (order by t1.EmployeeID) as tile
      from table1 t1
     )
update toupdate
    -- Write the matched level back onto the employee row behind the CTE.
    set LevelID = t2.LevelID
    from toupdate join
         -- Number the levels 1..10 in LevelID order so each tile maps to exactly one level.
         (select t2.LevelID, row_number() over (order by t2.LevelID) as seqnum
          from table2 t2
         ) t2
         on t2.seqnum = toupdate.tile;

Related

Get some values from the table by selecting

I have a table:
| id | Number |Address
| -----| ------------|-----------
| 1 | 0 | NULL
| 1 | 1 | NULL
| 1 | 2 | 50
| 1 | 3 | NULL
| 2 | 0 | 10
| 3 | 1 | 30
| 3 | 2 | 20
| 3 | 3 | 20
| 4 | 0 | 75
| 4 | 1 | 22
| 4 | 2 | 30
| 5 | 0 | NULL
I need to get: the NUMBER of the last ADDRESS change for each ID.
I wrote this select:
-- Asker's attempt, kept as posted: per id, take the largest of the per-address minimums.
-- NOTE(review): "dh =" compares a table alias and "t.history" is not a column of the
-- sample table; both were presumably meant to be the Number column -- confirm with the asker.
select dh.id, dh.number from table dh where dh =
(select max(min(t.history)) from table t where t.id = dh.id group by t.address)
However, this query does not correctly handle the case where the address changes and then changes back to a previous value. For example, for id=1 the group by returns:
| Number |
| -------- |
| NULL |
| 50 |
I have been thinking about this select for several days, and I will be happy to receive any help.
You can do this using row_number() -- twice:
-- Number each id's rows from newest to oldest twice: once across the whole id,
-- and once within each (id, address) pair. Rows on which the two counters agree
-- are then reduced to the smallest number per id.
with numbered as (
    select t.*,
           row_number() over (partition by id order by number desc) as seqnum1,
           row_number() over (partition by id, address order by number desc) as seqnum2
    from t
)
select numbered.id, min(number)
from numbered
where seqnum1 = seqnum2
group by id;
What this does is enumerate the rows by number in descending order:
Once per id.
Once per id and address.
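The two values are equal only for the trailing run of rows (counting back from the most recent one) that all carry the most recent address; as soon as an older row has a different address, the per-id counter runs ahead of the per-address counter. The aggregation then pulls back the earliest row in this run, which is where the last address change happened.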
I answered my question myself, if anyone needs it, my solution:
-- For each id, return the row whose number is the latest one at which the address
-- differs from the address on the preceding row (rows taken in number order).
select * from table dh1 where dh1.number = (
select max(x.number)
from (
select
-- prev = address of the previous row by number; only rows with dh1's id are visible here
dh2.id, dh2.number, dh2.address, lag(dh2.address) over(order by dh2.number asc) as prev
from table dh2 where dh1.id=dh2.id
) x
-- NVL maps NULL to 0 so that a NULL-versus-value transition compares as a change.
-- NOTE(review): this also makes a real address of 0 indistinguishable from NULL,
-- and an id whose addresses are all NULL yields no row -- confirm both are acceptable.
where NVL(x.address, 0) <> NVL(x.prev, 0)
);

How to delete the rows with three same data columns and one different data column

I have a table "MARK_TABLE" as below.
How can I delete the rows with same "STUDENT", "COURSE" and "SCORE" values?
| ID | STUDENT | COURSE | SCORE |
|----|---------|--------|-------|
| 1 | 1 | 1 | 60 |
| 3 | 1 | 2 | 81 |
| 4 | 1 | 3 | 81 |
| 9 | 2 | 1 | 80 |
| 10 | 1 | 1 | 60 |
| 11 | 2 | 1 | 80 |
Now I already filtered the data I want to KEEP, but without the "ID"...
-- Intersecting the projection with itself collapses duplicates, leaving one row
-- per distinct (student, course, score) combination -- but without the ID column.
SELECT student, course, score FROM mark_table
INTERSECT
SELECT student, course, score FROM mark_table
The output:
| STUDENT | COURSE | SCORE |
|---------|--------|-------|
| 1 | 1 | 60 |
| 1 | 2 | 81 |
| 1 | 3 | 81 |
| 2 | 1 | 80 |
Use the following query to delete the desired rows:
-- Remove every row that has a sibling with the same STUDENT, COURSE and SCORE
-- and a larger ID, so only the highest-ID row of each group survives.
DELETE FROM MARK_TABLE M
WHERE M.ID < (
    SELECT MAX(M_IN.ID)
    FROM MARK_TABLE M_IN
    WHERE M_IN.STUDENT = M.STUDENT
      AND M_IN.COURSE = M.COURSE
      AND M_IN.SCORE = M.SCORE
)
OUTPUT
db<>fiddle demo
Cheers!!
use distinct
-- One row per distinct (student, course, score) combination.
SELECT DISTINCT
    student,
    course,
    score
FROM mark_table
Assuming you don't just want to select the unique data you want to keep (you mention you've already done this), you can proceed as follows:
Create a temporary table to hold the data you want to keep
Insert the data you want to keep into the temporary table
Empty the source table
Re-Insert the data you want to keep into the source table.
-- Number the rows inside each (student, course, score) group,
-- then keep only the first row of every group.
with ranked as (
    select
        row_number() over (partition by student, course, score order by score) as rn,
        student,
        course,
        score
    from mark_table
)
select *
from ranked
where rn = 1
Use CTE with RowNumber
-- Demo fixture: a temp table holding the sample rows from the question.
create table #MARK_TABLE (ID int, STUDENT int, COURSE int, SCORE int);

-- Explicit column list so the insert does not depend on column order.
insert into #MARK_TABLE (ID, STUDENT, COURSE, SCORE)
values
    (1, 1, 1, 60),
    (3, 1, 2, 81),
    (4, 1, 3, 81),
    (9, 2, 1, 80),
    (10, 1, 1, 60),
    (11, 2, 1, 80);

-- Number the rows of each (STUDENT, COURSE, SCORE) group by ID. Ordering by ID
-- (rather than by SCORE, which is constant inside the partition) makes the
-- surviving row well defined: the lowest ID of every group is numbered 1.
with cteDeleteID as (
    select ID,
           row_number() over (partition by STUDENT, COURSE, SCORE order by ID) as [row_number]
    from #MARK_TABLE
)
-- Deleting through the CTE removes the underlying rows numbered 2 and up,
-- without a second lookup of the table by ID.
delete from cteDeleteID
where [row_number] > 1;

-- Show what is left, in a stable order.
select ID, STUDENT, COURSE, SCORE
from #MARK_TABLE
order by ID;

drop table #MARK_TABLE;

Get sum over a column value that is determined by two other column values in the same table

I have the following table MY_TABLE
ID | SEQ | TYPE | VAL
1 | 2 | A | 100
1 | 3 | A | 100
1 | 2 | B | 200
1 | 3 | A | 100
1 | 3 | B | 200
2 | 25 | X | 100
2 | 24 | Y | 200
2 | 24 | X | 300
2 | 25 | Y | 400
2 | 25 | X | 50
Here in MY_TABLE, each ID has a set of Seq values and Type values. I want to get the sum of VAL rows per TYPE that belong to each IDs max(Seq).
Expected output:
ID| SEQ | TYPE | SUM(VAL)
1 | 3 | A | 200 <- 100 + 100
1 | 3 | B | 200
2 | 25 | X | 150 <- 100 + 50
2 | 25 | Y | 400
What I tried:
-- CTE: the highest seq recorded for each id
with max_seq as (
select id, max(seq) max_seq
from my_table t
group by id)
-- main query: total val per (id, seq, type), restricted to rows at that id's highest seq
select
bd.id,
bd.seq,
bd.type,
sum(bd.val)
from my_table bd
-- keep only the rows whose (id, seq) pair matches the CTE's (id, max_seq) pair
inner join max_seq
on
(max_seq.id = bd.id)
and
(max_seq.max_seq = bd.seq)
-- one output row per id, max seq and type
group by bd.id, bd.seq, bd.type;
Question:
The above query works well for smaller tables but gets slower when the table is bigger. Is there an efficient way of getting this output? (Maybe without using two joins on the same table with a sub query?)
You could avoid the self-join by using a subquery which gets a ranking for each row based on the id and seq:
-- Rank each id's rows by seq, highest first; ties share a rank, so every row
-- at the top seq gets rnk = 1. Then total val per (id, seq, type) over those rows.
with ranked as (
    select id, seq, type, val,
           rank() over (partition by id order by seq desc) as rnk
    from my_table
)
select id, seq, type, sum(val)
from ranked
where rnk = 1
group by id, seq, type
order by id, seq, type;
ID SEQ T SUM(VAL)
---------- ---------- - ----------
1 3 A 200
1 3 B 200
2 25 X 150
2 25 Y 400
Because of the order by seq desc, the rnk value is 1 for the highest seq for each id. The outer query then just filters on rnk = 1, limiting the output and the aggregation to those lowest-rank (highest-seq) rows.
db<>fiddle demo

RANK, ROW_NUMBER on T-SQL

I have rows like this in SQL Server 2014:
id | fld1
---+-----
1 | 100
2 | 100
3 | 80
4 | 102
5 | 100
6 | 80
7 | 102
I would need a partition that without changing order would return:
NewFld | id | fld1
-------+----+------
1 | 1 | 100
1 | 2 | 100
2 | 3 | 80
3 | 4 | 102
1 | 5 | 100
2 | 6 | 80
3 | 7 | 102
Newfld should return the same value based on fld1 without changing ordering given by id.
I tried with ROW_NUMBER, RANK, DENSE_RANK but nothing works for me.
View this fiddle
Use min() over() in a subquery to establish the ordering values needed for the dense_rank().
-- fld1_idmin is the smallest id carrying each fld1 value; ranking by it numbers
-- the fld1 values in the order in which they first appear.
WITH first_seen AS (
    SELECT id
         , fld1
         , MIN(id) OVER (PARTITION BY fld1) AS fld1_idmin
    FROM yourtable
)
SELECT id
     , Fld1
     , DENSE_RANK() OVER (ORDER BY fld1_idmin) AS Rank
FROM first_seen
ORDER BY ID
With an index on FLD1 using these window functions needs just a single index scan for this query. See this SQLfiddle
you may use this
-- ranks = the smallest ID sharing the row's fld1, looked up per row with a
-- correlated subquery; the outer query dense-ranks the rows by that value.
WITH mytab AS (
    SELECT outer_rows.*
         , (
               SELECT MIN(same_value.ID)
               FROM yourtable AS same_value
               WHERE same_value.fld1 = outer_rows.fld1
           ) AS ranks
    FROM yourtable AS outer_rows
)
SELECT ID
     , fld1
     , DENSE_RANK() OVER (ORDER BY ranks)
FROM mytab
ORDER BY ID
view this fiddle

How to get Previous Value for Null Values

I have the Below Data in my Table.
| Id | FeeModeId |Name | Amount|
---------------------------------------------
| 1 | NULL | NULL | 20 |
| 2 | 1 | Quarter-1 | 5000 |
| 3 | NULL | NULL | 2000 |
| 4 | 2 | Quarter-2 | 8000 |
| 5 | NULL | NULL | 5000 |
| 6 | NULL | NULL | 2000 |
| 7 | 3 | Quarter-3 | 6000 |
| 8 | NULL | NULL | 4000 |
How to write such query to get below output...
| Id | FeeModeId |Name | Amount|
---------------------------------------------
| 1 | NULL | NULL | 20 |
| 2 | 1 | Quarter-1 | 5000 |
| 3 | 1 | Quarter-1 | 2000 |
| 4 | 2 | Quarter-2 | 8000 |
| 5 | 2 | Quarter-2 | 5000 |
| 6 | 2 | Quarter-2 | 2000 |
| 7 | 3 | Quarter-3 | 6000 |
| 8 | 3 | Quarter-3 | 4000 |
Since you are on SQL Server 2012... here is a version that uses that. It might be faster than other solutions but you have to test that on your data.
sum() over() computes a running sum ordered by Id, adding 1 when there is a value in the column and keeping the current total for null values. The running sum is then used to partition the result in first_value() over(). The first value, ordered by Id, in each "group" of rows generated by the running sum is the value you want.
-- NF / NS are running counts (in Id order) of the non-null FeeModeId / Name
-- values seen so far, so each non-null value opens a new group that also holds
-- the null rows following it. The first value of a group is the one to carry down.
with Flagged as (
    select Id,
           FeeModeId,
           Name,
           Amount,
           sum(case when FeeModeId is not null then 1 else 0 end)
               over (order by Id) as NF,
           sum(case when Name is not null then 1 else 0 end)
               over (order by Id) as NS
    from YourTable
)
select T.Id,
       first_value(T.FeeModeId)
           over (partition by T.NF
                 order by T.Id
                 rows between unbounded preceding and current row) as FeeModeId,
       first_value(T.Name)
           over (partition by T.NS
                 order by T.Id
                 rows between unbounded preceding and current row) as Name,
       T.Amount
from Flagged as T
SQL Fiddle
Something that will work pre SQL Server 2012:
-- For every row, look back (current row included) for the closest row by Id that
-- has a FeeModeId, and separately for the closest row that has a Name.
select T1.Id,
       LastMode.FeeModeId,
       LastName.Name,
       T1.Amount
from YourTable as T1
    outer apply (select top(1) M.FeeModeId
                 from YourTable as M
                 where M.Id <= T1.Id
                   and M.FeeModeId is not null
                 order by M.Id desc) as LastMode
    outer apply (select top(1) N.Name
                 from YourTable as N
                 where N.Id <= T1.Id
                   and N.Name is not null
                 order by N.Id desc) as LastName
SQL Fiddle
Please try:
-- For rows without a FeeModeId, fetch FeeModeId and Name from the closest earlier
-- row that has a FeeModeId (and an Amount); rows that already have one keep their own.
select
    a.ID,
    ISNULL(a.FeeModeId, prev.FeeModeId) as FeeModeId,
    ISNULL(a.Name, prev.Name) as Name,
    a.Amount
from tbl as a
    outer apply (
        select top 1
            b.FeeModeId,
            b.Name
        from tbl as b
        where a.FeeModeId is null
          and b.ID < a.ID
          and b.FeeModeId is not null
          and b.Amount is not null
        order by b.ID desc
    ) as prev
OR
-- Pair every row with all earlier rows that have a FeeModeId, then keep only the
-- pairing with the nearest earlier row (largest earlier ID). Picking by ID rather
-- than by the largest FeeModeId keeps the result correct when FeeModeId values do
-- not rise with ID, and yields exactly one row per ID even when two earlier rows
-- share a FeeModeId. Assumes ID is unique.
select
    ID,
    ISNULL(FeeModeId, bFeeModeId) FeeModeId,
    ISNULL(Name, bName) Name,
    Amount
from (
    select
        a.ID, a.FeeModeId, a.Name, a.Amount,
        b.ID bID, b.FeeModeId bFeeModeId, b.Name bName,
        -- ID of the closest earlier row that carries a FeeModeId (NULL when there is none)
        MAX(b.ID) over (partition by a.ID) mxID
    from tbl a
        left join tbl b
            on b.ID < a.ID
           and b.FeeModeId is not null
) x
-- mxID is NULL for rows with no earlier filled row; keep those rows unchanged
where bID = mxID or mxID is null
-- When FeeModeId or Name is missing, substitute the value from the closest
-- earlier row (by ID) where that column is filled in.
SELECT
    T.ID,
    ISNULL(
        T.FeeModeId,
        (
            SELECT TOP 1 PrevMode.FeeModeId
            FROM TableName AS PrevMode
            WHERE PrevMode.ID < T.ID
              AND PrevMode.FeeModeId IS NOT NULL
            ORDER BY PrevMode.ID DESC
        )
    ) AS FeeModeId,
    ISNULL(
        T.Name,
        (
            SELECT TOP 1 PrevName.Name
            FROM TableName AS PrevName
            WHERE PrevName.ID < T.ID
              AND PrevName.Name IS NOT NULL
            ORDER BY PrevName.ID DESC
        )
    ) AS Name,
    T.Amount
FROM TableName AS T
try this -
-- For each column, a missing value is replaced by the value on the row with the
-- largest earlier Id where that column is filled in; present values are kept.
SELECT t_1.Id,
       CASE
           WHEN t_1.Feemodeid IS NULL THEN
               (SELECT src.Feemodeid
                FROM Table_Name src
                WHERE src.Id = (SELECT MAX(earlier.Id)
                                FROM Table_Name earlier
                                WHERE earlier.Id < t_1.Id
                                  AND earlier.Feemodeid IS NOT NULL))
           ELSE
               t_1.Feemodeid
       END Feemodeid,
       CASE
           WHEN t_1.NAME IS NULL THEN
               (SELECT src.NAME
                FROM Table_Name src
                WHERE src.Id = (SELECT MAX(earlier.Id)
                                FROM Table_Name earlier
                                WHERE earlier.Id < t_1.Id
                                  AND earlier.NAME IS NOT NULL))
           ELSE
               t_1.NAME
       END NAME,
       t_1.Amount
FROM Table_Name t_1
id name
1 toto
2 NULL
3 NULL
4 titi
5 NULL
6 NULL
7 tutu
8 NULL
9 NULL
-- Inner query: for every row, the largest id at or before it that has a name.
-- Outer query: look that id up again to fetch the name itself.
SELECT
    id_table
    ,name
FROM
    (
        SELECT
            cur.id AS 'id_table'
            ,max(filled.id) AS 'id_name'
        FROM
            names AS cur
            inner join names AS filled
                ON filled.id <= cur.id
                AND filled.name IS NOT NULL
        GROUP BY
            cur.id
    ) AS tt02
    left join names
        ON names.id = tt02.id_name
id_table name
1 toto
2 toto
3 toto
4 titi
5 titi
6 titi
7 tutu
8 tutu
9 tutu