SQL query to calculate average and insert into a table - sql

I'm working on a song archive database and I'm stuck on some queries. I would like to -
Calculate the rating of each user by their average Comments value of score and inserting the rating into Users
Calculate how many purchases each user has made
Calculate the average score of a Song from the Comments table
Calculating how many credits each user has spent on their purchases
Below you can find my tables...
-- Registered users of the song archive, keyed by username.
-- rating is the column the question wants filled from the user's comment scores.
CREATE TABLE Users (
    username NVARCHAR(30) NOT NULL PRIMARY KEY,
    pass     NVARCHAR(16),
    email    NVARCHAR(50),
    city     NVARCHAR(10),
    credits  INT,
    rating   INT
)
-- One row per song, with an auto-generated song_id.
-- NOTE(review): username and genre presumably point at Users.username and
-- Genres.genre_id, but no foreign keys are declared here.
CREATE TABLE Songs (
    song_id     INT NOT NULL IDENTITY(1, 1) PRIMARY KEY,
    song_name   NVARCHAR(30),
    username    NVARCHAR(30),
    genre       INT,
    price       INT,
    song_length INT,
    listens     INT
)
-- Lookup table of genre names with an auto-generated genre_id.
CREATE TABLE Genres (
    genre_id   INT NOT NULL IDENTITY(1, 1) PRIMARY KEY,
    genre_name NVARCHAR(16)
)
-- One row per song purchase made by a user.
CREATE TABLE Purchases
(
    purchase_id    INT NOT NULL IDENTITY ( 1, 1 ) PRIMARY KEY,
    song_id        INT,
    username       NVARCHAR( 30 ),  -- the comma here was missing, which made the statement invalid
    date_purchased DATETIME
)
-- One row per comment a user leaves on a song; score is the value the
-- question wants averaged per user and per song.
CREATE TABLE Comments (
    comment_id INT NOT NULL IDENTITY(1, 1) PRIMARY KEY,
    username   NVARCHAR(30),
    song_id    INT,
    text       NVARCHAR(30),
    score      INT
)

I answered some of your questions. In addition to the respective queries I arranged them as common table expressions, which I think could be a convenient way to use them...
Calculating how many credits each user has spent on their purchases might require knowing your logic for how users spend their credits.
-- Per-user statistics: number of purchases and average comment score.
-- Averages are computed on DECIMAL values because score is INT and
-- SUM(score) / COUNT(score) would truncate the result to a whole number.
WITH CTE_PurchasesByUser AS
(
    -- Number of purchases made by each user that has at least one purchase.
    SELECT
        p.username AS username,
        COUNT(*)   AS NrOfPurchases
    FROM Purchases AS p
    GROUP BY p.username
),
CTE_AverageScoreBySong AS
(
    -- Average comment score of each song; not referenced by the final SELECT,
    -- kept so it can be selected from when per-song averages are needed.
    SELECT
        c.song_id                            AS song_id,
        AVG(CAST(c.score AS DECIMAL(10, 2))) AS AverageScore
    FROM Comments AS c
    GROUP BY c.song_id
),
CTE_AverageScoreByUser AS
(
    -- Average score of the comments written by each user.
    SELECT
        u.username                           AS username,
        AVG(CAST(c.score AS DECIMAL(10, 2))) AS AverageScore
    FROM Users AS u
    INNER JOIN Comments AS c
        ON u.username = c.username
    GROUP BY u.username
)
SELECT
    u.username,
    u.pass,
    u.email,
    u.city,
    u.credits,
    u.rating,
    COALESCE(bbu.NrOfPurchases, 0) AS NrOfPurchases,  -- users without purchases report 0
    asu.AverageScore                                  -- NULL for users without comments
FROM Users AS u
LEFT JOIN CTE_PurchasesByUser AS bbu
    ON u.username = bbu.username
LEFT JOIN CTE_AverageScoreByUser AS asu
    ON u.username = asu.username
This SQL ran with your tables, yet I didn't test it with data rows...

Related

Column 'Users.Name' is invalid in the select list because it is not contained in either an aggregate function or the GROUP BY clause

I have the following tables:
-- Tables are named Users / UserSkills to match the queries below and the
-- quoted error message; User is also a reserved word in SQL Server.
create table dbo.Users
(
Id int not null primary key clustered (Id),
Name nvarchar(255) not null
)
-- Junction table: one row per (user, skill) pair.
create table dbo.UserSkills
(
UserId int not null,
SkillId int not null,
primary key clustered (UserId, SkillId)
)
Given a set of Skills Ids I need to get the users that have all these Skills Ids:
-- Asker's attempt, which raises the error quoted below: it selects every
-- Users column (Users.*) while grouping only by Users.Id.
select Users.*
from Users
inner join UserSkills on Users.Id = UserSkills.UserId
where UserSkills.SkillId in (149, 305)
group by Users.Id
having count(*) = 2
I get the following error:
Column 'Users.Name' is invalid in the select list because it is not contained in either an aggregate function or the GROUP BY clause.
What am I missing?
Side questions:
Is there a faster query to accomplish the same result?
How can I pass the SkillsIds, e.g. (149, 305), as a parameter? And use the @SkillsIds count in having count(*) = 2 instead of the literal 2?
UPDATE
The following code is working and I get the User John.
-- Self-contained demo using table variables. T-SQL table variables take the
-- @ prefix; the # prefix is only valid for temp tables made with CREATE TABLE.
declare @Users table
(
Id int not null primary key clustered (Id),
[Name] nvarchar(255) not null
);
declare @Skills table
(
SkillId int not null primary key clustered (SkillId)
);
declare @UserSkills table
(
UserId int not null,
SkillId int not null,
primary key clustered (UserId, SkillId)
);
insert into @Users
values (1, 'John'), (2, 'Mary');
insert into @Skills
values (148), (149), (304), (305);
insert into @UserSkills
values (1, 149), (1, 305), (2, 148), (2, 149);
-- Users holding both skills: the (UserId, SkillId) primary key guarantees one
-- row per skill, so a count of 2 means both ids in the list matched.
select u.Id, u.Name
from @Users as u
inner join @UserSkills as us on u.Id = us.UserId
where us.SkillId in (149, 305)
group by u.Id, u.Name
having count(*) = 2
If user has 40 columns, is there a way to not enumerate all the columns in the Select and Group By since Id is the only column needed to group?
First, your tables are broken, unless Name has only a single character. You need a length:
-- Key column is named Id so that the constraint and the query below (u.Id)
-- refer to a column that exists; table name matches the Users used in the query.
create table Users (
Id int not null primary key clustered (Id),
Name nvarchar(255) not null
);
Always use a length when specifying char(), varchar(), and related types in SQL Server.
For your query, SQL Server is not going to process select * with group by. List each column in both the select and the group by:
-- Users that have both skills 149 and 305: keep only rows for those skills,
-- then keep the users left with exactly two matching rows.
SELECT
    u.id,
    u.name
FROM Users AS u
INNER JOIN UserSkills AS us
    ON us.UserId = u.Id
WHERE us.SkillId IN (149, 305)
GROUP BY
    u.Id,
    u.name
HAVING COUNT(*) = 2;

SQL Query Problems With Counting Frequency

I've been trying to list all yelp users who haven't reviewed any businesses but have provided at least 2 comments on other user's reviews for the following table:
But I've been having some issues. These issues mainly derive from my attempts to count the elements listed in a varchar. For example, the question states that I need to return users who have commented on at least two other users' reviews. Currently I have the List_Of_Comments stored as a varchar with the characters looking like the following: "Y3, Y2". How am I supposed to determine how often a user posts a comment through a varchar? This is what I have so far:
-- Asker's attempt at "users with no reviews but at least 2 comments".
-- NOTE(review): `R.Author = NULL` is never true (a NULL test needs IS NULL),
-- and it contradicts the join condition R.Author = U.YELP_ID.
SELECT U.YELP_ID FROM REVIEWS R, YELP_USER U
WHERE R.Author = U.YELP_ID AND R.Author = NULL AND R.Number_Of_Comments >= 2;
Assuming the following tables:
-- One row per review. List_Of_Comments holds a comma-separated list in a
-- single VARCHAR(7) column; Number_Of_Comments is stored alongside it.
CREATE TABLE REVIEWS
(
    REVIEW_ID          VARCHAR(3),
    Stars              INT,
    Author             VARCHAR(3),
    Publish_Date       VARCHAR(22),
    BUSSINESS_ID       VARCHAR(3),
    List_Of_Comments   VARCHAR(7),
    Number_Of_Comments INT
);
-- One row per Yelp user; the friend and check-in columns are plain VARCHARs.
CREATE TABLE YELP_USER
(
    YELP_ID                 VARCHAR(3),
    Email                   VARCHAR(17),
    First_Name              VARCHAR(8),
    Last_Name               VARCHAR(17),
    DOB                     DATE,
    BirthPlace              VARCHAR(3),
    Gender                  VARCHAR(1),
    Friendlist              VARCHAR(9),
    Complimented_Friendlist VARCHAR(6),
    Checkedin_Businesses    VARCHAR(36)
);
If anyone could help me figure this out I would greatly appreciate it. I've been stuck on this for hours. Thanks!
To answer what I think you are asking... How to count the number of entries in a comma separated list:
Oracle Setup:
-- Three sample reviews: two comments, one comment, and no comment list at all.
INSERT INTO REVIEWS ( REVIEW_ID, Stars, Author, Publish_Date, BUSSINESS_ID, List_Of_Comments, Number_Of_Comments )
VALUES ( 1, 1, 'A1', DATE '2016-02-02', 'B1', 'C1,C2', NULL );
INSERT INTO REVIEWS ( REVIEW_ID, Stars, Author, Publish_Date, BUSSINESS_ID, List_Of_Comments, Number_Of_Comments )
VALUES ( 2, 1, 'A2', DATE '2016-02-01', 'B1', 'C3', NULL );
INSERT INTO REVIEWS ( REVIEW_ID, Stars, Author, Publish_Date, BUSSINESS_ID, List_Of_Comments, Number_Of_Comments )
VALUES ( 3, 1, 'A3', DATE '2016-02-01', 'B1', NULL, NULL );
Query:
-- Count the comma-separated entries per review: each run of non-comma
-- characters is one entry, and a NULL list is reported as 0.
SELECT
    r.REVIEW_ID,
    COALESCE(REGEXP_COUNT(r.List_of_comments, '[^,]+'), 0) AS Number_of_comments
FROM REVIEWS r;
Results:
REVIEW_ID NUMBER_OF_COMMENTS
--------- ------------------
1 2
2 1
3 0
A better solution:
Storing it how you are doing with a VARCHAR2(7) column for a list of comments will only allow you to store, at most, 4 comment IDs (if each ID is a single character).
It would be better to move them to their own tables using something like:
-- Normalised replacement for the comma-separated List_Of_Comments column:
-- one row per comment, linked to the review and to the commenting user.
-- NOTE(review): the REFERENCES clauses need REVIEWS.REVIEW_ID and
-- YELP_USER.YELP_ID to be declared PRIMARY KEY or UNIQUE, which the table
-- definitions shown earlier do not do.
CREATE TABLE REVIEW_COMMENTS (
COMMENT_ID NUMBER(8,0) PRIMARY KEY,
REVIEW_ID VARCHAR2(3) REFERENCES REVIEWS( REVIEW_ID ),
YELP_ID VARCHAR2(3) REFERENCES YELP_USER( YELP_ID ),
COMMENT_VALUE VARCHAR2(140)
);
COMMENT ON TABLE REVIEW_COMMENTS IS 'The comments on a review by a user.';
-- Oracle addresses a column as table.column in COMMENT ON COLUMN.
COMMENT ON COLUMN REVIEW_COMMENTS.COMMENT_ID IS 'A unique identifier for the comment by a user on a review.';
COMMENT ON COLUMN REVIEW_COMMENTS.REVIEW_ID IS 'The identifier for the review the comment was left against.';
COMMENT ON COLUMN REVIEW_COMMENTS.YELP_ID IS 'The identifier for the user who left the comment.';
COMMENT ON COLUMN REVIEW_COMMENTS.COMMENT_VALUE IS 'The text of the comment.';
Also, do not store dates as VARCHAR2 column.
Your data structure only allows 2 comments for each review (at least 2 chars for each comment, plus the separators; any other comment will not fit in 7 chars), but assuming that's what you want, you could try to get all the users that are not in the review table
... from yelp_user yu
where not exists (select 1 from reviews r where r.author = yu.yelp_id)
and his id is on the list of comments. I would search it using instr:
and exists (
select 1
from reviews r
-- The list is stored like "Y3, Y2", so spaces are stripped before the
-- comma-delimited match; otherwise only the first entry could ever match.
where instr(',' || replace(r.list_of_comments, ' ', '') || ',', ',' || yu.yelp_id || ',' , 1, 1) > 0)
I've concatenated ',' on both sides to avoid the case where you look for Y1 and end up getting a false positive when Y11 is the one that commented.
As your goal is to get users who commented at least twice, you could move the review table to from and put all the SQL in a subquery, grouping the user id on the external SQL.
=)

Tag Cloud based on weighted usage

I'd like to create a weighted usage ranking / popularity query (or batch update, if the query proves too strenuous for real-time use!) but I've been drawing a blank. Hopefully you'll have a better idea as to how to do this.
I've simplified my database to help illustrate the problem (see diagram, below!) Basically, when a User selects a specific Blog via a Tag, I add an entry to the TagLog table. Assume for this example that the collection of Blogs and Tags remain static. Assuming the above, I'd like to do the following:
Find the Top 10 Blogs for any given Tag
Find the Top 10 Blogs for any given Tag and User
The real difficulty comes from the fact that I'd like to weight the results such that more recent TagLog entries have greater significance.
Any help in this regard would be greatly appreciated! Thanks...
This should get you headed somewhere useful:
-- Sample data. Table variables and scalar variables use the @ prefix in T-SQL.
declare @Blogs as Table ( BlogId Int Identity, URL VarChar(256) )
insert into @Blogs ( URL ) values
( 'www.google.com' ), ( 'www.java.com' )
declare @Tags as Table ( TagId Int Identity, BlogId Int, Tag VarChar(64) )
insert into @Tags ( BlogId, Tag ) values
( 1, 'Not Evil' ), ( 2, 'Buggy' )
declare @TagLog as Table ( TagId Int, UserGuid UniqueIdentifier, Visited DateTime )
insert into @TagLog ( TagId, UserGuid, Visited ) values
( 1, NewId(), '20130502' ), ( 1, NewId(), '20130508' ), ( 1, NewId(), '20130515' ),
( 2, NewId(), '20130501' ), ( 2, NewId(), '20130508' ), ( 2, NewId(), '20130515' )
declare @Now as DateTime = '20130516' -- Test value.
-- Display all sample data.
select *, DateDiff( day, TL.Visited, @Now ) as Age -- Use appropriate units, e.g. week, minute.
from @Blogs as B inner join
@Tags as T on T.BlogId = B.BlogId inner join
@TagLog as TL on TL.TagId = T.TagId
-- Compute a weight based on age.
-- Use the reciprocal of the age so that newer visits have higher weight.
-- Add 1.0 to avoid divide by zero errors.
select T.TagId, Count( 42 ) as Visits, Sum( 1.0 / ( DateDiff( day, TL.Visited, @Now ) + 1.0 ) ) as AgeWeight
from @Blogs as B inner join
@Tags as T on T.BlogId = B.BlogId inner join
@TagLog as TL on TL.TagId = T.TagId
group by T.TagId

How to create a stored procedure to find cliques in the table of connections between users

Looking for a way to retrieve communities from a large dataset, I came across an article about an algorithm which seems to be appropriate for large datasets. The data is stored in two tables, users (nodes) and connections, and I would like to retrieve the communities by pure SQL queries without the help of custom applications (I'm using SQL Server 2008).
The algorithm to retrieve the cliques is the following:
Read the graph G
Generate set neighbors(v) for every vertex of G
for each vertex v of G
call recursive_find_cliques(v, neighbors(v))
end for
Function recursive_find_cliques(x, n)
for each vertex t ∈ n by ascending order calculate set sigma
if sigma is not empty
extend x with t
call recursive_find_cliques(x, sigma)
end if
end for
where sigma is the set of vertices that could constitute triangles with v and its neighbors.
I already created a stored procedure which returns a table of neighbors of a selected node, but so far I haven't dealt with SQL functions and advanced queries, so the question is the following:
Does anyone know how to rewrite the
algorithm above in sql in order to get
the set of cliques? As the question
might be a little abstract, I may
point out that the main problem is to
create a recursive function
(recursive_find_cliques(x, n)) which
takes a table (n) as an argument).
Thank you!
EDIT:
Here is the stored procedure created so far:
-- Unfinished clique finder: walks every user once and, for each, loads the
-- user's mutual neighbors into #Neighbors. The clique search itself is still
-- a placeholder, so #Cliques is returned empty.
-- Temp tables keep the # prefix; scalar variables use the @ prefix.
CREATE PROCEDURE [dbo].[Peamc_Test]
AS
BEGIN
    SET XACT_ABORT ON
    BEGIN TRAN
    SET NOCOUNT ON;

    -- Users still to be processed; Observed flips to 1 once a user is visited.
    CREATE TABLE #Users
    (
        UserId int NOT NULL,
        userLabel varchar(50) PRIMARY KEY NOT NULL,
        Observed bit NOT NULL
    )
    -- Neighbors of the user currently being processed (refilled each iteration).
    CREATE TABLE #Neighbors
    (
        UserId int NOT NULL,
        userLabel varchar(50) NOT NULL PRIMARY KEY,
        Retrieved bit NOT NULL
    )
    -- Not used yet by the code below.
    CREATE TABLE #ConnectedVertices
    (
        UserId int NOT NULL,
        userLabel varchar(50) NOT NULL PRIMARY KEY
    )
    -- Result set: one row per (clique, member).
    CREATE TABLE #Cliques
    (
        CliqueId int NOT NULL,
        UserId varchar(50) NOT NULL
    )

    DECLARE @UsersCount int
    DECLARE @ii int
    DECLARE @User varchar(50)
    DECLARE @NeighborsCount int

    INSERT INTO #Users(UserId, userLabel, Observed)
    SELECT user_id, userLabel, 0
    FROM dbo.test_users
    WHERE user_id IS NOT NULL

    SELECT @UsersCount = COUNT(*) FROM #Users
    SELECT @ii = 1

    WHILE @ii <= @UsersCount
    BEGIN
        --select user: lowest UserId not visited yet
        SELECT TOP 1 @User = userLabel FROM #Users WHERE Observed = 0 ORDER BY UserId
        UPDATE #Users SET Observed = 1 WHERE userLabel = @User

        --Get user's neighbors (whole table is cleared on purpose each iteration)
        DELETE FROM #Neighbors
        -- A neighbor is a party that both called @User and was called by @User.
        -- NOTE(review): repeated connection rows for the same pair would produce
        -- duplicate labels and violate the #Neighbors primary key — confirm the
        -- source table holds one row per pair.
        INSERT INTO #Neighbors(UserId, userLabel, Retrieved)
        SELECT u.user_id, t2.neighbor, 0
        FROM (
            SELECT CALLING_NEIGHBORS.neighbor
            FROM (
                SELECT mc.calling_party AS neighbor
                FROM monthly_connections_test mc
                WHERE mc.called_party = @User
            ) AS CALLING_NEIGHBORS
            INNER JOIN (
                SELECT mc.called_party AS neighbor
                FROM monthly_connections_test mc
                WHERE mc.calling_party = @User
            ) AS CALLED_NEIGHBORS
                ON CALLING_NEIGHBORS.neighbor = CALLED_NEIGHBORS.neighbor
        ) AS t2
        INNER JOIN test_users u ON t2.neighbor = u.userLabel

        SELECT @NeighborsCount = COUNT(*) FROM #Neighbors
        SELECT @ii = @ii + 1
        --HERE the function recursive_find_cliques has to search for cliques and insert the found ones in #cliques
    END

    SELECT * FROM #Cliques
    -- Close the transaction opened above; without this the procedure exits
    -- with an open transaction.
    COMMIT TRAN
END
It doesn't return anything yet as it is not finished. It does, though, retrieve all neighbors for the currently selected node, and the next step is to implement the recursive_find_cliques function.
I realised that my first answer only works when each clique has at least one user who is not referred to by any others in that clique. In other words, closed cliques like A-B, B-C, C-A will not be found.
Here is a solution which solves this. Again we have users with IDs, now 1..20. There are several cases of neighbouring relations that need to be handled:
Compared to the simple case, it is harder to find a unique starter for each clique.
We achieve this with a little sleight of hand:
Reorder the neighbours so that for all references A-B, A is less than B, ignoring any A=B.
From these, remove any A-X references if there are any X-A, which could cause a loop. This will never remove references to A completely because X-A remains and A-X will be added in the recursion.
The resultant set are the 'starting' users and we use them to prime the CTE:
-- Get all pairs, where UserA < UserB, dropping any A=B or B=A
WITH LRNeighbours(A, B) AS (
SELECT
Neighbours.UserA, Neighbours.UserB
FROM
Neighbours
WHERE
Neighbours.UserA < Neighbours.UserB
UNION ALL
SELECT DISTINCT
Neighbours.UserB, Neighbours.UserA
FROM
Neighbours
WHERE
Neighbours.UserA > Neighbours.UserB
),
-- Isolate those that are not referred to by a higher numbered key
Starters(userid) AS (
SELECT DISTINCT
A
FROM
LRNeighbours
WHERE
A NOT IN (
SELECT
B
FROM
LRNeighbours
)
),
-- The recursive Common Table Expression
cliques(userid, clique) AS (
-- Number starters 1..N
SELECT
userid, ROW_NUMBER() OVER(ORDER BY userid) AS clique
FROM
Starters
UNION ALL
-- Recurse, adding users referred by siblings, avoiding starters themselves
SELECT
B, clique
FROM
LRNeighbours INNER JOIN
cliques ON
LRNeighbours.A = cliques.userid
AND B NOT IN (
SELECT
userid
FROM
starters
)
)
SELECT DISTINCT
clique, userid
FROM
cliques
ORDER BY
clique, userid
Results:
1 1
1 2
2 3
2 4
3 5
3 6
3 7
3 8
4 9
4 10
4 11
4 12
4 13
5 14
5 15
5 16
5 17
5 18
5 19
5 20
-- Users: identity-numbered list of user names.
CREATE TABLE [dbo].[Users]
(
    [UserID]   [int] IDENTITY(1,1) NOT NULL,
    [UserName] [varchar](50) NOT NULL
) ON [PRIMARY]
-- Neighbours: one row per UserA -> UserB reference.
CREATE TABLE [dbo].[Neighbours]
(
    [UserA] [int] NOT NULL,
    [UserB] [int] NOT NULL
) ON [PRIMARY]
Users populated with 1..8 and Neighbours
UserA UserB
1 2
2 3
4 5
4 6
5 7
7 8
Then:
-- Simple case: a clique starts at every user nobody refers to (never a UserB)
-- and grows by following UserA -> UserB references.
WITH cliques(userid, clique) AS (
    -- Anchor: unreferenced users, numbered 1..N as clique ids.
    SELECT u.userid, ROW_NUMBER() OVER (ORDER BY u.userid) AS clique
    FROM Users AS u
    WHERE u.UserID NOT IN (SELECT n.UserB FROM Neighbours AS n)
    UNION ALL
    -- Step: add the users referenced by current clique members.
    SELECT n.UserB, c.clique
    FROM neighbours AS n
    INNER JOIN cliques AS c
        ON n.UserA = c.userid
)
SELECT c.clique, c.userid
FROM cliques AS c
ORDER BY c.clique, c.userid
Result:
clique userid
1 1
1 2
1 3
2 4
2 5
2 6
2 7
2 8
See : Recursive Queries Using Common Table Expressions
I've added two labels and two GOTO statements
-- Variant of the unfinished clique finder that branches to a Clique_Find
-- section for each user and returns to the loop through Clique_Return.
-- Temp tables keep the # prefix; scalar variables use the @ prefix.
CREATE PROCEDURE [dbo].[Peamc_Test]
AS
BEGIN
    SET XACT_ABORT ON
    BEGIN TRAN
    SET NOCOUNT ON;

    -- Users still to be processed; Observed flips to 1 once a user is visited.
    CREATE TABLE #Users
    (
        UserId int NOT NULL,
        userLabel varchar(50) PRIMARY KEY NOT NULL,
        Observed bit NOT NULL
    )
    -- Neighbors of the user currently being processed (refilled each iteration).
    CREATE TABLE #Neighbors
    (
        UserId int NOT NULL,
        userLabel varchar(50) NOT NULL PRIMARY KEY,
        Retrieved bit NOT NULL
    )
    -- Not used yet by the code below.
    CREATE TABLE #ConnectedVertices
    (
        UserId int NOT NULL,
        userLabel varchar(50) NOT NULL PRIMARY KEY
    )
    -- Result set: one row per (clique, member).
    CREATE TABLE #Cliques
    (
        CliqueId int NOT NULL,
        UserId varchar(50) NOT NULL
    )

    DECLARE @UsersCount int
    DECLARE @ii int
    DECLARE @User varchar(50)
    DECLARE @NeighborsCount int

    INSERT INTO #Users(UserId, userLabel, Observed)
    SELECT user_id, userLabel, 0
    FROM dbo.test_users
    WHERE user_id IS NOT NULL

    SELECT @UsersCount = COUNT(*) FROM #Users
    SELECT @ii = 1

    WHILE @ii <= @UsersCount
    BEGIN
        --select user: lowest UserId not visited yet
        SELECT TOP 1 @User = userLabel FROM #Users WHERE Observed = 0 ORDER BY UserId
        UPDATE #Users SET Observed = 1 WHERE userLabel = @User

        --Get user's neighbors (whole table is cleared on purpose each iteration)
        DELETE FROM #Neighbors
        -- A neighbor is a party that both called @User and was called by @User.
        INSERT INTO #Neighbors(UserId, userLabel, Retrieved)
        SELECT u.user_id, t2.neighbor, 0
        FROM (
            SELECT CALLING_NEIGHBORS.neighbor
            FROM (
                SELECT mc.calling_party AS neighbor
                FROM monthly_connections_test mc
                WHERE mc.called_party = @User
            ) AS CALLING_NEIGHBORS
            INNER JOIN (
                SELECT mc.called_party AS neighbor
                FROM monthly_connections_test mc
                WHERE mc.calling_party = @User
            ) AS CALLED_NEIGHBORS
                ON CALLING_NEIGHBORS.neighbor = CALLED_NEIGHBORS.neighbor
        ) AS t2
        INNER JOIN test_users u ON t2.neighbor = u.userLabel

        SELECT @NeighborsCount = COUNT(*) FROM #Neighbors
        SELECT @ii = @ii + 1
        GOTO Clique_Find
        --HERE the function recursive_find_cliques has to search for cliques and insert the found ones in #cliques
        --------------------
        Clique_Return:
        --------------------
    END

    SELECT * FROM #Cliques
    -- Close the transaction opened above.
    COMMIT TRAN
    -- Stop here: without RETURN, execution would fall through into Clique_Find
    -- and jump back into the loop, repeating the final SELECT forever.
    RETURN
END
--------------------
Clique_Find:
--------------------
-- Code goes here: search for cliques of @User using #Neighbors and insert
-- the ones found into #Cliques.
GOTO Clique_Return

How do I tally votes in MySQL?

I've got a database table called votes with three columns 'timestamp', 'voter', and 'voted_for'.
Each entry in the table represents one vote. I want to tally all of the votes for each 'voted_for' with some conditions.
The conditions are as follows:
Each voter can vote only once, in the case of multiple votes by a single voter the most recent vote counts.
Only votes made before a specified time are counted.
try this:
-- Tally one vote per voter: the voter's most recent vote cast before the cutoff.
-- The cutoff is applied inside the subquery so that a voter whose very last
-- vote came after the cutoff still counts with their latest in-time vote.
-- Replace the {...} placeholder with the cutoff date and time.
SELECT voted_for, count(*)
FROM votes v
INNER JOIN (
    SELECT voter, MAX(timestamp) AS lastTime
    FROM votes
    WHERE timestamp < {date and time of last vote allowed}
    GROUP BY voter
) A
    ON A.voter = v.voter AND A.lastTime = v.timestamp
GROUP BY voted_for
the following may prove helpful:
-- Users with unique usernames, stored as varbinary.
DROP TABLE IF EXISTS users;
CREATE TABLE users (
    user_id  INT UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY,
    username VARBINARY(32) NOT NULL,
    UNIQUE KEY users_username_idx (username)
) ENGINE=InnoDB;

INSERT INTO users (username)
VALUES ('f00'), ('foo'), ('bar'), ('bAr'), ('bish'), ('bash'), ('bosh');
-- Pictures with running vote statistics, which the insert trigger on
-- picture_vote keeps up to date.
DROP TABLE IF EXISTS picture;
CREATE TABLE picture (
    picture_id INT UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY,
    user_id    INT UNSIGNED NOT NULL,            -- uploader / owner of the picture
    tot_votes  INT UNSIGNED NOT NULL DEFAULT 0,  -- number of votes received
    tot_rating INT UNSIGNED NOT NULL DEFAULT 0,  -- sum of all ratings received
    avg_rating DECIMAL(5,2) NOT NULL DEFAULT 0,  -- tot_rating / tot_votes
    KEY picture_user_idx (user_id)
) ENGINE=InnoDB;

INSERT INTO picture (user_id)
VALUES (1), (2), (3), (4), (5), (6), (7), (1), (1), (2), (3), (6), (7), (7), (5);
-- One vote per (picture, user); the composite primary key enforces that.
DROP TABLE IF EXISTS picture_vote;
CREATE TABLE picture_vote (
    picture_id INT UNSIGNED NOT NULL,
    user_id    INT UNSIGNED NOT NULL,                -- the voter
    rating     TINYINT UNSIGNED NOT NULL DEFAULT 0,  -- rating 0 to 5
    PRIMARY KEY (picture_id, user_id)
) ENGINE=InnoDB;
delimiter #
-- Before each new vote row, fold the rating into the picture's running totals.
-- Nothing is updated when this user already has a vote on this picture.
create trigger picture_vote_before_ins_trig before insert on picture_vote
for each row
begin
    declare new_tot_rating int unsigned default 0;
    declare new_tot_votes int unsigned default 0;

    if not exists (
        select 1
        from picture_vote
        where picture_id = new.picture_id
          and user_id = new.user_id
    ) then
        -- Read the current totals and add this vote to them.
        select tot_rating + new.rating, tot_votes + 1
        into new_tot_rating, new_tot_votes
        from picture
        where picture_id = new.picture_id;

        -- Write the totals back together with the recomputed average.
        update picture
        set tot_votes = new_tot_votes,
            tot_rating = new_tot_rating,
            avg_rating = new_tot_rating / new_tot_votes
        where picture_id = new.picture_id;
    end if;
end #
delimiter ;
-- Sample votes for pictures 1-3; each insert fires the trigger above.
INSERT INTO picture_vote (picture_id, user_id, rating)
VALUES
    (1,1,5), (1,2,3), (1,3,3), (1,4,2), (1,5,1),
    (2,1,1), (2,2,2), (2,3,3), (2,4,4), (2,5,5), (2,6,1), (2,7,2),
    (3,1,5), (3,2,5), (3,3,5), (3,4,5), (3,5,5), (3,6,5), (3,7,5);

-- Show the resulting contents of all three tables.
SELECT * FROM users ORDER BY user_id;
SELECT * FROM picture ORDER BY picture_id;
SELECT * FROM picture_vote ORDER BY picture_id, user_id;
-- Tally votes cast before the cutoff, counting only each voter's latest one.
-- A vote is skipped when the same voter cast a later vote that is also before
-- the cutoff; otherwise a voter who switched choice would count for both.
SELECT voted_for,COUNT(DISTINCT voter)
FROM votes
WHERE timestamp < '2010-11-18 21:05:00'
  AND NOT EXISTS (
      SELECT 1
      FROM votes AS later
      WHERE later.voter = votes.voter
        AND later.timestamp > votes.timestamp
        AND later.timestamp < '2010-11-18 21:05:00'
  )
GROUP BY voted_for