Does anyone know if there's an efficient way to extract all the words from a single column and count the frequency of each word in SQL Server? I only have read-only access to my database so I can't create a self-defined function to do this.
Here's a reproducible example:
CREATE TABLE words
(
id INT PRIMARY KEY,
text_column VARCHAR(1000)
);
INSERT INTO words (id, text_column)
VALUES
(1, 'SQL Server is a popular database management system'),
(2, 'It is widely used for data storage and retrieval'),
(3, 'SQL Server is a powerful tool for data analysis');
I have found this code but it's not working correctly, and I think it's too complicated to understand:
WITH E1(N) AS
(
SELECT 1
FROM (VALUES
(1),(1),(1),(1),(1),(1),(1),(1),(1),(1)
) t(N)
),
E2(N) AS (SELECT 1 FROM E1 a CROSS JOIN E1 b),
E4(N) AS (SELECT 1 FROM E2 a CROSS JOIN E2 b)
SELECT
LOWER(x.Item) AS [Word],
COUNT(*) AS [Counts]
FROM
(SELECT * FROM words) a
CROSS APPLY
(SELECT
ItemNumber = ROW_NUMBER() OVER(ORDER BY l.N1),
Item = LTRIM(RTRIM(SUBSTRING(a.text_column, l.N1, l.L1)))
FROM
(SELECT
s.N1,
L1 = ISNULL(NULLIF(CHARINDEX(' ',a.text_column,s.N1),0)-s.N1,4000)
FROM
(SELECT 1
UNION ALL
SELECT t.N+1
FROM
(SELECT TOP (ISNULL(DATALENGTH(a.text_column)/2,0))
ROW_NUMBER() OVER (ORDER BY (SELECT NULL))
FROM E4) t(N)
WHERE SUBSTRING(a.text_column ,t.N,1) = ' '
) s(N1)
) l(N1, L1)
) x
WHERE
x.item <> ''
AND x.Item NOT IN ('0o', '0s', '3a', '3b', '3d', '6b', '6o', 'a', 'a1', 'a2', 'a3', 'a4', 'ab', 'able', 'about', 'above', 'abst', 'ac', 'accordance', 'according', 'accordingly', 'across', 'act', 'actually', 'ad', 'added', 'adj', 'ae', 'af', 'affected', 'affecting', 'affects', 'after', 'afterwards', 'ag', 'again', 'against', 'ah', 'ain', 'ain''t', 'aj', 'al', 'all', 'allow', 'allows', 'almost', 'alone', 'along', 'already', 'also', 'although', 'always', 'am', 'among', 'amongst', 'amoungst', 'amount', 'an', 'and', 'announce', 'another', 'any', 'anybody', 'anyhow', 'anymore', 'anyone', 'anything', 'anyway', 'anyways', 'anywhere', 'ao', 'ap', 'apart', 'apparently', 'appear', 'appreciate', 'appropriate', 'approximately', 'ar', 'are', 'aren', 'arent', 'aren''t', 'arise', 'around', 'as', 'a''s', 'aside', 'ask', 'asking', 'associated', 'at', 'au', 'auth', 'av', 'available', 'aw', 'away', 'awfully', 'ax', 'ay', 'az', 'b', 'b1', 'b2', 'b3', 'ba', 'back', 'bc', 'bd', 'be', 'became', 'because', 'become', 'becomes', 'becoming', 'been', 'before', 'beforehand', 'begin', 'beginning', 'beginnings', 'begins', 'behind', 'being', 'believe', 'below', 'beside', 'besides', 'best', 'better', 'between', 'beyond', 'bi', 'bill', 'biol', 'bj', 'bk', 'bl', 'bn', 'both', 'bottom', 'bp', 'br', 'brief', 'briefly', 'bs', 'bt', 'bu', 'but', 'bx', 'by', 'c', 'c1', 'c2', 'c3', 'ca', 'call', 'came', 'can', 'cannot', 'cant', 'can''t', 'cause', 'causes', 'cc', 'cd', 'ce', 'certain', 'certainly', 'cf', 'cg', 'ch', 'changes', 'ci', 'cit', 'cj', 'cl', 'clearly', 'cm', 'c''mon', 'cn', 'co', 'com', 'come', 'comes', 'con', 'concerning', 'consequently', 'consider', 'considering', 'contain', 'containing', 'contains', 'corresponding', 'could', 'couldn', 'couldnt', 'couldn''t', 'course', 'cp', 'cq', 'cr', 'cry', 'cs', 'c''s', 'ct', 'cu', 'currently', 'cv', 'cx', 'cy', 'cz', 'd', 'd2', 'da', 'date', 'dc', 'dd', 'de', 'definitely', 'describe', 'described', 'despite', 'detail', 'df', 'di', 'did', 'didn', 'didn''t', 'different', 'dj', 'dk', 'dl', 'do', 'does', 'doesn', 'doesn''t', 'doing', 'don', 'done', 'don''t', 'down', 'downwards', 'dp', 'dr', 'ds', 'dt', 'du', 'due', 'during', 'dx', 'dy', 'e', 'e2', 'e3', 'ea', 'each', 'ec', 'ed', 'edu', 'ee', 'ef', 'effect', 'eg', 'ei', 'eight', 'eighty', 'either', 'ej', 'el', 'eleven', 'else', 'elsewhere', 'em', 'empty', 'en', 'end', 'ending', 'enough', 'entirely', 'eo', 'ep', 'eq', 'er', 'es', 'especially', 'est', 'et', 'et-al', 'etc', 'eu', 'ev', 'even', 'ever', 'every', 'everybody', 'everyone', 'everything', 'everywhere', 'ex', 'exactly', 'example', 'except', 'ey', 'f', 'f2', 'fa', 'far', 'fc', 'few', 'ff', 'fi', 'fifteen', 'fifth', 'fify', 'fill', 'find', 'fire', 'first', 'five', 'fix', 'fj', 'fl', 'fn', 'fo', 'followed', 'following', 'follows', 'for', 'former', 'formerly', 'forth', 'forty', 'found', 'four', 'fr', 'from', 'front', 'fs', 'ft', 'fu', 'full', 'further', 'furthermore', 'fy', 'g', 'ga', 'gave', 'ge', 'get', 'gets', 'getting', 'gi', 'give', 'given', 'gives', 'giving', 'gj', 'gl', 'go', 'goes', 'going', 'gone', 'got', 'gotten', 'gr', 'greetings', 'gs', 'gy', 'h', 'h2', 'h3', 'had', 'hadn', 'hadn''t', 'happens', 'hardly', 'has', 'hasn', 'hasnt', 'hasn''t', 'have', 'haven', 'haven''t', 'having', 'he', 'hed', 'he''d', 'he''ll', 'hello', 'help', 'hence', 'her', 'here', 'hereafter', 'hereby', 'herein', 'heres', 'here''s', 'hereupon', 'hers', 'herself', 'hes', 'he''s', 'hh', 'hi', 'hid', 'him', 'himself', 'his', 'hither', 'hj', 'ho', 'home', 'hopefully', 'how', 'howbeit', 'however', 'how''s', 'hr', 'hs', 'http', 'hu', 'hundred', 'hy', 'i', 'i2', 'i3', 'i4', 'i6', 'i7', 'i8', 'ia', 'ib', 'ibid', 'ic', 'id', 'i''d', 'ie', 'if', 'ig', 'ignored', 'ih', 'ii', 'ij', 'il', 'i''ll', 'im', 'i''m', 'immediate', 'immediately', 'importance', 'important', 'in', 'inasmuch', 'inc', 'indeed', 'index', 'indicate', 'indicated', 'indicates', 'information', 'inner', 'insofar', 'instead', 'interest', 'into', 'invention', 'inward', 'io', 'ip', 'iq', 'ir', 'is', 'isn', 'isn''t', 'it', 'itd', 'it''d', 'it''ll', 'its', 'it''s', 'itself', 'iv', 'i''ve', 'ix', 'iy', 'iz', 'j', 'jj', 'jr', 'js', 'jt', 'ju', 'just', 'k', 'ke', 'keep', 'keeps', 'kept', 'kg', 'kj', 'km', 'know', 'known', 'knows', 'ko', 'l', 'l2', 'la', 'largely', 'last', 'lately', 'later', 'latter', 'latterly', 'lb', 'lc', 'le', 'least', 'les', 'less', 'lest', 'let', 'lets', 'let''s', 'lf', 'like', 'liked', 'likely', 'line', 'little', 'lj', 'll', 'll', 'ln', 'lo', 'look', 'looking', 'looks', 'los', 'lr', 'ls', 'lt', 'ltd', 'm', 'm2', 'ma', 'made', 'mainly', 'make', 'makes', 'many', 'may', 'maybe', 'me', 'mean', 'means', 'meantime', 'meanwhile', 'merely', 'mg', 'might', 'mightn', 'mightn''t', 'mill', 'million', 'mine', 'miss', 'ml', 'mn', 'mo', 'more', 'moreover', 'most', 'mostly', 'move', 'mr', 'mrs', 'ms', 'mt', 'mu', 'much', 'mug', 'must', 'mustn', 'mustn''t', 'my', 'myself', 'n', 'n2', 'na', 'name', 'namely', 'nay', 'nc', 'nd', 'ne', 'near', 'nearly', 'necessarily', 'necessary', 'need', 'needn', 'needn''t', 'needs', 'neither', 'never', 'nevertheless', 'new', 'next', 'ng', 'ni', 'nine', 'ninety', 'nj', 'nl', 'nn', 'no', 'nobody', 'non', 'none', 'nonetheless', 'noone', 'nor', 'normally', 'nos', 'not', 'noted', 'nothing', 'novel', 'now', 'nowhere', 'nr', 'ns', 'nt', 'ny', 'o', 'oa', 'ob', 'obtain', 'obtained', 'obviously', 'oc', 'od', 'of', 'off', 'often', 'og', 'oh', 'oi', 'oj', 'ok', 'okay', 'ol', 'old', 'om', 'omitted', 'on', 'once', 'one', 'ones', 'only', 'onto', 'oo', 'op', 'oq', 'or', 'ord', 'os', 'ot', 'other', 'others', 'otherwise', 'ou', 'ought', 'our', 'ours', 'ourselves', 'out', 'outside', 'over', 'overall', 'ow', 'owing', 'own', 'ox', 'oz', 'p', 'p1', 'p2', 'p3', 'page', 'pagecount', 'pages', 'par', 'part', 'particular', 'particularly', 'pas', 'past', 'pc', 'pd', 'pe', 'per', 'perhaps', 'pf', 'ph', 'pi', 'pj', 'pk', 'pl', 'placed', 'please', 'plus', 'pm', 'pn', 'po', 'poorly', 'possible', 'possibly', 'potentially', 'pp', 'pq', 'pr', 'predominantly', 'present', 'presumably', 'previously', 'primarily', 'probably', 'promptly', 'proud', 'provides', 'ps', 'pt', 'pu', 'put', 'py', 'q', 'qj', 'qu', 'que', 'quickly', 'quite', 'qv', 'r', 'r2', 'ra', 'ran', 'rather', 'rc', 'rd', 're', 'readily', 'really', 'reasonably', 'recent', 'recently', 'ref', 'refs', 'regarding', 'regardless', 'regards', 'related', 'relatively', 'research', 'research-articl', 'respectively', 'resulted', 'resulting', 'results', 'rf', 'rh', 'ri', 'right', 'rj', 'rl', 'rm', 'rn', 'ro', 'rq', 'rr', 'rs', 'rt', 'ru', 'run', 'rv', 'ry', 's', 's2', 'sa', 'said', 'same', 'saw', 'say', 'saying', 'says', 'sc', 'sd', 'se', 'sec', 'second', 'secondly', 'section', 'see', 'seeing', 'seem', 'seemed', 'seeming', 'seems', 'seen', 'self', 'selves', 'sensible', 'sent', 'serious', 'seriously', 'seven', 'several', 'sf', 'shall', 'shan', 'shan''t', 'she', 'shed', 'she''d', 'she''ll', 'shes', 'she''s', 'should', 'shouldn', 'shouldn''t', 'should''ve', 'show', 'showed', 'shown', 'showns', 'shows', 'si', 'side', 'significant', 'significantly', 'similar', 'similarly', 'since', 'sincere', 'six', 'sixty', 'sj', 'sl', 'slightly', 'sm', 'sn', 'so', 'some', 'somebody', 'somehow', 'someone', 'somethan', 'something', 'sometime', 'sometimes', 'somewhat', 'somewhere', 'soon', 'sorry', 'sp', 'specifically', 'specified', 'specify', 'specifying', 'sq', 'sr', 'ss', 'st', 'still', 'stop', 'strongly', 'sub', 'substantially', 'successfully', 'such', 'sufficiently', 'suggest', 'sup', 'sure', 'sy', 'system', 'sz', 't', 't1', 't2', 't3', 'take', 'taken', 'taking', 'tb', 'tc', 'td', 'te', 'tell', 'ten', 'tends', 'tf', 'th', 'than', 'thank', 'thanks', 'thanx', 'that', 'that''ll', 'thats', 'that''s', 'that''ve', 'the', 'their', 'theirs', 'them', 'themselves', 'then', 'thence', 'there', 'thereafter', 'thereby', 'thered', 'therefore', 'therein', 'there''ll', 'thereof', 'therere', 'theres', 'there''s', 'thereto', 'thereupon', 'there''ve', 'these', 'they', 'theyd', 'they''d', 'they''ll', 'theyre', 'they''re', 'they''ve', 'thickv', 'thin', 'think', 'third', 'this', 'thorough', 'thoroughly', 'those', 'thou', 'though', 'thoughh', 'thousand', 'three', 'throug', 'through', 'throughout', 'thru', 'thus', 'ti', 'til', 'tip', 'tj', 'tl', 'tm', 'tn', 'to', 'together', 'too', 'took', 'top', 'toward', 'towards', 'tp', 'tq', 'tr', 'tried', 'tries', 'truly', 'try', 'trying', 'ts', 't''s', 'tt', 'tv', 'twelve', 'twenty', 'twice', 'two', 'tx', 'u', 'u201d', 'ue', 'ui', 'uj', 'uk', 'um', 'un', 'under', 'unfortunately', 'unless', 'unlike', 'unlikely', 'until', 'unto', 'uo', 'up', 'upon', 'ups', 'ur', 'us', 'use', 'used', 'useful', 'usefully', 'usefulness', 'uses', 'using', 'usually', 'ut', 'v', 'va', 'value', 'various', 'vd', 've', 've', 'very', 'via', 'viz', 'vj', 'vo', 'vol', 'vols', 'volumtype', 'vq', 'vs', 'vt', 'vu', 'w', 'wa', 'want', 'wants', 'was', 'wasn', 'wasnt', 'wasn''t', 'way', 'we', 'wed', 'we''d', 'welcome', 'well', 'we''ll', 'well-b', 'went', 'were', 'we''re', 'weren', 'werent', 'weren''t', 'we''ve', 'what', 'whatever', 'what''ll', 'whats', 'what''s', 'when', 'whence', 'whenever', 'when''s', 'where', 'whereafter', 'whereas', 'whereby', 'wherein', 'wheres', 'where''s', 'whereupon', 'wherever', 'whether', 'which', 'while', 'whim', 'whither', 'who', 'whod', 'whoever', 'whole', 'who''ll', 'whom', 'whomever', 'whos', 'who''s', 'whose', 'why', 'why''s', 'wi', 'widely', 'will', 'willing', 'wish', 'with', 'within', 'without', 'wo', 'won', 'wonder', 'wont', 'won''t', 'words', 'world', 'would', 'wouldn', 'wouldnt', 'wouldn''t', 'www', 'x', 'x1', 'x2', 'x3', 'xf', 'xi', 'xj', 'xk', 'xl', 'xn', 'xo', 'xs', 'xt', 'xv', 'xx', 'y', 'y2', 'yes', 'yet', 'yj', 'yl', 'you', 'youd', 'you''d', 'you''ll', 'your', 'youre', 'you''re', 'yours', 'yourself', 'yourselves', 'you''ve', 'yr', 'ys', 'yt', 'z', 'zero', 'zi', 'zz')
GROUP BY x.Item
ORDER BY COUNT(*) DESC
Here's the result of the above code, as you can see it's not counting correctly:
Word Counts
server 2
sql 2
data 1
database 1
popular 1
powerful 1
Can anyone help on this? Would be really appreciated!
You can make use of String_split here, such as
select value Word, Count(*) Counts
from words
cross apply String_Split(text_column, ' ')
where value not in(exclude list)
group by value
order by counts desc;
You should should the string_split function -- like this
SELECT id, value as aword
FROM words
CROSS APPLY STRING_SPLIT(text_column, ',');
This will create a table with all the words by id -- to get the count do this:
SELECT aword, count(*) as counts
FROM (
SELECT id, value as aword
FROM words
CROSS APPLY STRING_SPLIT(text_column, ',');
) x
GROUP BY aword
You may need to lower case the LOWER(text_column) if you want it to not matter
If you don't have access to STRING_SPLIT function, you can use weird xml trick to convert space to a word node and then shred it with nodes function:
select word, COUNT(*)
from (
select n.value('.', 'nvarchar(50)') AS word
from (
VALUES
(1, 'SQL Server is a popular database management system'),
(2, 'It is widely used for data storage and retrieval'),
(3, 'SQL Server is a powerful tool for data analysis')
) AS t (id, txt)
CROSS APPLY (
SELECT CAST('<x>' + REPLACE(txt, ' ', '</x><x>') + '</x>' AS XML) x
) x
CROSS APPLY x.nodes('x') z(n)
) w
GROUP BY word
Of course, this will fail on "bad" words and invalid xml-characters but it can be worked on. Text processing has never been SQL Server's strong-point though, so probably better to use some NLP library to do this kind of stuff
I am attempting to build a dynamic query based on a list of tables as received from a user.
I have a couple attempted solutions below.
I think the CTE is the way to go but am having difficulty figuring out how to make it happen.
I would really appreciate whatever genius can let me know how to do this!
these are the tables:
W, WD, WE, WSF, WSFE, XDF, XDFE, Y, YD, Z, ZD
these are the columns to join by for each group of tables:
W, Y, WD, WE, WSF
WID
WSF, WSFE, XDF
WSFID
XDF, XDFE
XDFID
Y, YD, Z
YID
Z, ZD
ZID
if the user selects W, Y, Z then build this query (which could then be executed by exec or sp_executesql):
select * from #W w join #Y y on y.WID = w.WID join #Z z on z.YID = y.YID
declare #Fields table (
ID int identity not NULL,
Name varchar(200)
)
declare #Tables table (
ID int identity not NULL,
Field varchar(200),
TempTable varchar(200)
)
declare #QueryTables table (
ID int identity not NULL,
[Table] varchar(200),
Alias varchar(20)
)
declare #QueryJoins table (
ID int identity not NULL,
Table1 varchar(20),
Col1 varchar(200),
Table2 varchar(20),
Col2 varchar(200)
)
insert #Fields
values
('W'),
('Y'),
('Z')
insert #Tables
values
('W', '#W'),
('WD', '#WD'),
('WE', '#WE'),
('WSF', '#WSF'),
('WSFE', '#WSFE'),
('XDF', '#XDF'),
('XDFE', '#XDFE'),
('Y', '#Y'),
('YD', '#YD'),
('Z', '#Z'),
('ZD', '#ZD')
insert #QueryTables
values
('#W', 'w'),
('#WD', 'wd'),
('#WE', 'we'),
('#WSF', 'wsf'),
('#WSFE', 'wsfe'),
('#XDF', 'xdf'),
('#XDFE', 'xdfe'),
('#Y', 'y'),
('#YD', 'yd'),
('#Z', 'z'),
('#ZD', 'zd')
insert #QueryJoins
values
('w', 'WID', 'wd', 'WID'),
('w', 'WID', 'we', 'WID'),
('w', 'WID', 'wsf', 'WID'),
('w', 'WID', 'xdf', 'WID'),
('w', 'WID', 'y', 'WID'),
('wd', 'WID', 'w', 'WID'),
('we', 'WID', 'wd', 'WID'),
('wsf', 'WID', 'wd', 'WID'),
('wsf', 'WSFID', 'wsfe', 'WSFID'),
('wsfe', 'WSFID', 'wsf', 'WSFID'),
('wsf', 'WSFID', 'xdf', 'WSFID'),
('xdf', 'WID', 'w', 'WID'),
('xdf', 'WSFID', 'wsf', 'WSFID'),
('xdf', 'XDFID', 'xdfe', 'XDFID'),
('xdfe', 'XDFID', 'xdf', 'XDFID'),
('y', 'WID', 'w', 'WID'),
('y', 'YID', 'yd', 'YID'),
('yd', 'YID', 'y', 'YID'),
('y', 'YID', 'z', 'YID'),
('z', 'YID', 'y', 'YID'),
('z', 'ZID', 'zd', 'ZID'),
('zd', 'ZID', 'z', 'ZID')
--attempted solution number 1:
select
*
from #Fields vf
join #Tables vt
on vt.Field = vf.Name
join #QueryTables vqt
on vqt.[Table] = vt.TempTable
join #QueryJoins vqj
on vqj.Table1 = vqt.Alias
join #QueryTables vqt2
on vqt2.Alias = vqj.Table2
join #Tables vt2
on vt2.TempTable = vqt2.[Table]
join #Fields vf2
on vf2.Name = vt2.Field
--attempted solution number 2:
;with cte (FieldID, [Table], Table1, Col1, Table2, Col2, I) as (
select
vf.ID as FieldID,
vqt.[Table],
vqj.Table1,
vqj.Col1,
vqj.Table2,
vqj.Col2,
1
from #Fields vf
join #Tables vt
on vt.Field = vf.Name
join #QueryTables vqt
on vqt.[Table] = vt.TempTable
join #QueryJoins vqj
on vqj.Table1 = vqt.Alias
union all
select
vf.ID as FieldID,
vqt.[Table],
vqj.Table1,
vqj.Col1,
vqj.Table2,
vqj.Col2,
I + 1
from #Fields vf
join #Tables vt
on vt.Field = vf.Name
join #QueryTables vqt
on vqt.[Table] = vt.TempTable
join #QueryJoins vqj
on vqj.Table1 = vqt.Alias
join cte cte
on cte.Table1 = vqj.Table2
and cte.Table2 = vqj.Table1
where I <= FieldID --a feeble attempt to control the recursion
)
select * from cte
An interesting problem, definitely a data modeling code smell (if Z, Y, and YD all have a YID column, sounds like you need a bridge table or similar construct to manage the relationships there. Or if Z and YD are small lookups, just join them all and let the optimizer handle the overhead.)
Note: I made the fields self join on Name < Name, so you only need the QueryJoins data where Table1 < Table2. This would cut your QueryJoins table in half, but you're also missing some of the "correct" joins (ie you have we/wd but not wd/we)
declare #Fields table (
ID int identity not NULL,
Name varchar(200)
)
declare #Tables table (
ID int identity not NULL,
Field varchar(200),
TempTable varchar(200)
)
declare #QueryTables table (
ID int identity not NULL,
[Table] varchar(200),
Alias varchar(20)
)
declare #QueryJoins table (
ID int identity not NULL,
Table1 varchar(20),
Col1 varchar(200),
Table2 varchar(20),
Col2 varchar(200)
)
insert #Fields
values
('W'),
('Y'),
('Z')
insert #Tables
values
('W', '#W'),
('WD', '#WD'),
('WE', '#WE'),
('WSF', '#WSF'),
('WSFE', '#WSFE'),
('XDF', '#XDF'),
('XDFE', '#XDFE'),
('Y', '#Y'),
('YD', '#YD'),
('Z', '#Z'),
('ZD', '#ZD')
insert #QueryTables
values
('#W', 'w'),
('#WD', 'wd'),
('#WE', 'we'),
('#WSF', 'wsf'),
('#WSFE', 'wsfe'),
('#XDF', 'xdf'),
('#XDFE', 'xdfe'),
('#Y', 'y'),
('#YD', 'yd'),
('#Z', 'z'),
('#ZD', 'zd')
insert #QueryJoins
values
('w', 'WID', 'wd', 'WID'),
('w', 'WID', 'we', 'WID'),
('w', 'WID', 'wsf', 'WID'),
('w', 'WID', 'xdf', 'WID'),
('w', 'WID', 'y', 'WID'),
('wd', 'WID', 'w', 'WID'),
('we', 'WID', 'wd', 'WID'),
('wsf', 'WID', 'wd', 'WID'),
('wsf', 'WSFID', 'wsfe', 'WSFID'),
('wsfe', 'WSFID', 'wsf', 'WSFID'),
('wsf', 'WSFID', 'xdf', 'WSFID'),
('xdf', 'WID', 'w', 'WID'),
('xdf', 'WSFID', 'wsf', 'WSFID'),
('xdf', 'XDFID', 'xdfe', 'XDFID'),
('xdfe', 'XDFID', 'xdf', 'XDFID'),
('y', 'YID', 'yd', 'YID'),
('yd', 'YID', 'y', 'YID'),
('y', 'YID', 'z', 'YID'),
('z', 'ZID', 'zd', 'ZID'),
('zd', 'ZID', 'z', 'ZID')
;
with a as (
select
row_number() over (order by Name) as rn, Name, Field, TempTable, [Table], Alias
from #Fields vf
join #Tables vt
on vt.Field = vf.Name
join #QueryTables vqt
on vqt.[Table] = vt.TempTable )
select 'select * from ' + stuff((
select
concat(
case when a.rn =1 then a.TempTable else '' end,
' ',
case when a.rn =1 then a.Alias else '' end,
' join ',
a2.TempTable,
' ' ,
a2.alias,
' on ',
q.Table1,
'.',
q.Col1,
' = ',
q.Table2 ,
'.',
q.Col2 ) from a
left join a a2
on a.name < a2.name
inner join #QueryJoins q
on q.Table1 = a.alias
and q.Table2 = a2.alias
for xml path('')), 1, 1, '')
In the Enrollment_Changes table, the phone model listed is the phone the subscriber changed FROM on that date.
If there is no subsequent change on Enrollment_Changes, the phone the subscriber changed TO is listed on the P_Enrollment table
For example, subscriber 12345678 enrolled on 1/5/2011 with a RAZR. On 11/1/2011 he changed FROM the RAZR. You can see what he changed TO with the next transaction on Enrollment_Changes on 05/19/2012.
How would you find the Count of subs that first enrolled with the iPhone 3?
Here is the code I have for creating the tables
Create Tables: TBL 1
USE [Test2]
GO
/****** Object: Table [dbo].[P_ENROLLMENT] ******/
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
CREATE TABLE [dbo].[P_ENROLLMENT](
[Subid ] [float] NULL,
[Enrollment_Date] [datetime] NULL,
[Channel] [nvarchar](255) NULL,
[Region] [nvarchar](255) NULL,
[Active_Status] [float] NULL,
[Drop_Date] [datetime] NULL,
[Phone_Model] [nvarchar](255) NULL
) ON [PRIMARY]
GO
TBL 2
USE [Test2]
GO
/****** Object: Table [dbo].[ENROLLMENT_CHANGES] ******/
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
CREATE TABLE [dbo].[ENROLLMENT_CHANGES](
[Subid] [float] NULL,
[Cdate] [datetime] NULL,
[Phone_Model] [nvarchar](255) NULL
) ON [PRIMARY]
GO
Insert TBL1
INSERT INTO [P_ENROLLMENT]([Subid ], [Enrollment_Date], [Channel], [Region], [Active_Status], [Drop_Date], [Phone_Model])
VALUES(12345678, '2011-01-05 00:00:00', 'Retail', 'Southeast', 1, NULL, 'iPhone 4');
INSERT INTO [P_ENROLLMENT]([Subid ], [Enrollment_Date], [Channel], [Region], [Active_Status], [Drop_Date], [Phone_Model])
VALUES(12346178, '2011-03-13 00:00:00', 'Indirect Dealers', 'West', 1, NULL, 'HTC Hero');
INSERT INTO [P_ENROLLMENT]([Subid ], [Enrollment_Date], [Channel], [Region], [Active_Status], [Drop_Date], [Phone_Model])
VALUES(12346679, '2011-05-19 00:00:00', 'Indirect Dealers', 'Southeast', 0, '2012-03-15 00:00:00', 'Droid 2');
INSERT INTO [P_ENROLLMENT]([Subid ], [Enrollment_Date], [Channel], [Region], [Active_Status], [Drop_Date], [Phone_Model])
VALUES(12347190, '2011-07-25 00:00:00', 'Retail', 'Northeast', 0, '2012-05-21 00:00:00', 'iPhone 4');
INSERT INTO [P_ENROLLMENT]([Subid ], [Enrollment_Date], [Channel], [Region], [Active_Status], [Drop_Date], [Phone_Model])
VALUES(12347701, '2011-08-14 00:00:00', 'Indirect Dealers', 'West', 1, NULL, 'HTC Hero');
INSERT INTO [P_ENROLLMENT]([Subid ], [Enrollment_Date], [Channel], [Region], [Active_Status], [Drop_Date], [Phone_Model])
VALUES(12348212, '2011-09-30 00:00:00', 'Retail', 'West', 1, NULL, 'Droid 2');
INSERT INTO [P_ENROLLMENT]([Subid ], [Enrollment_Date], [Channel], [Region], [Active_Status], [Drop_Date], [Phone_Model])
VALUES(12348723, '2011-10-20 00:00:00', 'Retail', 'Southeast', 1, NULL, 'Southeast');
INSERT INTO [P_ENROLLMENT]([Subid ], [Enrollment_Date], [Channel], [Region], [Active_Status], [Drop_Date], [Phone_Model])
VALUES(12349234, '2012-01-06 00:00:00', 'Indirect Dealers', 'West', 0, '2012-02-14 00:00:00', 'West');
INSERT INTO [P_ENROLLMENT]([Subid ], [Enrollment_Date], [Channel], [Region], [Active_Status], [Drop_Date], [Phone_Model])
VALUES(12349745, '2012-01-26 00:00:00', 'Retail', 'Northeast', 0, '2012-04-15 00:00:00', 'HTC Hero');
INSERT INTO [P_ENROLLMENT]([Subid ], [Enrollment_Date], [Channel], [Region], [Active_Status], [Drop_Date], [Phone_Model])
VALUES(12350256, '2012-02-11 00:00:00', 'Retail', 'Southeast', 1, NULL, 'iPhone 4');
INSERT INTO [P_ENROLLMENT]([Subid ], [Enrollment_Date], [Channel], [Region], [Active_Status], [Drop_Date], [Phone_Model])
VALUES(12350767, '2012-03-02 00:00:00', 'Indirect Dealers', 'West', 1, NULL, 'Sidekick');
INSERT INTO [P_ENROLLMENT]([Subid ], [Enrollment_Date], [Channel], [Region], [Active_Status], [Drop_Date], [Phone_Model])
VALUES(12351278, '2012-04-18 00:00:00', 'Retail', 'Midwest', 1, NULL, 'iPhone 3');
INSERT INTO [P_ENROLLMENT]([Subid ], [Enrollment_Date], [Channel], [Region], [Active_Status], [Drop_Date], [Phone_Model])
VALUES(12351789, '2012-05-08 00:00:00', 'Indirect Dealers', 'West', 0, '2012-07-04 00:00:00', 'iPhone 3');
INSERT INTO [P_ENROLLMENT]([Subid ], [Enrollment_Date], [Channel], [Region], [Active_Status], [Drop_Date], [Phone_Model])
VALUES(12352300, '2012-06-24 00:00:00', 'Retail', 'Midwest', 1, NULL, 'Droid 2');
INSERT INTO [P_ENROLLMENT]([Subid ], [Enrollment_Date], [Channel], [Region], [Active_Status], [Drop_Date], [Phone_Model])
VALUES(12352811, '2012-06-25 00:00:00', 'Retail', 'Southeast', 1, NULL, 'Sidekick');
Insert TBL2
INSERT INTO [ENROLLMENT_CHANGES]([Subid], [Cdate], [Phone_Model])
VALUES(12345678, '2011-11-01 00:00:00', 'RAZR');
INSERT INTO [ENROLLMENT_CHANGES]([Subid], [Cdate], [Phone_Model])
VALUES(12346178, '2012-01-07 00:00:00', 'HTC Hero');
INSERT INTO [ENROLLMENT_CHANGES]([Subid], [Cdate], [Phone_Model])
VALUES(12348723, '2012-01-28 00:00:00', 'RAZR');
INSERT INTO [ENROLLMENT_CHANGES]([Subid], [Cdate], [Phone_Model])
VALUES(12350256, '2012-02-21 00:00:00', 'Blackberry Bold');
INSERT INTO [ENROLLMENT_CHANGES]([Subid], [Cdate], [Phone_Model])
VALUES(12349745, '2012-05-05 00:00:00', 'HTC Hero');
INSERT INTO [ENROLLMENT_CHANGES]([Subid], [Cdate], [Phone_Model])
VALUES(12345678, '2012-05-19 00:00:00', 'Palm Pre');
INSERT INTO [ENROLLMENT_CHANGES]([Subid], [Cdate], [Phone_Model])
VALUES(12347190, '2012-05-20 00:00:00', 'HTC Hero');
INSERT INTO [ENROLLMENT_CHANGES]([Subid], [Cdate], [Phone_Model])
VALUES(12350256, '2012-05-21 00:00:00', 'Blackberry Bold');
INSERT INTO [ENROLLMENT_CHANGES]([Subid], [Cdate], [Phone_Model])
VALUES(12349234, '2012-06-04 00:00:00', 'Palm Pre');
INSERT INTO [ENROLLMENT_CHANGES]([Subid], [Cdate], [Phone_Model])
VALUES(12346178, '2012-06-05 00:00:00', 'iPhone 3');
INSERT INTO [ENROLLMENT_CHANGES]([Subid], [Cdate], [Phone_Model])
VALUES(12350767, '2012-06-10 00:00:00', 'iPhone 3');
For the count
select COUNT(*) Total
from
(
select e.*,
rn = row_number() over (partition by e.subid order by c.cdate desc),
first_model = coalesce(c.phone_model, e.phone_model)
from [P_ENROLLMENT] e
left join [ENROLLMENT_CHANGES] c on c.subid = e.subid
) x
where rn=1 and first_model = 'iPhone 3'
For all the records
select *
from
(
select e.*,
rn = row_number() over (partition by e.subid order by c.cdate desc),
first_model = coalesce(c.phone_model, e.phone_model)
from [P_ENROLLMENT] e
left join [ENROLLMENT_CHANGES] c on c.subid = e.subid
) x
where rn=1 and first_model = 'iPhone 3'
order by subid
You want to know if the first record in the table is an iPhone 3. Something like this:
select count(*)
from (select e.*,
row_number() over (partition by subid order by enrollment_date) as seqnum
from p_enrollment e
) e
where seqnum = 1 and phone_model = 'iPhone 3'
Perhaps I'm thinking too simply, but wouldn't either of the following do what you're looking for?:
SELECT Phone_Model
, COUNT(*) AS Initially_Enrolled
FROM p_enrollment
GROUP BY Phone_Model
(working SQLFiddle: http://sqlfiddle.com/#!3/68258/4)
or
SELECT COUNT(*) AS Initially_Enrolled
FROM p_enrollment
WHERE Phone_Model = 'iPhone 3'
(working SQLFiddle: http://sqlfiddle.com/#!3/68258/3)
Since you only want initial enrollment, the ENROLLMENT_CHANGES table is irrelevant.