Postgresql Crosstab with Array row_name - sql

I have the following SQL statement. The inner query ('SELECT ARRAY...ORDER BY 1,2') works correctly and gives the correct totals for each row_name. When I run the crosstab, the result is incorrect. Changing the 'ORDER BY' in the inner query doesn't seem to change that query's own result, but it does change the outer query's result. I have verified that the types match for crosstab(text,text) for the column headings.
-- Total amount per (zone, commodity type), pivoted into one column per
-- activity type (1 -> amount1, 3 -> amount3).
--
-- crosstab(source_sql, category_sql) does not aggregate: it keeps a single
-- value per (row_name, category) cell instead of adding values up. The joins
-- in the source query produce one row per activity/commodity combination, so
-- the amounts are summed per (row_name, attribute) inside the source query
-- before they are pivoted.
--
-- The category query is ordered explicitly because the position of each
-- category decides which output column (amount1, amount3) receives it.
SELECT
    ct.row_name[1:2] AS zonenumber,
    sum(ct.amount1) AS "sumEmploymentamount",
    sum(ct.amount3) AS "sumExport_Consumersamount"
FROM output.crosstab('
    SELECT
        ARRAY[
            tab.zonenumber::text,
            comTypes.commodity_type_name::text,
            tab.year_run::text
        ] AS row_name,
        tab.activity_type_id AS attribute,
        sum(tab.amount) AS value
    FROM output.all_zonalmakeuse_3 AS tab
    INNER JOIN output.activity_numbers AS actNums
        ON actNums.activitynumber = tab.activity
    INNER JOIN output.activity_types AS actTypes
        ON actTypes.activity_type_id = actNums.activity_type_id
    INNER JOIN output.commodity_numbers AS comNums
        ON comNums.commoditynumber = tab.commodity
    INNER JOIN output.commodity_types AS comTypes
        ON comTypes.commodity_type_id = comNums.commodity_type_id
    WHERE tab.scenario = ''S03''
      AND tab.year_run = ''2005''
      AND tab.amount != ''-Infinity''
      AND tab.moru = ''M''
      AND (
            comTypes.commodity_type_name IN (''Financial'', ''Goods'')
         OR comNums.commodity IN (''Financial'', ''Goods'')
      )
      AND (
            actTypes.activity_type_name IN (''Employment'', ''Export Consumers'')
         OR actNums.activity IN (''Employment'', ''Export Consumers'')
      )
    GROUP BY
        tab.zonenumber,
        comTypes.commodity_type_name,
        tab.year_run,
        tab.activity_type_id
    ORDER BY row_name, attribute
'::text, '
    SELECT activity_type_id AS activity
    FROM output.activity_types
    WHERE activity_type_id IN (1, 3)
    ORDER BY activity_type_id
'::text
) ct (row_name text[], amount1 double precision, amount3 double precision)
GROUP BY ct.row_name[1:2]
ORDER BY ct.row_name[1:2]::text;
Tables
CREATE TABLE activity_numbers
("activitynumber" int, "activity" varchar(46), "activity_type_id" int)
;
INSERT INTO activity_numbers
("activitynumber", "activity", "activity_type_id")
VALUES
(0, '"AI01AgMinMan"', 1),
(1, '"AI02AgMinProd"', 1),
(2, '"AI03ConMan"', 1),
(3, '"AI04ConProd"', 1),
(4, '"AI05MfgMan"', 1),
(5, '"AI06MfgProd"', 1),
(6, '"AI07TCUMan"', 1),
(7, '"AI08TCUProd"', 1),
(8, '"AI09Whole"', 1),
(9, '"AI10Retail"', 1),
(10, '"AI11FIRE"', 1),
(11, '"AI12PTSci"', 1),
(12, '"AI13ManServ"', 1),
(13, '"AI14PBSOff"', 1),
(14, '"AI15PBSRet"', 1),
(15, '"AI16PSInd"', 1),
(16, '"AI17Religion"', 1),
(17, '"AI18BSOnsite"', 1),
(18, '"AI19PSOnsite"', 1);
CREATE TABLE activity_types
("activity_type_id" int, "activity_type_name" varchar(18))
;
INSERT INTO activity_types
("activity_type_id", "activity_type_name")
VALUES
(1, '"Employment"'),
(2, '"Households"'),
(3, '"Export Consumers"')
;
CREATE TABLE commodity_numbers
("commoditynumber" int, "commodity" varchar(29), "commodity_type_id" int)
;
INSERT INTO commodity_numbers
("commoditynumber", "commodity", "commodity_type_id")
VALUES
(0, '"CG01AgMinDirection"', 1),
(1, '"CG02AgMinOutput"', 1),
(2, '"CG03ConDirection"', 1),
(3, '"CG04ConOutput"', 1),
(4, '"CG05MfgDirection"', 1),
(5, '"CG06MfgOutput"', 1),
(6, '"CS07TCUDirection"', 2),
(7, '"CS08TCUOutput"', 2),
(8, '"CS09WsOutput"', 2),
(9, '"CS10RetailOutput"', 2),
(10, '"CS11FIREOutput"', 2),
(11, '"CS13OthServOutput"', 2),
(12, '"CS14HealthOutput"', 2),
(13, '"CS15GSEdOutput"', 2),
(14, '"CS16HiEdOutput"', 2),
(15, '"CS17GovOutput"', 2),
(16, '"CF18TaxReceipts"', 4),
(17, '"CF19GovSupReceipts"', 4),
(18, '"CF20InvestReceipts"', 4),
(19, '"CF21ReturnInvestReceipts"', 4),
(20, '"CF22CapitalTransferReceipts"', 4)
;
CREATE TABLE commodity_types
("commodity_type_id" int, "commodity_type_name" varchar(23))
;
INSERT INTO commodity_types
("commodity_type_id", "commodity_type_name")
VALUES
(1, '"Goods"'),
(4, '"Financial"')
;
CREATE TABLE all_zonalmakeuse_3
("year_run" int, "scenario" varchar(6), "activity" int, "zonenumber" int, "commodity" int, "moru" varchar(3), "amount" numeric, "activity_type_id" int, "commodity_type_id" int)
;
INSERT INTO all_zonalmakeuse_3
("year_run", "scenario", "activity", "zonenumber", "commodity", "moru", "amount", "activity_type_id", "commodity_type_id")
VALUES
(2005, '"C11a"', 0, 1, 0, '"M"', 1752708.30900861, 1, 1),
(2005, '"C11a"', 0, 3, 0, '"M"', 2785972.97039016, 1, 1),
(2005, '"C11a"', 0, 4, 0, '"M"', 3847879.45910403, 1, 1),
(2005, '"C11a"', 1, 1, 1, '"M"', 26154618.3893068, 1, 1),
(2005, '"C11a"', 1, 3, 1, '"M"', 1663.49609248196, 1, 1),
(2005, '"C11a"', 1, 4, 1, '"M"', 91727.9065950723, 1, 1),
(2005, '"C11a"', 1, 1, 5, '"M"', 855899.319689473, 1, 1),
(2005, '"C11a"', 1, 3, 5, '"M"', 54.4372375336784, 1, 1),
(2005, '"C11a"', 1, 4, 5, '"M"', 3001.75868302327, 1, 1),
(2005, '"C11a"', 2, 1, 2, '"M"', 150885191.664482, 1, 1),
(2005, '"C11a"', 2, 2, 2, '"M"', 99242746.1181359, 1, 1),
(2005, '"C11a"', 2, 3, 2, '"M"', 90993266.1879518, 1, 1),
(2005, '"C11a"', 2, 4, 2, '"M"', 60169908.2975819, 1, 1),
(2005, '"C11a"', 3, 1, 3, '"M"', 642982844.104623, 1, 1),
(2005, '"C11a"', 3, 2, 3, '"M"', 421379496.576106, 1, 1),
(2005, '"C11a"', 3, 3, 3, '"M"', 592125233.320609, 1, 1),
(2005, '"C11a"', 3, 4, 3, '"M"', 400206994.693349, 1, 1),
(2005, '"C11a"', 4, 1, 4, '"M"', 449206658.578704, 1, 1),
(2005, '"C11a"', 4, 2, 4, '"M"', 103823580.173348, 1, 1),
(2005, '"C11a"', 4, 3, 4, '"M"', 181300924.388112, 1, 1),
(2005, '"C11a"', 4, 4, 4, '"M"', 143113096.547075, 1, 1),
(2005, '"C11a"', 5, 1, 1, '"M"', 83889.8852772168, 1, 1),
(2005, '"C11a"', 5, 2, 1, '"M"', 25716.5837854808, 1, 1),
(2005, '"C11a"', 5, 3, 1, '"M"', 10243.7021847824, 1, 1),
(2005, '"C11a"', 5, 4, 1, '"M"', 22406.3296935502, 1, 1),
(2005, '"C11a"', 5, 1, 5, '"M"', 408669650.696034, 1, 1),
(2005, '"C11a"', 5, 2, 5, '"M"', 125278360.769936, 1, 1),
(2005, '"C11a"', 5, 3, 5, '"M"', 49902204.2985933, 1, 1),
(2005, '"C11a"', 5, 4, 5, '"M"', 109152455.018677, 1, 1),
(2005, '"C11a"', 5, 1, 20, '"M"', 161822.743734245, 1, 4),
(2005, '"C11a"', 5, 2, 20, '"M"', 49607.031096612, 1, 4),
(2005, '"C11a"', 5, 3, 20, '"M"', 19759.998336631, 1, 4),
(2005, '"C11a"', 5, 4, 20, '"M"', 43221.5842952059, 1, 4),
(2005, '"C11a"', 7, 1, 1, '"M"', 122316.017730318, 1, 1),
(2005, '"C11a"', 7, 2, 1, '"M"', 20514.5008361246, 1, 1),
(2005, '"C11a"', 7, 3, 1, '"M"', 8431.33094615992, 1, 1),
(2005, '"C11a"', 7, 4, 1, '"M"', 75842.631567318, 1, 1),
(2005, '"C11a"', 13, 1, 5, '"M"', 1195626.97941868, 1, 1),
(2005, '"C11a"', 13, 2, 5, '"M"', 567002.352487648, 1, 1),
(2005, '"C11a"', 13, 3, 5, '"M"', 1104908.87426762, 1, 1),
(2005, '"C11a"', 13, 4, 5, '"M"', 1071325.74253601, 1, 1),
(2005, '"C11a"', 17, 1, 1, '"M"', 751648.370711072, 1, 1),
(2005, '"C11a"', 17, 2, 1, '"M"', 340439.936040081, 1, 1),
(2005, '"C11a"', 17, 3, 1, '"M"', 800477.767008582, 1, 1),
(2005, '"C11a"', 17, 4, 1, '"M"', 489745.223392316, 1, 1),
(2005, '"C11a"', 17, 1, 20, '"M"', 3154907.39011312, 1, 4),
(2005, '"C11a"', 17, 2, 20, '"M"', 1428934.74123601, 1, 4),
(2005, '"C11a"', 17, 3, 20, '"M"', 3359859.9041298, 1, 4),
(2005, '"C11a"', 17, 4, 20, '"M"', 2055616.54193613, 1, 4),
(2005, '"C11a"', 18, 1, 20, '"M"', 2088003.66854949, 1, 4),
(2005, '"C11a"', 18, 2, 20, '"M"', 1310122.52506653, 1, 4),
(2005, '"C11a"', 18, 3, 20, '"M"', 1481450.29636847, 1, 4),
(2005, '"C11a"', 18, 4, 20, '"M"', 3035710.53213605, 1, 4)
;
I have manipulated the query in several ways (changed type casting, order by, etc), but always get incorrect values. The row and column headers are at least consistently correct.

Related

matplotlib scatterplot - only a few labels are displayed on x axis

While plotting using scatterplot in matplotlib, I find some of the values from x-axis are missing in the labels. I want to have all the x-axis legends to be displayed in the graph.
This might be related to tick spacing but I am not sure how to set it to display all the x-axis values.
In the sample code, I want to have all the dates displayed on x-axis
# 28 consecutive days starting 2019-06-16 (through 2019-07-13), then 2019-07-15;
# 2019-07-14 is absent from the data.
x = [datetime.date(2019, 6, 16) + datetime.timedelta(days=offset) for offset in range(28)]
x.append(datetime.date(2019, 7, 15))

# One y value per date in x.
y = [
    0.15338331291011087, 0.15340904024033467, 0.1534195786228156,
    0.15343290378685995, 0.15331644003478487, 0.1533570064827251,
    0.1531156771286262, 0.15307150988142237, 0.15306137109205153,
    0.15302301551230038, 0.15295889536607005, 0.15298157619113423,
    0.15286883583977182, 0.15283539558962958, 0.15284508041253356,
    0.15281542656182034, 0.1527844647725921, 0.15277054534676898,
    0.1527339281127108, 0.15270419704783855, 0.15261812595095475,
    0.15255120245035042, 0.15251650362641, 0.15257536163149088,
    0.15253967278547242, 0.15249871561808356, 0.15248591103997422,
    0.15242121840852002, 0.15248773465596907,
]

# Single-axes figure with blue square markers, legend in the upper-left corner
# and x tick labels rotated by 90 degrees.
fig = plt.figure()
ax1 = fig.add_subplot(111)
ax1.scatter(x, y, s=10, c='b', marker="s", label='y')
ax1.legend(loc='upper left')
plt.xticks(rotation=90)
plt.show()
Plot that I get with the sample code
Pass the x values to plt.xticks() so that every date gets its own tick, and resize the figure through plt.gcf() so that all the labels fit.
I rebuilt the x list with a small helper function and plotted the graph; see the code below.
from datetime import datetime

from matplotlib import pyplot as plt


def std(a):
    """Parse a 'YYYY, M, D' string into a datetime.date."""
    return datetime.strptime(a, '%Y, %m, %d').date()


x = [std(text) for text in (
    '2019, 6, 16', '2019, 6, 17', '2019, 6, 18', '2019, 6, 19',
    '2019, 6, 20', '2019, 6, 21', '2019, 6, 22', '2019, 6, 23',
    '2019, 6, 24', '2019, 6, 25', '2019, 6, 26', '2019, 6, 27',
    '2019, 6, 28', '2019, 6, 29', '2019, 6, 30', '2019, 7, 1',
    '2019, 7, 2', '2019, 7, 3', '2019, 7, 4', '2019, 7, 5',
    '2019, 7, 6', '2019, 7, 7', '2019, 7, 8', '2019, 7, 9',
    '2019, 7, 10', '2019, 7, 11', '2019, 7, 12', '2019, 7, 13',
    '2019, 7, 15',
)]

# One y value per date in x.
y = [
    0.15338331291011087, 0.15340904024033467, 0.1534195786228156,
    0.15343290378685995, 0.15331644003478487, 0.1533570064827251,
    0.1531156771286262, 0.15307150988142237, 0.15306137109205153,
    0.15302301551230038, 0.15295889536607005, 0.15298157619113423,
    0.15286883583977182, 0.15283539558962958, 0.15284508041253356,
    0.15281542656182034, 0.1527844647725921, 0.15277054534676898,
    0.1527339281127108, 0.15270419704783855, 0.15261812595095475,
    0.15255120245035042, 0.15251650362641, 0.15257536163149088,
    0.15253967278547242, 0.15249871561808356, 0.15248591103997422,
    0.15242121840852002, 0.15248773465596907,
]

fig = plt.figure(figsize=(8, 5))
ax1 = fig.add_subplot(111)
ax1.scatter(x, y, s=10, c='b', marker="s", label='y')
ax1.legend(loc='upper left')

# Drop the horizontal padding, then draw the canvas before measuring the
# automatically generated tick labels.
ax1.margins(x=0)
fig.canvas.draw()
tick_labels = ax1.get_xticklabels()
widest_label = max(label.get_window_extent().width for label in tick_labels)

pad = .2  # inch margin
# Figure width (inches): one widest-label slot per data point plus three pads.
new_width = widest_label / fig.dpi * len(x) + 3 * pad
# NOTE(review): the pad is divided by the figure height (index 1) although
# left/right are passed as horizontal subplot parameters - confirm intent.
frac = pad / fig.get_size_inches()[1]
fig.subplots_adjust(left=frac, right=0.8 - frac)
fig.set_size_inches(new_width, fig.get_size_inches()[1])

# Put one tick on every data point and rotate the labels.
plt.xticks(x, rotation=90)
plt.show()

SQL help for selecting most recent non-Null value for a unique plant

I have a SQL Server table with data on various factories (plants), with rows identified by a root plant ID, and a sub plant ID. The root ID is the same for the facility for its entire life. And the sub ID is added each time the plant data is changed with the regulatory agency.
Sometimes when the plant data was re-filed with the regulator, only the changed data was submitted, and other fields were left blank (Null).
I'm looking for an elegant way to write a query that will return all of the data from the most recent sub ID record, except that for Capacity, it will pull the most recent sub for which a non-Null Capacity was actually specified.
Assume that these are the fields in the Plant table:
RecordId (primary key)
RootId
SubId
Fuel
Capacity
Here is the SQL for selecting the data for the most recent SubId:
-- Every Plant row whose SubId equals the highest SubId recorded for its RootId.
SELECT latest.*
FROM Plant AS latest
WHERE latest.SubId = (
    SELECT MAX(sibling.SubId)
    FROM Plant AS sibling
    WHERE sibling.RootId = latest.RootId
)
I've been thinking about this for a while, but haven't come up with an approach. Even just a push in the right direction would be appreciated. Here is some SQL code to generate sample data:
CREATE TABLE Plant (
RecordId INTEGER PRIMARY KEY,
RootId VARCHAR(12) not null,
SubID INTEGER not null,
Fuel INTEGER not null,
Capacity DECIMAL(10,4)
);
INSERT INTO Plant
VALUES
(451, 'PLT03-39', 3, 1, 4399.67),
(471, 'PLT03-39', 4, 1, 4399.67),
(1809, 'PLT03-39', 5, 1, 4399.67),
(4888, 'PLT03-39', 6, 1, Null),
(6111, 'PLT03-39', 7, 1, Null),
(450, 'PLT03-40', 3, 1, 15531.67),
(472, 'PLT03-40', 4, 1, Null),
(1810, 'PLT03-40', 5, 1, 14767.61),
(4882, 'PLT03-40', 6, 1, Null),
(6113, 'PLT03-40', 7, 1, Null),
(454, 'PLT03-41', 5, 1, 23726.34),
(455, 'PLT03-41', 6, 1, 23726.34),
(469, 'PLT03-41', 7, 1, 23726.34),
(1807, 'PLT03-41', 8, 1, 22850.96),
(4884, 'PLT03-41', 9, 1, 22850.96),
(6110, 'PLT03-41', 10, 1, 22850.96),
(452, 'PLT03-42', 3, 1, 9120.65),
(470, 'PLT03-42', 4, 1, Null),
(1808, 'PLT03-42', 5, 1, 9120.65),
(4883, 'PLT03-42', 6, 1, 9120.65),
(6109, 'PLT03-42', 7, 1, Null),
(449, 'PLT03-43', 4, 1, 7923.96),
(474, 'PLT03-43', 5, 1, 7923.96),
(1811, 'PLT03-43', 6, 1, 7357.24),
(4881, 'PLT03-43', 7, 1, Null),
(5107, 'PLT03-43', 7, 1, 7711.44),
(5133, 'PLT03-43', 7, 1, Null),
(6112, 'PLT03-43', 8, 1, 7711.44),
(98, 'PLT05-25', 2, 18, 26.565),
(528, 'PLT05-25', 2, 18, 26033.7),
(139, 'PLT05-25', 2, 18, 26565),
(380, 'PLT05-25', 2, 18, Null),
(381, 'PLT05-25', 2, 18, 51854.88),
(7398, 'PLT06-143', 0, 18, 4091.01),
(4112, 'PLT06-143', 1, 18, 4091.01),
(5309, 'PLT06-143', 2, 18, 4091.01),
(73982, 'PLT06-143', 2, 18, 4091.01),
(73981, 'PLT06-143', 3, 18, Null),
(7397, 'PLT06-145', 0, 18, 4091.01),
(73971, 'PLT06-145', 1, 18, 4091.01),
(4109, 'PLT06-145', 1, 18, Null),
(5314, 'PLT06-145', 2, 18, 4091.01),
(73972, 'PLT06-145', 2, 18, Null),
(73973, 'PLT06-145', 3, 18, 4091.01),
(177, 'PLT06-342', 2, 1, 35420),
(1307, 'PLT06-342', 3, 1, 30360),
(5946, 'PLT06-342', 4, 1, 30360),
(6220, 'PLT06-342', 5, 1, Null),
(13264, 'PLT06-342', 6, 1, Null),
(1312, 'PLT06-344', 2, 1, 15180),
(5106, 'PLT06-344', 3, 1, 15180),
(5945, 'PLT06-344', 4, 1, 15180),
(6218, 'PLT06-344', 5, 1, Null),
(10550, 'PLT06-344', 6, 1, 10120),
(13271, 'PLT06-344', 7, 1, 10120),
(2724, 'PLT06-87', 2, 6, 143.451),
(5039, 'PLT06-87', 3, 6, 143.451),
(5886, 'PLT06-87', 4, 6, Null),
(10586, 'PLT06-87', 5, 6, 143.451),
(22759, 'PLT06-87', 6, 6, Null),
(158, 'PLT07-234', 1, 18, 21274.77),
(341, 'PLT07-234', 2, 18, 21274.77),
(7813, 'PLT07-234', 3, 18, 21274.77),
(24562, 'PLT07-234', 4, 18, Null),
(24584, 'PLT07-234', 4, 18, 2488.508),
(5965, 'PLT07-328', 2, 1, 19607.5),
(6073, 'PLT07-328', 2, 1, 19607.5),
(5996, 'PLT07-328', 2, 1, 19607.5),
(6644, 'PLT07-328', 3, 1, 19607.5),
(6701, 'PLT07-328', 3, 1, Null),
(7664, 'PLT07-328', 4, 1, Null),
(227, 'PLT07-39', 2, 18, 50347),
(1269, 'PLT07-39', 3, 18, 50258.45),
(1821, 'PLT07-39', 4, 18, 50258.45),
(1976, 'PLT07-39', 4, 18, 50258.45),
(5282, 'PLT07-39', 5, 18, Null),
(374, 'PLT08-25', 2, 18, 55331.1),
(135, 'PLT08-25', 2, 18, 30.36),
(134, 'PLT08-25', 2, 18, 56.925),
(533, 'PLT08-25', 2, 18, 55.7865),
(93, 'PLT08-25', 2, 18, 56.925),
(4081, 'PLT08-437', 1, 18, 5206.74),
(4241, 'PLT08-437', 2, 18, 5206.74),
(4242, 'PLT08-437', 3, 18, 5206.74),
(4532, 'PLT08-437', 4, 18, 4946.656),
(24344, 'PLT08-437', 5, 18, Null),
(460, 'PLT10-574', 0, 18, 198207.284),
(943, 'PLT10-574', 2, 18, 198207.284),
(1248, 'PLT10-574', 3, 18, 198207.284),
(2371, 'PLT10-574', 4, 18, 198207.284),
(6173, 'PLT10-574', 5, 18, 198207.284),
(17787, 'PLT10-574', 6, 18, 198207.284),
(23533, 'PLT10-574', 7, 18, 198207.284)
;
And here is the expected result of the query I'm seeking:
RecordId RootId SubId Fuel Capacity
6111 PLT03-39 7 1 4399.67
6113 PLT03-40 7 1 14767.61
6110 PLT03-41 10 1 22850.96
6109 PLT03-42 7 1 9120.65
6112 PLT03-43 8 1 7711.44
381 PLT05-25 2 18 51854.88
7398 PLT06-143 3 18 4091.01
7397 PLT06-145 3 18 4091.01
13264 PLT06-342 6 1 30360
13271 PLT06-344 7 1 10120
22759 PLT06-87 6 6 143.451
24584 PLT07-234 4 18 2488.508
7664 PLT07-328 4 1 19607.5
5282 PLT07-39 5 18 50258.45
93 PLT08-25 2 18 56.925
24344 PLT08-437 5 18 4946.656
23533 PLT10-574 7 18 198207.284
Below is one solution to this problem. I used a CTE and MAX aggregate to determine the latest RecordId for each RootId. After joining that back to the Plant table used an OUTER APPLY to retrieve the most recent capacity.
-- Latest filing per plant, with Capacity taken from the most recent filing
-- that actually reported one.
--
-- RecordId does not grow together with SubID (for 'PLT06-143', RecordId 73982
-- has SubID 2 while RecordId 73981 has SubID 3), so the latest record is
-- picked by highest SubID first; MAX(RecordId) only breaks ties between rows
-- that share that highest SubID.
WITH LATEST AS
(
    SELECT P.RootId, MAX(P.RecordId) AS RecordId
    FROM Plant AS P
    WHERE P.SubID = (
        SELECT MAX(S.SubID)
        FROM Plant AS S
        WHERE S.RootId = P.RootId
    )
    GROUP BY P.RootId
)
SELECT
    P.RecordId
  , P.RootId
  , P.SubID
  , P.Fuel
  , CAP.Capacity
FROM
    LATEST AS L
    JOIN Plant AS P
        ON L.RecordId = P.RecordId
    OUTER APPLY
    (
        -- Most recent non-NULL capacity for this plant; RecordId makes the
        -- choice deterministic when several rows share the same SubID.
        SELECT TOP 1 C.Capacity
        FROM Plant AS C
        WHERE C.RootId = P.RootId AND C.Capacity IS NOT NULL
        ORDER BY C.SubID DESC, C.RecordId DESC
    ) AS CAP
ORDER BY
    L.RootId;

SQL syntax converting self JOIN to PARTITION BY

I can re-write this query
Query1:
-- For each row dated 2018-01-01, count the earlier rows with the same area
-- and name whose t_Value is 1.
SELECT
    t1.t_Date,
    t1.t_Area,
    t1.t_Name,
    t1.t_Length,
    t1.t_Value,
    COUNT(t2.t_Name) AS CountMatches
FROM Table1 AS t1
LEFT JOIN Table1 AS t2
    ON  t2.t_Area = t1.t_Area
    AND t2.t_Name = t1.t_Name
    AND t2.t_Date < t1.t_Date
    AND t2.t_Value = '1'
WHERE t1.t_Date = '2018-01-01'
GROUP BY
    t1.t_Date,
    t1.t_Area,
    t1.t_Name,
    t1.t_Length,
    t1.t_Value
as the following
Query2:
-- Same count expressed as a windowed sum over all preceding rows of the same
-- (t_Area, t_Name) partition, ordered by date; filtered to 2018-01-01 last.
WITH counted AS
(
    SELECT
        t1.*,
        SUM(CASE WHEN t1.t_Value = '1' THEN 1 ELSE 0 END) OVER (
            PARTITION BY t1.t_Area, t1.t_Name
            ORDER BY t1.t_Date
            ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING
        ) AS CountMatches
    FROM Table1 AS t1
)
SELECT t2.*
FROM counted AS t2
WHERE t2.t_Date = '2018-01-01'
I'm trying to add an additional term but struggling with the syntax
How do I add the following term to Query2 AND ABS(t1.[t_Length] - t2.[t_Length]) <= 1 ?
The equivalent self join query which gives desired result is as follows
-- Self-join version that also requires the earlier row's t_Length to be
-- within 1 of the current row's t_Length.
SELECT
    t1.t_Date,
    t1.t_Area,
    t1.t_Name,
    t1.t_Length,
    t1.t_Value,
    COUNT(t2.t_Name) AS CountMatches
FROM Table1 AS t1
LEFT JOIN Table1 AS t2
    ON  t2.t_Area = t1.t_Area
    AND t2.t_Name = t1.t_Name
    AND t2.t_Date < t1.t_Date
    AND t2.t_Value = '1'
    AND ABS(t1.t_Length - t2.t_Length) <= 1 --(Need this term as well)
WHERE t1.t_Date = '2018-01-01'
GROUP BY
    t1.t_Date,
    t1.t_Area,
    t1.t_Name,
    t1.t_Length,
    t1.t_Value
Example SQLFiddle: http://sqlfiddle.com/#!18/f15ec/10
Desired output:
Sample Data:
CREATE TABLE Table1
([t_Date] datetime, [t_Area] varchar(10), [t_Name] varchar(10), [t_Length] int, [t_Value] int);
INSERT INTO Table1 ([t_Date], [t_Area], [t_Name], [t_Length], [t_Value])
VALUES
('2018-01-01 00:00:00.000',' Area6', 'Name1', 10, 6),
('2018-01-01 00:00:00.000',' Area5', 'Name2', 9, 2),
('2017-12-15 00:00:00.000',' Area6', 'Name1', 8, 5),
('2017-11-25 00:00:00.000',' Area14', 'Name2', 7, 5),
('2017-11-18 00:00:00.000',' Area6', 'Name1', 10, 2),
('2017-11-13 00:00:00.000',' Area3', 'Name2', 9, 8),
('2017-10-28 00:00:00.000',' Area6', 'Name1', 8, 1),
('2017-10-04 00:00:00.000',' Area2', 'Name1', 7, 2),
('2017-08-26 00:00:00.000',' Area4', 'Name1', 10, 3),
('2017-02-25 00:00:00.000',' Area7', 'Name1', 9, 8),
('2017-02-18 00:00:00.000',' Area12', 'Name1', 8, 5),
('2017-01-25 00:00:00.000',' Area18', 'Name2', 7, 2),
('2017-01-01 00:00:00.000',' Area5', 'Name2', 10, 2),
('2016-12-27 00:00:00.000',' Area7', 'Name1', 9, 1),
('2016-12-09 00:00:00.000',' Area6', 'Name1', 8, 5),
('2016-12-01 00:00:00.000',' Area16', 'Name2', 7, 2),
('2016-11-12 00:00:00.000',' Area6', 'Name1', 10, 1),
('2016-10-27 00:00:00.000',' Area24', 'Name2', 9, 8),
('2016-10-22 00:00:00.000',' Area6', 'Name1', 8, 7),
('2016-09-30 00:00:00.000',' Area13', 'Name2', 7, 2),
('2016-09-24 00:00:00.000',' Area19', 'Name1', 10, 1),
('2016-08-20 00:00:00.000',' Area21', 'Name2', 9, 3),
('2016-07-26 00:00:00.000',' Area21', 'Name2', 8, 1),
('2016-07-17 00:00:00.000',' Area26', 'Name2', 7, 2),
('2016-06-24 00:00:00.000',' Area4', 'Name1', 10, 3),
('2016-06-01 00:00:00.000',' Area4', 'Name1', 9, 2),
('2016-05-29 00:00:00.000',' Area15', 'Name2', 8, 8),
('2016-05-10 00:00:00.000',' Area25', 'Name1', 7, 3),
('2016-05-05 00:00:00.000',' Area31', 'Name2', 10, 1),
('2016-04-13 00:00:00.000',' Area6', 'Name1', 9, 2),
('2016-04-03 00:00:00.000',' Area3', 'Name2', 8, 7),
('2016-03-09 00:00:00.000',' Area5', 'Name2', 7, 3),
('2016-02-27 00:00:00.000',' Area7', 'Name1', 10, 10),
('2016-02-06 00:00:00.000',' Area23', 'Name1', 9, 1),
('2016-01-01 00:00:00.000',' Area6', 'Name1', 8, 3),
('2015-12-08 00:00:00.000',' Area28', 'Name1', 7, 4),
('2015-11-10 00:00:00.000',' Area17', 'Name1', 10, 3),
('2015-05-17 00:00:00.000',' Area19', 'Name2', 9, 7),
('2015-04-24 00:00:00.000',' Area7', 'Name1', 8, 1),
('2015-04-21 00:00:00.000',' Area18', 'Name2', 7, 3),
('2015-04-07 00:00:00.000',' Area9', 'Name1', 10, 8),
('2015-03-11 00:00:00.000',' Area6', 'Name1', 9, 10),
('2015-02-27 00:00:00.000',' Area8', 'Name2', 8, 5),
('2015-02-24 00:00:00.000',' Area5', 'Name2', 7, 1),
('2015-02-17 00:00:00.000',' Area30', 'Name2', 10, 1),
('2015-01-31 00:00:00.000',' Area23', 'Name1', 9, 6),
('2015-01-23 00:00:00.000',' Area8', 'Name2', 8, 1),
('2015-01-10 00:00:00.000',' Area29', 'Name1', 7, 4),
('2014-12-26 00:00:00.000',' Area24', 'Name2', 10, 5),
('2014-12-13 00:00:00.000',' Area6', 'Name1', 9, 2),
('2014-11-15 00:00:00.000',' Area6', 'Name1', 8, 8),
('2014-10-25 00:00:00.000',' Area7', 'Name1', 7, 6),
('2014-05-24 00:00:00.000',' Area10', 'Name1', 10, 1),
('2014-05-05 00:00:00.000',' Area10', 'Name1', 9, 1),
('2014-04-21 00:00:00.000',' Area7', 'Name1', 8, 2),
('2013-12-26 00:00:00.000',' Area27', 'Name1', 7, 4),
('2013-12-03 00:00:00.000',' Area25', 'Name1', 10, 2),
('2013-11-14 00:00:00.000',' Area18', 'Name1', 9, 4),
('2013-10-23 00:00:00.000',' Area11', 'Name1', 8, 3),
('2013-06-26 00:00:00.000',' Area32', 'Name2', 7, 4),
('2013-06-14 00:00:00.000',' Area1', 'Name2', 10, 6),
('2013-05-18 00:00:00.000',' Area2', 'Name2', 9, 5),
('2013-03-30 00:00:00.000',' Area12', 'Name2', 8, 9),
('2013-02-25 00:00:00.000',' Area22', 'Name1', 7, 1),
('2013-01-08 00:00:00.000',' Area7', 'Name1', 10, 1)
;
You are mixing values from the "current" row and "previous" rows in the window function. Unfortunately, this cannot be expressed.
In SQL Server, you can use apply instead:
-- Per 2018-01-01 row, count earlier rows of the same area and name with
-- t_Value 1 and a t_Length within 1 of the current row's t_Length.
SELECT
    t1.*,
    matches.CountMatches
FROM table1 AS t1
OUTER APPLY
(
    SELECT COUNT(*) AS CountMatches
    FROM table1 AS earlier
    WHERE earlier.t_Area = t1.t_Area
      AND earlier.t_Name = t1.t_Name
      AND earlier.t_Date < t1.t_Date
      AND earlier.t_Value = '1'
      AND ABS(t1.t_Length - earlier.t_Length) <= 1
) AS matches
WHERE t1.t_Date = '2018-01-01';
In terms of performance, this saves you the outer aggregation.

SQL Server Management: The INSERT statement conflicted with the FOREIGN KEY constraint

I'm using SQL Server Management Studio to create a database (for a university assignment). I'm trying to insert new values into a table but I'm receiving an error.
The two relevant tables I created are:
-- One row per ballot box, keyed by bno.
create table ballotBox
(
bno integer,
-- References the city table (defined outside this snippet); no column is
-- named, so the reference targets that table's primary key.
cid numeric(4,0) references city,
street varchar(20),
hno integer,
totvoters integer,
primary key (bno)
);
-- Vote counts (nofvotes) per city / ballot box / party combination.
create table votes
(
cid numeric(4,0) references city,
-- Must match an existing ballotBox.bno (ballotBox's primary key).
bno integer references ballotBox,
-- References the party table (defined outside this snippet).
pid numeric(3,0) references party,
nofvotes integer
);
I've entered first the values to ballotBox (as well as for the table 'city' and 'party'):
insert into ballotBox values
(1, 1, 'street1', 10, 1500),
(2, 1, 'street2', 15, 490),
(3, 1, 'street2', 15, 610),
(4, 1, 'street2', 15, 650),
(5, 2, 'street3', 10, 900),
(6, 2, 'street3', 55, 800),
(7, 2, 'street4', 67, 250),
(8, 2, 'street4', 67, 990),
(9, 2, 'street5', 5, 600),
(10, 3, 'street1', 72, 1000),
(11, 3, 'street6', 25, 610),
(12, 3, 'street6', 25, 600),
(13, 4, 'street2', 3, 550),
(14, 4, 'street7', 15, 500),
(15, 5, 'street8', 44, 1100),
(16, 5, 'street9', 7, 710),
(17, 5, 'street10', 13, 950);
And then I tried to enter values to votes:
insert into votes values
(1, 1, 200, 100),
(1, 11, 210, 220),
(1, 1, 220, 2),
(1, 1, 230, 400),
(1, 1, 240, 313),
(1, 1, 250, 99),
(2, 1, 200, 55),
(2, 10, 210, 150),
(2, 10, 220, 2),
(2, 1, 230, 16),
(2, 1, 240, 210),
(2, 9, 250, 54),
(3, 9, 200, 234),
(3, 9, 210, 123),
(3, 1, 220, 8),
(3, 1, 230, 87),
(3, 1, 240, 76),
(3, 1, 250, 6),
(4, 1, 200, 135),
(4, 1, 210, 246),
(4, 17, 220, 7),
(4, 1, 230, 18),
(4, 1, 240, 44),
(4, 1, 250, 66),
(1, 2, 200, 373),
(1, 2, 210, 12),
(1, 2, 220, 3),
(1, 2, 230, 74),
(1, 2, 240, 58),
(1, 2, 250, 272),
(2, 6, 200, 139),
(2, 6, 210, 2580),
(2, 2, 220, 6),
(2, 2, 230, 73),
(2, 2, 240, 7),
(2, 2, 250, 99),
(3, 2, 200, 15),
(3, 2, 210, 68),
(3, 2, 220, 12),
(3, 2, 230, 12),
(3, 2, 240, 15),
(3, 2, 250, 25),
(4, 2, 200, 7),
(4, 2, 210, 245),
(4, 2, 220, 8),
(1, 0, 0.0, 361),
(4, 2, 240, 67),
(4, 2, 250, 144),
(5, 2, 200, 123),
(5, 2, 210, 76),
(5, 2, 220, 15),
(5, 2, 230, 158),
(5, 2, 240, 76),
(5, 2, 250, 132),
(1, 3, 200, 152),
(1, 3, 210, 517),
(1, 3, 220, 0),
(1, 3, 230, 267),
(2, 3, 200, 87),
(2, 3, 210, 134),
(2, 3, 220, 4),
(2, 3, 230, 11),
(2, 3, 240, 256),
(2, 3, 250, 76),
(3, 3, 200, 105),
(3, 3, 210, 132),
(3, 3, 3220, 3),
(3, 3, 230, 24),
(3, 3, 240, 254),
(3, 3, 250, 12),
(1, 4, 200, 61),
(1, 4, 210, 54),
(1, 4, 220, 5),
(1, 4, 230, 19),
(1, 4, 240, 1),
(1, 4, 250, 47),
(2, 4, 200, 17),
(2, 4, 210, 23),
(2, 4, 220, 0),
(2, 4, 230, 64),
(2, 4, 240, 11),
(2, 4, 250, 149),
(1, 5, 0200, 187),
(1, 5, 210, 88),
(1, 5, 220, 1),
(1, 5, 230, 255),
(1, 5, 240, 12),
(1, 5, 250, 373),
(2, 2, 500, 245),
(2, 5, 210, 120),
(2, 5, 220, 9),
(2, 5, 230, 19),
(2, 5, 240, 234),
(2, 5, 250, 5),
(3, 5, 200, 107),
(3, 5, 210, 18),
(3, 5, 220, 11),
(3, 5, 230, 54),
(3, 5, 240, 378),
(3, 5, 250, 243);
But I'm getting an error:
Msg 547, Level 16, State 0, Line 1
The INSERT statement conflicted with the FOREIGN KEY constraint "FK__votes__bno__1920BF5C". The conflict occurred in database "Voting", table "dbo.ballotBox", column 'bno'. The statement has been terminated.
You are trying to insert the row
(1, 0, 0.0, 361),
into the votes table, whose bno column is a foreign key to ballotBox.
There is no record with bno = 0 in the ballotBox table.
Remove this row from the insert into votes (or correct its bno value) and run the insert again.
The relationship between two tables is not correct.
Make it so:
-- votes with its foreign keys declared as named table-level constraints.
-- Referenced column names for city and party follow the original answer;
-- those tables are defined outside this snippet.
create table votes
(
    cid numeric(4,0),
    bno integer,
    pid numeric(3,0),
    nofvotes integer,
    constraint votes_cid_fk foreign key (cid) references city (cid),
    constraint votes_bno_fk foreign key (bno) references ballotBox (bno),
    constraint votes_pid_fk foreign key (pid) references party (pid)
);

How to select unique subsequences in SQL?

In generic terms I have a sequence of events, from which i'd like to select unique non-repeatable sequences using MS SQL Server 2008 R2.
Specifically in this case, each test has a series of recordings, each of which have a specific sequence of stimuli. I'd like to select the unique sequences of stimuli from inside the recordings of one test, insert them into another table and assign the sequence group id to the original table.
-- Sample input: one row per stimulus shown in a recording.
-- SequenceGroupID is still unassigned (NULL) at this point.
DECLARE @Sequence TABLE
([ID] INT
,[TestID] INT
,[StimulusID] INT
,[RecordingID] INT
,[PositionInRecording] INT
,[SequenceGroupID] INT
)
INSERT @Sequence
    ([ID], [TestID], [StimulusID], [RecordingID], [PositionInRecording], [SequenceGroupID])
VALUES
(1, 1, 101, 1000, 1, NULL),
(2, 1, 102, 1000, 2, NULL),
(3, 1, 103, 1000, 3, NULL),
(4, 1, 103, 1001, 1, NULL),
(5, 1, 103, 1001, 2, NULL),
(6, 1, 101, 1001, 3, NULL),
(7, 1, 102, 1002, 1, NULL),
(8, 1, 103, 1002, 2, NULL),
(9, 1, 101, 1002, 3, NULL),
(10, 1, 102, 1003, 1, NULL),
(11, 1, 103, 1003, 2, NULL),
(12, 1, 101, 1003, 3, NULL),
(13, 2, 106, 1004, 1, NULL),
(14, 2, 107, 1004, 2, NULL),
(15, 2, 107, 1005, 1, NULL),
(16, 2, 106, 1005, 2, NULL)
After correctly identifying the unique sequences, the results should look like this
-- Expected result 1: one row per distinct stimulus sequence within a test.
DECLARE @SequenceGroup TABLE
([ID] INT
,[TestID] INT
,[SequenceGroupName] NVARCHAR(50)
)
INSERT @SequenceGroup
    ([ID], [TestID], [SequenceGroupName])
VALUES
(1, 1, '101-102-103'),
(2, 1, '103-103-101'),
(3, 1, '102-103-101'),
(4, 2, '106-107'),
(5, 2, '107-106')
-- Expected result 2: the input rows with SequenceGroupID filled in.
-- Recordings 1002 and 1003 contain the same sequence and share group 3.
DECLARE @OutcomeSequence TABLE
([ID] INT
,[TestID] INT
,[StimulusID] INT
,[RecordingID] INT
,[PositionInRecording] INT
,[SequenceGroupID] INT
)
INSERT @OutcomeSequence
    ([ID], [TestID], [StimulusID], [RecordingID], [PositionInRecording], [SequenceGroupID])
VALUES
(1, 1, 101, 1000, 1, 1),
(2, 1, 102, 1000, 2, 1),
(3, 1, 103, 1000, 3, 1),
(4, 1, 103, 1001, 1, 2),
(5, 1, 103, 1001, 2, 2),
(6, 1, 101, 1001, 3, 2),
(7, 1, 102, 1002, 1, 3),
(8, 1, 103, 1002, 2, 3),
(9, 1, 101, 1002, 3, 3),
(10, 1, 102, 1003, 1, 3),
(11, 1, 103, 1003, 2, 3),
(12, 1, 101, 1003, 3, 3),
(13, 2, 106, 1004, 1, 4),
(14, 2, 107, 1004, 2, 4),
(15, 2, 107, 1005, 1, 5),
(16, 2, 106, 1005, 2, 5)
This is fairly easy to do in MySQL and other databases that support some version of GROUP_CONCAT functionality. It's apparently a good deal harder in SQL Server. Here's a stackoverflow question that discusses one technique. Here's another with some information about SQL Server 2008 specific solutions that might also get you started.
This will do it. I had to add a column to @SequenceGroup.
-- Assigns every recording to a group of identical stimulus sequences:
--   1. build the ordered stimulus string of each recording,
--   2. create one @SequenceGroup row per distinct (TestID, string),
--   3. write the group ID back to every @Sequence row.
DECLARE @Sequence TABLE
([ID] INT
,[TestID] INT
,[StimulusID] INT
,[RecordingID] INT
,[PositionInRecording] INT
,[SequenceGroupID] INT
);
INSERT @Sequence
    ([ID], [TestID], [StimulusID], [RecordingID], [PositionInRecording], [SequenceGroupID])
VALUES
(1, 1, 101, 1000, 1, NULL),
(2, 1, 102, 1000, 2, NULL),
(3, 1, 103, 1000, 3, NULL),
(4, 1, 103, 1001, 1, NULL),
(5, 1, 103, 1001, 2, NULL),
(6, 1, 101, 1001, 3, NULL),
(7, 1, 102, 1002, 1, NULL),
(8, 1, 103, 1002, 2, NULL),
(9, 1, 101, 1002, 3, NULL),
(10, 1, 102, 1003, 1, NULL),
(11, 1, 103, 1003, 2, NULL),
(12, 1, 101, 1003, 3, NULL),
(13, 2, 106, 1004, 1, NULL),
(14, 2, 107, 1004, 2, NULL),
(15, 2, 107, 1005, 1, NULL),
(16, 2, 106, 1005, 2, NULL);

-- Step 1: one row per recording with its stimuli joined by '-'.
-- ORDER BY PositionInRecording fixes the concatenation order; without it the
-- order of the pieces is not guaranteed.
DECLARE @RecordingSequence TABLE
([TestID] INT
,[RecordingID] INT
,[SequenceGroupName] NVARCHAR(50)
);
INSERT INTO @RecordingSequence
    ([TestID], [RecordingID], [SequenceGroupName])
SELECT
    r.TestID,
    r.RecordingID,
    STUFF((SELECT '-' + CAST(s.[StimulusID] AS NVARCHAR(100))
           FROM @Sequence s
           WHERE s.RecordingID = r.RecordingID
             AND s.TestID = r.TestID
           ORDER BY s.PositionInRecording
           FOR XML PATH('')), 1, 1, '')
FROM @Sequence r
GROUP BY r.RecordingID, r.TestID;

-- Step 2: one group per distinct sequence within a test. RecordingID keeps the
-- first (lowest) recording that showed the sequence; ordering by it makes the
-- IDENTITY values follow recording order.
DECLARE @SequenceGroup TABLE
([ID] INT IDENTITY(1, 1)
,[TestID] INT
,[SequenceGroupName] NVARCHAR(50)
,[RecordingID] INT
);
INSERT INTO @SequenceGroup
    ([TestID], [SequenceGroupName], [RecordingID])
SELECT TestID, SequenceGroupName, MIN(RecordingID)
FROM @RecordingSequence
GROUP BY TestID, SequenceGroupName
ORDER BY MIN(RecordingID);

SELECT * FROM @SequenceGroup;

-- Step 3: recordings with the same sequence string receive the same group ID.
UPDATE s
SET SequenceGroupID = sg.ID
FROM @Sequence s
JOIN @RecordingSequence rs
    ON rs.RecordingID = s.RecordingID AND rs.TestID = s.TestID
JOIN @SequenceGroup sg
    ON sg.TestID = rs.TestID AND sg.SequenceGroupName = rs.SequenceGroupName;

SELECT * FROM @Sequence;