SQL syntax converting self JOIN to PARTITION BY - sql

I can re-write this query
Query1:
-- Query1: for every row dated 2018-01-01, count the earlier rows that share
-- its area and name and have t_Value = 1.
-- COUNT(t2.t_Name) counts matched rows only, so rows without a match get 0.
SELECT
    t1.*,
    COUNT(t2.t_Name) AS CountMatches
FROM Table1 AS t1
LEFT JOIN Table1 AS t2
    ON  t2.[t_Area]  = t1.[t_Area]
    AND t2.[t_Name]  = t1.[t_Name]
    AND t2.[t_Date]  < t1.[t_Date]
    AND t2.[t_Value] = '1'
WHERE t1.[t_Date] = '2018-01-01'
GROUP BY
    t1.[t_Date],
    t1.[t_Area],
    t1.[t_Name],
    t1.[t_Length],
    t1.[t_Value]
as the following
Query2:
-- Query2: running count, per (t_Area, t_Name) in t_Date order, of the earlier
-- rows whose t_Value is 1; only the 2018-01-01 rows are returned.
-- The frame stops at "1 PRECEDING", so the current row is never counted and
-- the first row of each partition gets NULL (its frame is empty).
SELECT
    t2.*
FROM (
    SELECT
        t1.*,
        SUM(CASE t1.[t_Value] WHEN '1' THEN 1 ELSE 0 END) OVER (
            PARTITION BY t1.[t_Area], t1.[t_Name]
            ORDER BY t1.[t_Date]
            ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING
        ) AS CountMatches
    FROM Table1 AS t1
) AS t2
WHERE t2.[t_Date] = '2018-01-01'
I'm trying to add an additional term to Query2, but I'm struggling with the syntax.
How do I add the following term to Query2: AND ABS(t1.[t_Length] - t2.[t_Length]) <= 1 ?
The equivalent self join query which gives desired result is as follows
-- Self-join version that gives the desired result: for every row dated
-- 2018-01-01, count the earlier rows with the same area and name, t_Value = 1,
-- and a t_Length within 1 of the current row's t_Length.
SELECT
    t1.*,
    COUNT(t2.t_Name) AS CountMatches
FROM Table1 AS t1
LEFT JOIN Table1 AS t2
    ON  t2.[t_Area]  = t1.[t_Area]
    AND t2.[t_Name]  = t1.[t_Name]
    AND t2.[t_Date]  < t1.[t_Date]
    AND t2.[t_Value] = '1'
    AND ABS(t1.[t_Length] - t2.[t_Length]) <= 1  -- the extra term that is needed
WHERE t1.[t_Date] = '2018-01-01'
GROUP BY
    t1.[t_Date],
    t1.[t_Area],
    t1.[t_Name],
    t1.[t_Length],
    t1.[t_Value]
Example SQLFiddle: http://sqlfiddle.com/#!18/f15ec/10
Desired output:
Sample Data:
-- Sample table used by the queries above.
-- Note: t_Value is an int, although the queries compare it with the string '1'.
CREATE TABLE Table1
(
    [t_Date]   datetime,
    [t_Area]   varchar(10),
    [t_Name]   varchar(10),
    [t_Length] int,
    [t_Value]  int
);
INSERT INTO Table1 ([t_Date], [t_Area], [t_Name], [t_Length], [t_Value])
VALUES
('2018-01-01 00:00:00.000',' Area6', 'Name1', 10, 6),
('2018-01-01 00:00:00.000',' Area5', 'Name2', 9, 2),
('2017-12-15 00:00:00.000',' Area6', 'Name1', 8, 5),
('2017-11-25 00:00:00.000',' Area14', 'Name2', 7, 5),
('2017-11-18 00:00:00.000',' Area6', 'Name1', 10, 2),
('2017-11-13 00:00:00.000',' Area3', 'Name2', 9, 8),
('2017-10-28 00:00:00.000',' Area6', 'Name1', 8, 1),
('2017-10-04 00:00:00.000',' Area2', 'Name1', 7, 2),
('2017-08-26 00:00:00.000',' Area4', 'Name1', 10, 3),
('2017-02-25 00:00:00.000',' Area7', 'Name1', 9, 8),
('2017-02-18 00:00:00.000',' Area12', 'Name1', 8, 5),
('2017-01-25 00:00:00.000',' Area18', 'Name2', 7, 2),
('2017-01-01 00:00:00.000',' Area5', 'Name2', 10, 2),
('2016-12-27 00:00:00.000',' Area7', 'Name1', 9, 1),
('2016-12-09 00:00:00.000',' Area6', 'Name1', 8, 5),
('2016-12-01 00:00:00.000',' Area16', 'Name2', 7, 2),
('2016-11-12 00:00:00.000',' Area6', 'Name1', 10, 1),
('2016-10-27 00:00:00.000',' Area24', 'Name2', 9, 8),
('2016-10-22 00:00:00.000',' Area6', 'Name1', 8, 7),
('2016-09-30 00:00:00.000',' Area13', 'Name2', 7, 2),
('2016-09-24 00:00:00.000',' Area19', 'Name1', 10, 1),
('2016-08-20 00:00:00.000',' Area21', 'Name2', 9, 3),
('2016-07-26 00:00:00.000',' Area21', 'Name2', 8, 1),
('2016-07-17 00:00:00.000',' Area26', 'Name2', 7, 2),
('2016-06-24 00:00:00.000',' Area4', 'Name1', 10, 3),
('2016-06-01 00:00:00.000',' Area4', 'Name1', 9, 2),
('2016-05-29 00:00:00.000',' Area15', 'Name2', 8, 8),
('2016-05-10 00:00:00.000',' Area25', 'Name1', 7, 3),
('2016-05-05 00:00:00.000',' Area31', 'Name2', 10, 1),
('2016-04-13 00:00:00.000',' Area6', 'Name1', 9, 2),
('2016-04-03 00:00:00.000',' Area3', 'Name2', 8, 7),
('2016-03-09 00:00:00.000',' Area5', 'Name2', 7, 3),
('2016-02-27 00:00:00.000',' Area7', 'Name1', 10, 10),
('2016-02-06 00:00:00.000',' Area23', 'Name1', 9, 1),
('2016-01-01 00:00:00.000',' Area6', 'Name1', 8, 3),
('2015-12-08 00:00:00.000',' Area28', 'Name1', 7, 4),
('2015-11-10 00:00:00.000',' Area17', 'Name1', 10, 3),
('2015-05-17 00:00:00.000',' Area19', 'Name2', 9, 7),
('2015-04-24 00:00:00.000',' Area7', 'Name1', 8, 1),
('2015-04-21 00:00:00.000',' Area18', 'Name2', 7, 3),
('2015-04-07 00:00:00.000',' Area9', 'Name1', 10, 8),
('2015-03-11 00:00:00.000',' Area6', 'Name1', 9, 10),
('2015-02-27 00:00:00.000',' Area8', 'Name2', 8, 5),
('2015-02-24 00:00:00.000',' Area5', 'Name2', 7, 1),
('2015-02-17 00:00:00.000',' Area30', 'Name2', 10, 1),
('2015-01-31 00:00:00.000',' Area23', 'Name1', 9, 6),
('2015-01-23 00:00:00.000',' Area8', 'Name2', 8, 1),
('2015-01-10 00:00:00.000',' Area29', 'Name1', 7, 4),
('2014-12-26 00:00:00.000',' Area24', 'Name2', 10, 5),
('2014-12-13 00:00:00.000',' Area6', 'Name1', 9, 2),
('2014-11-15 00:00:00.000',' Area6', 'Name1', 8, 8),
('2014-10-25 00:00:00.000',' Area7', 'Name1', 7, 6),
('2014-05-24 00:00:00.000',' Area10', 'Name1', 10, 1),
('2014-05-05 00:00:00.000',' Area10', 'Name1', 9, 1),
('2014-04-21 00:00:00.000',' Area7', 'Name1', 8, 2),
('2013-12-26 00:00:00.000',' Area27', 'Name1', 7, 4),
('2013-12-03 00:00:00.000',' Area25', 'Name1', 10, 2),
('2013-11-14 00:00:00.000',' Area18', 'Name1', 9, 4),
('2013-10-23 00:00:00.000',' Area11', 'Name1', 8, 3),
('2013-06-26 00:00:00.000',' Area32', 'Name2', 7, 4),
('2013-06-14 00:00:00.000',' Area1', 'Name2', 10, 6),
('2013-05-18 00:00:00.000',' Area2', 'Name2', 9, 5),
('2013-03-30 00:00:00.000',' Area12', 'Name2', 8, 9),
('2013-02-25 00:00:00.000',' Area22', 'Name1', 7, 1),
('2013-01-08 00:00:00.000',' Area7', 'Name1', 10, 1)
;

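The new condition compares a value from the "current" row with values from the "previous" rows inside the window function. Unfortunately, this cannot be expressed: the expression inside a window aggregate is evaluated on each row of the frame and cannot refer to the columns of the current row.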
In SQL Server, you can use apply instead:
-- For every row dated 2018-01-01, count the earlier rows with the same area
-- and name, t_Value = 1, and a t_Length within 1 of the current row's.
-- The applied subquery is a scalar aggregate, so it always returns exactly one
-- row (0 when nothing matches); no outer GROUP BY is needed.
SELECT
    t1.*,
    prior_rows.CountMatches
FROM Table1 AS t1
OUTER APPLY
(
    SELECT COUNT(*) AS CountMatches
    FROM Table1 AS tt1
    WHERE tt1.[t_Area] = t1.[t_Area]
      AND tt1.[t_Name] = t1.[t_Name]
      AND tt1.[t_Date] < t1.[t_Date]
      AND tt1.[t_Value] = 1  -- t_Value is an int column; compare with an int literal
      AND ABS(t1.[t_Length] - tt1.[t_Length]) <= 1
) AS prior_rows  -- distinct alias: the original reused tt1 for both the inner table and this derived table
WHERE t1.[t_Date] = '2018-01-01';
In terms of performance, this saves you the outer aggregation.

Related

matplotlib scatterplot - only a few labels are displayed on x axis

When plotting with scatter in matplotlib, I find that some of the x-axis values are missing from the tick labels. I want every x value to be shown as a tick label on the graph.
This might be related to tick spacing but I am not sure how to set it to display all the x-axis values.
In the sample code, I want to have all the dates displayed on x-axis
x = [datetime.date(2019, 6, 16), datetime.date(2019, 6, 17), datetime.date(2019, 6, 18), datetime.date(2019, 6, 19),
datetime.date(2019, 6, 20), datetime.date(2019, 6, 21), datetime.date(2019, 6, 22), datetime.date(2019, 6, 23),
datetime.date(2019, 6, 24), datetime.date(2019, 6, 25), datetime.date(2019, 6, 26), datetime.date(2019, 6, 27),
datetime.date(2019, 6, 28), datetime.date(2019, 6, 29), datetime.date(2019, 6, 30), datetime.date(2019, 7, 1),
datetime.date(2019, 7, 2), datetime.date(2019, 7, 3), datetime.date(2019, 7, 4), datetime.date(2019, 7, 5),
datetime.date(2019, 7, 6), datetime.date(2019, 7, 7), datetime.date(2019, 7, 8), datetime.date(2019, 7, 9),
datetime.date(2019, 7, 10), datetime.date(2019, 7, 11), datetime.date(2019, 7, 12), datetime.date(2019, 7, 13),
datetime.date(2019, 7, 15)]
y = [0.15338331291011087, 0.15340904024033467, 0.1534195786228156, 0.15343290378685995, 0.15331644003478487,
0.1533570064827251, 0.1531156771286262, 0.15307150988142237, 0.15306137109205153, 0.15302301551230038,
0.15295889536607005, 0.15298157619113423, 0.15286883583977182, 0.15283539558962958, 0.15284508041253356,
0.15281542656182034, 0.1527844647725921, 0.15277054534676898, 0.1527339281127108, 0.15270419704783855,
0.15261812595095475, 0.15255120245035042, 0.15251650362641, 0.15257536163149088, 0.15253967278547242,
0.15249871561808356, 0.15248591103997422, 0.15242121840852002, 0.15248773465596907]
# Scatter y against the dates in x on a single axes.
fig = plt.figure()
ax1 = fig.add_subplot(111)
ax1.scatter(x, y, s=10, c='b', marker="s", label='y')
plt.legend(loc='upper left')
# Only the label rotation is set here; the tick positions are left to
# matplotlib's automatic locator, so not every date in x gets its own tick.
plt.xticks(rotation=90)
plt.show()
Plot that I get with the sample code
Just pass the x values to plt.xticks() and resize the figure through plt.gcf() so that all labels fit; then it will work.
I have rebuilt the x list with a small helper and plotted the graph; check it below.
from matplotlib import pyplot as plt
from datetime import datetime
def std(a):
    """Parse a 'YYYY, M, D' string (e.g. '2019, 6, 16') into a datetime.date."""
    return datetime.strptime(a, '%Y, %m, %d').date()
x = [std('2019, 6, 16'), std('2019, 6, 17'), std('2019, 6, 18'), std('2019, 6, 19'),
std('2019, 6, 20'), std('2019, 6, 21'), std('2019, 6, 22'), std('2019, 6, 23'),
std('2019, 6, 24'), std('2019, 6, 25'), std('2019, 6, 26'), std('2019, 6, 27'),
std('2019, 6, 28'), std('2019, 6, 29'), std('2019, 6, 30'), std('2019, 7, 1'),
std('2019, 7, 2'), std('2019, 7, 3'), std('2019, 7, 4'), std('2019, 7, 5'),
std('2019, 7, 6'), std('2019, 7, 7'), std('2019, 7, 8'), std('2019, 7, 9'),
std('2019, 7, 10'), std('2019, 7, 11'), std('2019, 7, 12'), std('2019, 7, 13'),
std('2019, 7, 15')]
y = [0.15338331291011087, 0.15340904024033467, 0.1534195786228156, 0.15343290378685995, 0.15331644003478487,
0.1533570064827251, 0.1531156771286262, 0.15307150988142237, 0.15306137109205153, 0.15302301551230038,
0.15295889536607005, 0.15298157619113423, 0.15286883583977182, 0.15283539558962958, 0.15284508041253356,
0.15281542656182034, 0.1527844647725921, 0.15277054534676898, 0.1527339281127108, 0.15270419704783855,
0.15261812595095475, 0.15255120245035042, 0.15251650362641, 0.15257536163149088, 0.15253967278547242,
0.15249871561808356, 0.15248591103997422, 0.15242121840852002, 0.15248773465596907]
# Scatter the points on an 8x5 inch figure, then widen the figure so that one
# tick label per x value fits along the axis.
fig = plt.figure(figsize=(8,5))
ax1 = fig.add_subplot(111)
ax1.scatter(x, y, s=10, c='b', marker="s", label='y')
plt.legend(loc='upper left')
#plt.xticks(x,rotation=90)
#plt.xticks(range(len(x)))
# Remove the horizontal data padding, then draw once so that the tick labels
# exist and their on-screen extents can be measured.
plt.gca().margins(x=0)
plt.gcf().canvas.draw()
t_l = plt.gca().get_xticklabels()
# Width of the widest tick label, in display units (divided by dpi below to get inches).
maxsize = max([t.get_window_extent().width for t in t_l])
m = .2 # inch margin
# Target figure width in inches: one widest-label slot per x value plus 3*m.
s = maxsize/plt.gcf().dpi*len(x)+3*m
# NOTE(review): m is divided by the figure height ([1]) although the result is
# used as a left/right fraction -- presumably the width ([0]) was intended; confirm.
margin = m/plt.gcf().get_size_inches()[1]
# NOTE(review): right=0.8-margin leaves the right 20% of the figure empty -- confirm this is wanted.
plt.gcf().subplots_adjust(left=margin, right=0.8-margin)
# Widen the figure to s inches, keeping the current height.
plt.gcf().set_size_inches(s, plt.gcf().get_size_inches()[1])
# Put one tick at every x value, with the labels rotated 90 degrees.
plt.xticks(x,rotation=90)
plt.show()

SQL Server Management: The INSERT statement conflicted with the FOREIGN KEY constraint

I'm using SQL Server Management Studio to create a database (for a university assignment). I'm trying to insert new values into a table but I'm receiving an error.
The two relevant tables I created are:
-- Ballot boxes, keyed by bno. cid references the primary key of city
-- (city is defined elsewhere).
create table ballotBox
(
    bno       integer primary key,
    cid       numeric(4,0) references city,
    street    varchar(20),
    hno       integer,
    totvoters integer
);
-- Vote counts. Each of cid, bno and pid references the primary key of the
-- named table, so every inserted value must already exist there.
create table votes
(
    cid      numeric(4,0) references city,
    bno      integer      references ballotBox,
    pid      numeric(3,0) references party,
    nofvotes integer
);
I've entered first the values to ballotBox (as well as for the table 'city' and 'party'):
insert into ballotBox values
(1, 1, 'street1', 10, 1500),
(2, 1, 'street2', 15, 490),
(3, 1, 'street2', 15, 610),
(4, 1, 'street2', 15, 650),
(5, 2, 'street3', 10, 900),
(6, 2, 'street3', 55, 800),
(7, 2, 'street4', 67, 250),
(8, 2, 'street4', 67, 990),
(9, 2, 'street5', 5, 600),
(10, 3, 'street1', 72, 1000),
(11, 3, 'street6', 25, 610),
(12, 3, 'street6', 25, 600),
(13, 4, 'street2', 3, 550),
(14, 4, 'street7', 15, 500),
(15, 5, 'street8', 44, 1100),
(16, 5, 'street9', 7, 710),
(17, 5, 'street10', 13, 950);
And then I tried to enter values to votes:
insert into votes values
(1, 1, 200, 100),
(1, 11, 210, 220),
(1, 1, 220, 2),
(1, 1, 230, 400),
(1, 1, 240, 313),
(1, 1, 250, 99),
(2, 1, 200, 55),
(2, 10, 210, 150),
(2, 10, 220, 2),
(2, 1, 230, 16),
(2, 1, 240, 210),
(2, 9, 250, 54),
(3, 9, 200, 234),
(3, 9, 210, 123),
(3, 1, 220, 8),
(3, 1, 230, 87),
(3, 1, 240, 76),
(3, 1, 250, 6),
(4, 1, 200, 135),
(4, 1, 210, 246),
(4, 17, 220, 7),
(4, 1, 230, 18),
(4, 1, 240, 44),
(4, 1, 250, 66),
(1, 2, 200, 373),
(1, 2, 210, 12),
(1, 2, 220, 3),
(1, 2, 230, 74),
(1, 2, 240, 58),
(1, 2, 250, 272),
(2, 6, 200, 139),
(2, 6, 210, 2580),
(2, 2, 220, 6),
(2, 2, 230, 73),
(2, 2, 240, 7),
(2, 2, 250, 99),
(3, 2, 200, 15),
(3, 2, 210, 68),
(3, 2, 220, 12),
(3, 2, 230, 12),
(3, 2, 240, 15),
(3, 2, 250, 25),
(4, 2, 200, 7),
(4, 2, 210, 245),
(4, 2, 220, 8),
(1, 0, 0.0, 361),
(4, 2, 240, 67),
(4, 2, 250, 144),
(5, 2, 200, 123),
(5, 2, 210, 76),
(5, 2, 220, 15),
(5, 2, 230, 158),
(5, 2, 240, 76),
(5, 2, 250, 132),
(1, 3, 200, 152),
(1, 3, 210, 517),
(1, 3, 220, 0),
(1, 3, 230, 267),
(2, 3, 200, 87),
(2, 3, 210, 134),
(2, 3, 220, 4),
(2, 3, 230, 11),
(2, 3, 240, 256),
(2, 3, 250, 76),
(3, 3, 200, 105),
(3, 3, 210, 132),
(3, 3, 3220, 3),
(3, 3, 230, 24),
(3, 3, 240, 254),
(3, 3, 250, 12),
(1, 4, 200, 61),
(1, 4, 210, 54),
(1, 4, 220, 5),
(1, 4, 230, 19),
(1, 4, 240, 1),
(1, 4, 250, 47),
(2, 4, 200, 17),
(2, 4, 210, 23),
(2, 4, 220, 0),
(2, 4, 230, 64),
(2, 4, 240, 11),
(2, 4, 250, 149),
(1, 5, 0200, 187),
(1, 5, 210, 88),
(1, 5, 220, 1),
(1, 5, 230, 255),
(1, 5, 240, 12),
(1, 5, 250, 373),
(2, 2, 500, 245),
(2, 5, 210, 120),
(2, 5, 220, 9),
(2, 5, 230, 19),
(2, 5, 240, 234),
(2, 5, 250, 5),
(3, 5, 200, 107),
(3, 5, 210, 18),
(3, 5, 220, 11),
(3, 5, 230, 54),
(3, 5, 240, 378),
(3, 5, 250, 243);
But I'm getting an error:
Msg 547, Level 16, State 0, Line 1
The INSERT statement conflicted with the FOREIGN KEY constraint "FK__votes__bno__1920BF5C". The conflict occurred in database "Voting", table "dbo.ballotBox", column 'bno'. The statement has been terminated.
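You are trying to insert the row
(1, 0, 0.0, 361),
into the votes table, whose bno column is a foreign key to ballotBox.
There is no row with bno = 0 in the ballotBox table, so the insert violates the constraint.
Remove this row from the insert into votes (or add a matching ballot box first) and run the insert again.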
The relationship between two tables is not correct.
Make it so:
-- votes with its foreign keys declared as explicit table-level constraints.
-- The referenced column names city(cid) and party(pid) are taken from the
-- answer as posted; city and party are defined elsewhere.
create table votes
(
    cid      numeric(4,0),
    bno      integer,
    pid      numeric(3,0),
    nofvotes integer,
    foreign key (cid) references city (cid),
    foreign key (bno) references ballotBox (bno),
    foreign key (pid) references party (pid)
);

Postgresql Crosstab with Array row_name

I have the following SQL statement. The inner query ('SELECT ARRAY...ORDER BY 1,2') works correctly and gives the correct totals for each row_name. When I run the crosstab, the result is incorrect. Changing the 'ORDER BY' in the inner query doesn't seem to change the inner query's result, but it does change the outer query's result. I have verified that the types match for crosstab(text,text) for the column headings.
-- Pivots amount values with crosstab(source_sql, category_sql): row_name is
-- the array [zonenumber, commodity type name, year_run], and the two output
-- columns correspond to activity_type_id 1 and 3. The outer query then sums
-- those columns, grouped by the first two elements of row_name.
-- NOTE(review): the source query can return several rows for the same
-- (row_name, attribute) pair. crosstab() does not add such rows together; it
-- presumably keeps a single value per output cell, which would explain the
-- wrong totals and why changing the ORDER BY changes the result. Consider
-- aggregating in the source query (SUM(amount) ... GROUP BY row_name,
-- attribute); confirm against the tablefunc documentation.
SELECT
ct.row_name[1:2] AS zonenumber,
sum(ct.amount1) AS "sumEmploymentamount",
sum(ct.amount3) AS "sumExport_Consumersamount"
FROM output.crosstab('
SELECT
ARRAY[
zonenumber::text,
comTypes.commodity_type_name::text,
year_run::text
] as row_name,
tab.activity_type_id as attribute,
amount as value
FROM
output.all_zonalmakeuse_3 tab,
output.activity_numbers actNums,
output.activity_types actTypes,
output.commodity_numbers comNums,
output.commodity_types comTypes
WHERE
scenario = ''S03'' AND year_run = ''2005'' AND
amount != ''-Infinity'' AND moru = ''M'' AND
actNums.activity_type_id = ActTypes.activity_type_id AND
tab.activity = actNums.activitynumber AND
comNums.commodity_type_id = comTypes.commodity_type_id AND
tab.commodity = comNums.commoditynumber AND
(
comTypes.commodity_type_name =''Financial''OR
comNums.commodity = ''Financial'' OR
comTypes.commodity_type_name =''Goods''OR
comNums.commodity = ''Goods''
) AND
(
actTypes.activity_type_name =''Employment'' OR
actNums.activity = ''Employment'' OR
actTypes.activity_type_name =''Export Consumers'' OR
actNums.activity = ''Export Consumers''
)
ORDER BY 1,2
'::text, '
SELECT activity_type_id AS activity
FROM output.activity_types
WHERE activity_type_id = 1 OR activity_type_id = 3
'::text
-- Output row type: the row_name array plus one double precision column per category.
) ct (row_name text[], amount1 double precision, amount3 double precision)
GROUP BY ct.row_name[1:2]
ORDER BY ct.row_name[1:2]::text;
Tables
CREATE TABLE activity_numbers
("activitynumber" int, "activity" varchar(46), "activity_type_id" int)
;
INSERT INTO activity_numbers
("activitynumber", "activity", "activity_type_id")
VALUES
(0, '"AI01AgMinMan"', 1),
(1, '"AI02AgMinProd"', 1),
(2, '"AI03ConMan"', 1),
(3, '"AI04ConProd"', 1),
(4, '"AI05MfgMan"', 1),
(5, '"AI06MfgProd"', 1),
(6, '"AI07TCUMan"', 1),
(7, '"AI08TCUProd"', 1),
(8, '"AI09Whole"', 1),
(9, '"AI10Retail"', 1),
(10, '"AI11FIRE"', 1),
(11, '"AI12PTSci"', 1),
(12, '"AI13ManServ"', 1),
(13, '"AI14PBSOff"', 1),
(14, '"AI15PBSRet"', 1),
(15, '"AI16PSInd"', 1),
(16, '"AI17Religion"', 1),
(17, '"AI18BSOnsite"', 1),
(18, '"AI19PSOnsite"', 1);
CREATE TABLE activity_types
("activity_type_id" int, "activity_type_name" varchar(18))
;
INSERT INTO activity_types
("activity_type_id", "activity_type_name")
VALUES
(1, '"Employment"'),
(2, '"Households"'),
(3, '"Export Consumers"')
;
CREATE TABLE commodity_numbers
("commoditynumber" int, "commodity" varchar(29), "commodity_type_id" int)
;
INSERT INTO commodity_numbers
("commoditynumber", "commodity", "commodity_type_id")
VALUES
(0, '"CG01AgMinDirection"', 1),
(1, '"CG02AgMinOutput"', 1),
(2, '"CG03ConDirection"', 1),
(3, '"CG04ConOutput"', 1),
(4, '"CG05MfgDirection"', 1),
(5, '"CG06MfgOutput"', 1),
(6, '"CS07TCUDirection"', 2),
(7, '"CS08TCUOutput"', 2),
(8, '"CS09WsOutput"', 2),
(9, '"CS10RetailOutput"', 2),
(10, '"CS11FIREOutput"', 2),
(11, '"CS13OthServOutput"', 2),
(12, '"CS14HealthOutput"', 2),
(13, '"CS15GSEdOutput"', 2),
(14, '"CS16HiEdOutput"', 2),
(15, '"CS17GovOutput"', 2),
(16, '"CF18TaxReceipts"', 4),
(17, '"CF19GovSupReceipts"', 4),
(18, '"CF20InvestReceipts"', 4),
(19, '"CF21ReturnInvestReceipts"', 4),
(20, '"CF22CapitalTransferReceipts"', 4)
;
CREATE TABLE commodity_types
("commodity_type_id" int, "commodity_type_name" varchar(23))
;
INSERT INTO commodity_types
("commodity_type_id", "commodity_type_name")
VALUES
(1, '"Goods"'),
(4, '"Financial"')
;
CREATE TABLE all_zonalmakeuse_3
("year_run" int, "scenario" varchar(6), "activity" int, "zonenumber" int, "commodity" int, "moru" varchar(3), "amount" numeric, "activity_type_id" int, "commodity_type_id" int)
;
INSERT INTO all_zonalmakeuse_3
("year_run", "scenario", "activity", "zonenumber", "commodity", "moru", "amount", "activity_type_id", "commodity_type_id")
VALUES
(2005, '"C11a"', 0, 1, 0, '"M"', 1752708.30900861, 1, 1),
(2005, '"C11a"', 0, 3, 0, '"M"', 2785972.97039016, 1, 1),
(2005, '"C11a"', 0, 4, 0, '"M"', 3847879.45910403, 1, 1),
(2005, '"C11a"', 1, 1, 1, '"M"', 26154618.3893068, 1, 1),
(2005, '"C11a"', 1, 3, 1, '"M"', 1663.49609248196, 1, 1),
(2005, '"C11a"', 1, 4, 1, '"M"', 91727.9065950723, 1, 1),
(2005, '"C11a"', 1, 1, 5, '"M"', 855899.319689473, 1, 1),
(2005, '"C11a"', 1, 3, 5, '"M"', 54.4372375336784, 1, 1),
(2005, '"C11a"', 1, 4, 5, '"M"', 3001.75868302327, 1, 1),
(2005, '"C11a"', 2, 1, 2, '"M"', 150885191.664482, 1, 1),
(2005, '"C11a"', 2, 2, 2, '"M"', 99242746.1181359, 1, 1),
(2005, '"C11a"', 2, 3, 2, '"M"', 90993266.1879518, 1, 1),
(2005, '"C11a"', 2, 4, 2, '"M"', 60169908.2975819, 1, 1),
(2005, '"C11a"', 3, 1, 3, '"M"', 642982844.104623, 1, 1),
(2005, '"C11a"', 3, 2, 3, '"M"', 421379496.576106, 1, 1),
(2005, '"C11a"', 3, 3, 3, '"M"', 592125233.320609, 1, 1),
(2005, '"C11a"', 3, 4, 3, '"M"', 400206994.693349, 1, 1),
(2005, '"C11a"', 4, 1, 4, '"M"', 449206658.578704, 1, 1),
(2005, '"C11a"', 4, 2, 4, '"M"', 103823580.173348, 1, 1),
(2005, '"C11a"', 4, 3, 4, '"M"', 181300924.388112, 1, 1),
(2005, '"C11a"', 4, 4, 4, '"M"', 143113096.547075, 1, 1),
(2005, '"C11a"', 5, 1, 1, '"M"', 83889.8852772168, 1, 1),
(2005, '"C11a"', 5, 2, 1, '"M"', 25716.5837854808, 1, 1),
(2005, '"C11a"', 5, 3, 1, '"M"', 10243.7021847824, 1, 1),
(2005, '"C11a"', 5, 4, 1, '"M"', 22406.3296935502, 1, 1),
(2005, '"C11a"', 5, 1, 5, '"M"', 408669650.696034, 1, 1),
(2005, '"C11a"', 5, 2, 5, '"M"', 125278360.769936, 1, 1),
(2005, '"C11a"', 5, 3, 5, '"M"', 49902204.2985933, 1, 1),
(2005, '"C11a"', 5, 4, 5, '"M"', 109152455.018677, 1, 1),
(2005, '"C11a"', 5, 1, 20, '"M"', 161822.743734245, 1, 4),
(2005, '"C11a"', 5, 2, 20, '"M"', 49607.031096612, 1, 4),
(2005, '"C11a"', 5, 3, 20, '"M"', 19759.998336631, 1, 4),
(2005, '"C11a"', 5, 4, 20, '"M"', 43221.5842952059, 1, 4),
(2005, '"C11a"', 7, 1, 1, '"M"', 122316.017730318, 1, 1),
(2005, '"C11a"', 7, 2, 1, '"M"', 20514.5008361246, 1, 1),
(2005, '"C11a"', 7, 3, 1, '"M"', 8431.33094615992, 1, 1),
(2005, '"C11a"', 7, 4, 1, '"M"', 75842.631567318, 1, 1),
(2005, '"C11a"', 13, 1, 5, '"M"', 1195626.97941868, 1, 1),
(2005, '"C11a"', 13, 2, 5, '"M"', 567002.352487648, 1, 1),
(2005, '"C11a"', 13, 3, 5, '"M"', 1104908.87426762, 1, 1),
(2005, '"C11a"', 13, 4, 5, '"M"', 1071325.74253601, 1, 1),
(2005, '"C11a"', 17, 1, 1, '"M"', 751648.370711072, 1, 1),
(2005, '"C11a"', 17, 2, 1, '"M"', 340439.936040081, 1, 1),
(2005, '"C11a"', 17, 3, 1, '"M"', 800477.767008582, 1, 1),
(2005, '"C11a"', 17, 4, 1, '"M"', 489745.223392316, 1, 1),
(2005, '"C11a"', 17, 1, 20, '"M"', 3154907.39011312, 1, 4),
(2005, '"C11a"', 17, 2, 20, '"M"', 1428934.74123601, 1, 4),
(2005, '"C11a"', 17, 3, 20, '"M"', 3359859.9041298, 1, 4),
(2005, '"C11a"', 17, 4, 20, '"M"', 2055616.54193613, 1, 4),
(2005, '"C11a"', 18, 1, 20, '"M"', 2088003.66854949, 1, 4),
(2005, '"C11a"', 18, 2, 20, '"M"', 1310122.52506653, 1, 4),
(2005, '"C11a"', 18, 3, 20, '"M"', 1481450.29636847, 1, 4),
(2005, '"C11a"', 18, 4, 20, '"M"', 3035710.53213605, 1, 4)
;
I have manipulated the query in several ways (changed type casting, order by, etc), but always get incorrect values. The row and column headers are at least consistently correct.

Pandas: apply tupleize_cols to dataframe without to_csv()?

I like the tupleize_cols option in the to_csv() function. Is this functionality available for an in-memory dataframe? I would like to clean up the tuples of the multi-indexed columns into 'reportable' column names automatically.
Thanks,
Luc
Just use .values on the index
In [1]: i = pd.MultiIndex.from_product([[1,2,3],['a','b','c']])
In [2]: i
Out[2]:
MultiIndex(levels=[[1, 2, 3], [u'a', u'b', u'c']],
labels=[[0, 0, 0, 1, 1, 1, 2, 2, 2], [0, 1, 2, 0, 1, 2, 0, 1, 2]])
In [3]: i.values
Out[3]:
array([(1, 'a'), (1, 'b'), (1, 'c'), (2, 'a'), (2, 'b'), (2, 'c'),
(3, 'a'), (3, 'b'), (3, 'c')], dtype=object)

How to select unique subsequences in SQL?

In generic terms, I have a sequence of events, from which I'd like to select the unique (non-repeated) sequences using MS SQL Server 2008 R2.
Specifically in this case, each test has a series of recordings, each of which have a specific sequence of stimuli. I'd like to select the unique sequences of stimuli from inside the recordings of one test, insert them into another table and assign the sequence group id to the original table.
-- Input data: one row per stimulus shown in a recording. SequenceGroupID is
-- NULL here and is to be filled in once the sequence groups are known.
-- Table variables must be named with a leading @ (a # prefix is not valid in DECLARE ... TABLE).
DECLARE @Sequence TABLE
([ID] INT
,[TestID] INT
,[StimulusID] INT
,[RecordingID] INT
,[PositionInRecording] INT
,[SequenceGroupID] INT
)
INSERT INTO @Sequence
([ID], [TestID], [StimulusID], [RecordingID], [PositionInRecording], [SequenceGroupID])
VALUES
(1, 1, 101, 1000, 1, NULL),
(2, 1, 102, 1000, 2, NULL),
(3, 1, 103, 1000, 3, NULL),
(4, 1, 103, 1001, 1, NULL),
(5, 1, 103, 1001, 2, NULL),
(6, 1, 101, 1001, 3, NULL),
(7, 1, 102, 1002, 1, NULL),
(8, 1, 103, 1002, 2, NULL),
(9, 1, 101, 1002, 3, NULL),
(10, 1, 102, 1003, 1, NULL),
(11, 1, 103, 1003, 2, NULL),
(12, 1, 101, 1003, 3, NULL),
(13, 2, 106, 1004, 1, NULL),
(14, 2, 107, 1004, 2, NULL),
(15, 2, 107, 1005, 1, NULL),
(16, 2, 106, 1005, 2, NULL)
After correctly identifying the unique sequences, the results should look like this
-- Expected result 1: one row per distinct stimulus sequence within a test.
-- Table variables must be named with a leading @ (a # prefix is not valid in DECLARE ... TABLE).
DECLARE @SequenceGroup TABLE
([ID] INT
,[TestID] INT
,[SequenceGroupName] NVARCHAR(50)
)
INSERT INTO @SequenceGroup ([ID], [TestID], [SequenceGroupName]) VALUES
(1, 1, '101-102-103'),
(2, 1, '103-103-101'),
(3, 1, '102-103-101'),
(4, 2, '106-107'),
(5, 2, '107-106')
-- Expected result 2: the input rows with SequenceGroupID filled in. Recordings
-- 1002 and 1003 contain the same sequence and therefore share group 3.
DECLARE @OutcomeSequence TABLE
([ID] INT
,[TestID] INT
,[StimulusID] INT
,[RecordingID] INT
,[PositionInRecording] INT
,[SequenceGroupID] INT
)
INSERT INTO @OutcomeSequence
([ID], [TestID], [StimulusID], [RecordingID], [PositionInRecording], [SequenceGroupID])
VALUES
(1, 1, 101, 1000, 1, 1),
(2, 1, 102, 1000, 2, 1),
(3, 1, 103, 1000, 3, 1),
(4, 1, 103, 1001, 1, 2),
(5, 1, 103, 1001, 2, 2),
(6, 1, 101, 1001, 3, 2),
(7, 1, 102, 1002, 1, 3),
(8, 1, 103, 1002, 2, 3),
(9, 1, 101, 1002, 3, 3),
(10, 1, 102, 1003, 1, 3),
(11, 1, 103, 1003, 2, 3),
(12, 1, 101, 1003, 3, 3),
(13, 2, 106, 1004, 1, 4),
(14, 2, 107, 1004, 2, 4),
(15, 2, 107, 1005, 1, 5),
(16, 2, 106, 1005, 2, 5)
This is fairly easy to do in MySQL and other databases that support some version of GROUP_CONCAT functionality. It's apparently a good deal harder in SQL Server. Here's a stackoverflow question that discusses one technique. Here's another with some information about SQL Server 2008 specific solutions that might also get you started.
This will do it. I had to add a column (RecordingID) to the SequenceGroup table.
-- Builds one sequence-group row per recording (name = the recording's stimulus
-- IDs joined with '-') and writes the group ID back onto every @Sequence row.
-- Table variables must be named with a leading @ (a # prefix is not valid in DECLARE ... TABLE).
-- NOTE(review): recordings with identical sequences (e.g. 1002 and 1003) still
-- get separate group rows here, unlike the expected output in the question.
DECLARE @Sequence TABLE
    ([ID] INT
    ,[TestID] INT
    ,[StimulusID] INT
    ,[RecordingID] INT
    ,[PositionInRecording] INT
    ,[SequenceGroupID] INT
    );

INSERT INTO @Sequence
    ([ID], [TestID], [StimulusID], [RecordingID], [PositionInRecording], [SequenceGroupID])
VALUES
    (1, 1, 101, 1000, 1, NULL),
    (2, 1, 102, 1000, 2, NULL),
    (3, 1, 103, 1000, 3, NULL),
    (4, 1, 103, 1001, 1, NULL),
    (5, 1, 103, 1001, 2, NULL),
    (6, 1, 101, 1001, 3, NULL),
    (7, 1, 102, 1002, 1, NULL),
    (8, 1, 103, 1002, 2, NULL),
    (9, 1, 101, 1002, 3, NULL),
    (10, 1, 102, 1003, 1, NULL),
    (11, 1, 103, 1003, 2, NULL),
    (12, 1, 101, 1003, 3, NULL),
    (13, 2, 106, 1004, 1, NULL),
    (14, 2, 107, 1004, 2, NULL),
    (15, 2, 107, 1005, 1, NULL),
    (16, 2, 106, 1005, 2, NULL);

-- ID is generated; RecordingID is the extra column used to join back to @Sequence.
DECLARE @SequenceGroup TABLE
    ([ID] INT IDENTITY(1, 1)
    ,[TestID] INT
    ,[SequenceGroupName] NVARCHAR(50)
    ,[RecordingID] INT
    );

INSERT INTO @SequenceGroup ([TestID], [SequenceGroupName], [RecordingID])
SELECT
    t2.TestID,
    -- Concatenate '-<StimulusID>' for every row of the recording, then strip the leading '-'.
    STUFF((SELECT '-' + CAST(t1.[StimulusID] AS NVARCHAR(100))
           FROM @Sequence AS t1
           WHERE t1.RecordingID = t2.RecordingID
           -- Without this ORDER BY the concatenation order is not guaranteed.
           ORDER BY t1.PositionInRecording
           FOR XML PATH('')), 1, 1, ''),
    t2.RecordingID
FROM @Sequence AS t2
GROUP BY t2.RecordingID, t2.TestID
ORDER BY t2.RecordingID;  -- identity values are assigned in RecordingID order

SELECT * FROM @SequenceGroup;

-- Copy each recording's group ID onto its rows.
UPDATE s
SET SequenceGroupID = sg.ID
FROM @Sequence AS s
INNER JOIN @SequenceGroup AS sg
    ON  s.RecordingID = sg.RecordingID
    AND s.TestID = sg.TestID;

SELECT * FROM @Sequence;