How to SQL conver to dataframe - sql

I want to convert to SQL to dataframe.\
SELECT day,
MAX(id),
MAX(if(device = 'Mobile devices with full browsers', 'mobile', 'pc')),
AVG(replace(replace(search_imprshare, '< 10%', '10'), '%', '') / 100),
REPLACE(SUBSTRING(SUBSTRING_INDEX(add_trackingcode, '_', 1), CHAR_LENGTH(SUBSTRING_INDEX(add_trackingcode, '_', 1 - 1)) + 2), add_trackingcode, '')
FROM MY_TEST_TABLE
GROUP BY day
But I can only do below that.
I don't know how to work on '???'.
df_data= df_data.groupby(['day').agg(
{
'id': np.max,
'device ' : ???,
'percent' : ???,
'tracking' : ???
}
)
How should I do it?

Related

Invalid length parameter passed to the LEFT or SUBSTRING function. error

Can someone help me. The errors are in the left but i don't I don't know what to do anymore.
The objective is to get the words between Source: and Description:
I have this error: Invalid length parameter passed to the LEFT or SUBSTRING function.
SELECT Instance.instance_id
,DBSysJobHistory.job_id
,DBSysJobHistory.step_id
,SUBSTRING(DBSysJobHistory.message, 19, 25) Executedby
,LEFT(REVERSE(LEFT(REVERSE(DBSysJobHistory.message), CHARINDEX(':ecruoS', REVERSE(DBSysJobHistory.message)) - 2))+'Description:', CHARINDEX('Description:',REVERSE(LEFT(REVERSE(DBSysJobHistory.message), CHARINDEX(':ecruoS', REVERSE(DBSysJobHistory.message)) - 2))+'Description:')-1) AS Sources
,LEFT(REVERSE(LEFT(REVERSE(DBSysJobHistory.message), CHARINDEX(' :noitpircseD', REVERSE(DBSysJobHistory.message)) - 2))+ 'End Error', CHARINDEX(' End Error',REVERSE(LEFT(REVERSE(DBSysJobHistory.message), CHARINDEX(':noitpircseD', REVERSE(DBSysJobHistory.message)) - 2))+'End Error')-1) AS Descriptions
,STUFF(STUFF(STUFF(RIGHT(REPLICATE('0', 8) + CAST(DBSysJobHistory.run_duration as varchar(8)), 8), 3, 0, ':'), 6, 0, ':'), 9, 0, ':') run_durations
FROM msdb.dbo.sysjobhistory DBSysJobHistory
JOIN (SELECT DBSysJobHistory.job_id
,DBSysJobHistory.step_id
,MAX(DBSysJobHistory.instance_id) as instance_id
FROM msdb.dbo.sysjobhistory DBSysJobHistory
GROUP BY DBSysJobHistory.job_id
,DBSysJobHistory.step_id
) AS Instance ON DBSysJobHistory.instance_id = Instance.instance_id
WHERE DBSysJobHistory.run_status <> 1

SQL Server - Find frequency of occurance (by row, not word) of most common words in a column

This question has been asked more than a few times, but I can't find the specific answer I need. I have a query that finds the most commonly appearing words in a column in SQL Server and lists them with the count of their appearances. The problem is that if a word appears multiple times in a row, it counts once for each appearance. I would like to only count each word once per row.
So a row with a value of "To be or not to be" would count 'to' and 'be' once each, not twice each for purposes of overall frequency.
Here is the current query, which also strips out common words such as pronouns and replaces all of the commonly occurring separators with spaces. It's a bit old so I suspect it could be a lot neater.
SELECT sep.Col Phrase, count(*) as Qty
FROM (
Select * FROM (
Select value = Upper(RTrim(LTrim(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Title, ',', ' '), '.', ' '), '!', ' '), '+', ' '), ':', ' '), '-', ' '), ';', ' '), '(', ' '), ')', ' '), '/', ' '), '&', ''), '?', ' '), ' ', ' '), ' ', ' '))))
FROM Table
) easyValues
Where value <> ''
) actualValues
Cross Apply dbo.SeparateValues(value, ' ') sep
WHERE sep.Col not in ('', 'THE', 'A', 'AN', 'WHO', 'BOOK', 'AND', 'FOR', 'ON', 'HAVE', 'YOUR', 'HOW', 'WE', 'IN', 'I', 'IT', 'BY', 'SO', 'THEIR', 'IS', 'OR', 'HE', 'OF', 'WHAT'
, 'HIM', 'HIS', 'SHE', 'HER', 'MY', 'FROM', 'US', 'OUR', 'AT', 'ALL', 'BE', 'OF', 'TO', 'YOU', 'WITH', 'THAT', 'THIS', 'WAS', 'ARE', 'THERE', 'BUT', 'HAS'
, '1', '2', '3', '4', '5', '6', '7', '8', '9', '0', 'WILL', 'MORE', 'DIV', 'THAN', 'EACH', 'GET', 'ANY')
and LEN(sep.Col) > 2
GROUP By sep.Col
HAVING count(*) > 1
Appreciate any thoughts on a better way to do this while fixing the issue of repeat words.
You just need to GROUP BY twice.
First by sep.Col and Table.ID to remove duplicates in a row. Your table has some ID column, right?
Second, just by sep.Col to get the final count.
I have also rewritten your query using CTEs to make it readable. At least, for me it is more readable in this way.
WITH
easyValues
AS
(
Select
ID
,value = Upper(RTrim(LTrim(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Title, ',', ' '), '.', ' '), '!', ' '), '+', ' '), ':', ' '), '-', ' '), ';', ' '), '(', ' '), ')', ' '), '/', ' '), '&', ''), '?', ' '), ' ', ' '), ' ', ' '))))
FROM Table
)
,actualValues
AS
(
SELECT
ID
,Value
FROM easyValues
Where value <> ''
)
,SeparateValues
AS
(
SELECT
ID
,sep.Col
FROM
actualValues
Cross Apply dbo.SeparateValues(value, ' ') AS sep
WHERE
sep.Col not in ('', 'THE', 'A', 'AN', 'WHO', 'BOOK', 'AND', 'FOR', 'ON', 'HAVE', 'YOUR', 'HOW', 'WE', 'IN', 'I', 'IT', 'BY', 'SO', 'THEIR', 'IS', 'OR', 'HE', 'OF', 'WHAT'
, 'HIM', 'HIS', 'SHE', 'HER', 'MY', 'FROM', 'US', 'OUR', 'AT', 'ALL', 'BE', 'OF', 'TO', 'YOU', 'WITH', 'THAT', 'THIS', 'WAS', 'ARE', 'THERE', 'BUT', 'HAS'
, '1', '2', '3', '4', '5', '6', '7', '8', '9', '0', 'WILL', 'MORE', 'DIV', 'THAN', 'EACH', 'GET', 'ANY')
and LEN(sep.Col) > 2
)
,UniqueValues
AS
(
SELECT
ID, Col
FROM
SeparateValues
GROUP BY
ID, Col
)
SELECT
Col AS Phrase
,count(*) as Qty
FROM UniqueValues
GROUP By Col
HAVING count(*) > 1
;
As far as I can tell, the STRING_SPLIT function along with CROSS APPLY can give you what you want. You can split the string based on the space delimiter, select each word distinctly, then count in an outer query. I ommitted the part where you don't select specific words for brevity.
Fiddle<>:
CREATE TABLE phrases(phrase NVARCHAR(MAX));
INSERT INTO phrases(phrase)VALUES(N'To be or not to be'),(N'this is not a phrase'),(N'And why is this not another one');
SELECT
w.value,
COUNT(*)
FROM
phrases AS p
CROSS APPLY (
SELECT DISTINCT
value
FROM
STRING_SPLIT(p.phrase,N' ')
) AS w
GROUP BY
w.value;
To achieve your requirement, you can use a FUNCTION to split a string into list of words by delimiter ' ' space. With the help of this function, you can then use some Dynamic SQL like cursor to get the final count.
First create the FUNCTION as-
Source Of code: stackoverflow
CREATE FUNCTION dbo.splitstring ( #stringToSplit VARCHAR(MAX) )
RETURNS #returnList TABLE ([Word] [nvarchar] (500))
AS
BEGIN
DECLARE #name NVARCHAR(255)
DECLARE #pos INT
WHILE CHARINDEX(' ', #stringToSplit) > 0
BEGIN
SELECT #pos = CHARINDEX(' ', #stringToSplit)
SELECT #name = SUBSTRING(#stringToSplit, 1, #pos-1)
INSERT INTO #returnList
SELECT #name
SELECT #stringToSplit = SUBSTRING(#stringToSplit, #pos+1, LEN(#stringToSplit)-#pos)
END
INSERT INTO #returnList
SELECT #stringToSplit
RETURN
END
Then use this CURSOR script to get your final output-
DECLARE #Value VARCHAR(MAX)
DECLARE #WordList TABLE
(
Word VARCHAR(200)
)
DECLARE db_cursor CURSOR
FOR
SELECT Upper(RTrim(LTrim(Replace(Replace(Replace(Replace(Replace
(Replace(Replace(Replace(Replace(Replace(Replace(Replace
(Replace(Replace(title, ',', ' '), '.', ' '), '!', ' '), '+', ' '), ':', ' '), '-', ' '), ';', ' ')
, '(', ' '), ')', ' '), '/', ' '), '&', ''), '?', ' '), ' ', ' '), ' ', ' ')))) [Value]
FROM table
OPEN db_cursor
FETCH NEXT FROM db_cursor INTO #Value
WHILE ##FETCH_STATUS = 0
BEGIN
INSERT INTO #WordList
SELECT DISTINCT Word FROM [dbo].[splitstring](#Value)
WHERE Word NOT IN ('', 'THE', 'A', 'AN', 'WHO', 'BOOK', 'AND', 'FOR', 'ON', 'HAVE', 'YOUR', 'HOW', 'WE', 'IN', 'I', 'IT', 'BY', 'SO', 'THEIR', 'IS', 'OR', 'HE', 'OF', 'WHAT'
, 'HIM', 'HIS', 'SHE', 'HER', 'MY', 'FROM', 'US', 'OUR', 'AT', 'ALL', 'BE', 'OF', 'TO', 'YOU', 'WITH', 'THAT', 'THIS', 'WAS', 'ARE', 'THERE', 'BUT', 'HAS'
, '1', '2', '3', '4', '5', '6', '7', '8', '9', '0', 'WILL', 'MORE', 'DIV', 'THAN', 'EACH', 'GET', 'ANY')
AND LEN(Word) > 2
FETCH NEXT FROM db_cursor INTO #Value
END
CLOSE db_cursor
DEALLOCATE db_cursor
SELECT Word,COUNT(*)
FROM #WordList
GROUP BY Word

Selecting a JSON object of arrays from a PostgreSQL table

I have prepared a simple SQL Fiddle demonstrating my problem -
In a two-player game I store user chats in a table:
CREATE TABLE chat(
gid integer, /* game id */
uid integer, /* user id */
created timestamptz,
msg text
);
Here I fill the table with a simple test data:
INSERT INTO chat(gid, uid, created, msg) VALUES
(10, 1, NOW() + interval '1 min', 'msg 1'),
(10, 2, NOW() + interval '2 min', 'msg 2'),
(10, 1, NOW() + interval '3 min', 'msg 3'),
(10, 2, NOW() + interval '4 min', 'msg 4'),
(10, 1, NOW() + interval '5 min', 'msg 5'),
(10, 2, NOW() + interval '6 min', 'msg 6'),
(20, 3, NOW() + interval '7 min', 'msg 7'),
(20, 4, NOW() + interval '8 min', 'msg 8'),
(20, 4, NOW() + interval '9 min', 'msg 9');
And I can fetch the data by running the SELECT query:
SELECT ARRAY_TO_JSON(
COALESCE(ARRAY_AGG(ROW_TO_JSON(x)),
array[]::json[])) FROM (
SELECT
gid,
uid,
EXTRACT(EPOCH FROM created)::int AS created,
msg
FROM chat) x;
which returns me a JSON-array:
[{"gid":10,"uid":1,"created":1514813043,"msg":"msg 1"},
{"gid":10,"uid":2,"created":1514813103,"msg":"msg 2"},
{"gid":10,"uid":1,"created":1514813163,"msg":"msg 3"},
{"gid":10,"uid":2,"created":1514813223,"msg":"msg 4"},
{"gid":10,"uid":1,"created":1514813283,"msg":"msg 5"},
{"gid":10,"uid":2,"created":1514813343,"msg":"msg 6"},
{"gid":20,"uid":3,"created":1514813403,"msg":"msg 7"},
{"gid":20,"uid":4,"created":1514813463,"msg":"msg 8"},
{"gid":20,"uid":4,"created":1514813523,"msg":"msg 9"}]
This is close to what I need, however I would like to use "gid" as JSON object properties and the rest data as values in that object:
{"10": [{"uid":1,"created":1514813043,"msg":"msg 1"},
{"uid":2,"created":1514813103,"msg":"msg 2"},
{"uid":1,"created":1514813163,"msg":"msg 3"},
{"uid":2,"created":1514813223,"msg":"msg 4"},
{"uid":1,"created":1514813283,"msg":"msg 5"},
{"uid":2,"created":1514813343,"msg":"msg 6"}],
"20": [{"uid":3,"created":1514813403,"msg":"msg 7"},
{"uid":4,"created":1514813463,"msg":"msg 8"},
{"uid":4,"created":1514813523,"msg":"msg 9"}]}
Is that please doable by using the PostgreSQL JSON functions?
I think you're looking for json_object_agg for that last step. Here is how I'd do it:
SELECT json_object_agg(
gid::text, array_to_json(ar)
)
FROM (
SELECT gid,
array_agg(
json_build_object(
'uid', uid,
'created', EXTRACT(EPOCH FROM created)::int,
'msg', msg)
) AS ar
FROM chat
GROUP BY gid
) x
;
I left off the coalesce because I don't think an empty array is possible. But it should be easy to put it back if your real query is something more complicated that could require it.

Converting formula from Crystal Reports to SSRS

I'll try and keep this as short as possible but I'm trying to convert a formula cell from crystal report to SSRS.
Here is the query:
SELECT
(SELECT START_DATE
FROM APPS.GL_PERIODS
WHERE PERIOD_TYPE = 'Month'
AND TRUNC(SYSDATE-:Days) BETWEEN START_DATE AND END_DATE) STR_DATE,
(SELECT END_DATE
FROM APPS.GL_PERIODS
WHERE PERIOD_TYPE = 'Month'
AND TRUNC(SYSDATE-:Days) BETWEEN START_DATE AND END_DATE) END_DATE,
DECODE(RT.ORGANIZATION_ID, 104, 'LPD',RT.ORGANIZATION_ID) ORG,
SUBSTR(POV.VENDOR_NAME, 1, 24) VENDOR_NAME,
DECODE(SUBSTR(PHA.SEGMENT1, 2,1), 'E', 'EXPENSE', 'e', 'EXPENSE', 'P', 'PRODUCT', 'p', 'PRODUCT', ' OTHER') PO_TYPE,
DECODE(SIGN(TRUNC(RT.TRANSACTION_DATE) - TRUNC(NVL(PLL.PROMISED_DATE - 3, PLL.NEED_BY_DATE - 3))), -1, 'LATE', 'ON TIME') PERFORMANCE,
COUNT(*) LINE_COUNT
FROM
APPS.RCV_TRANSACTIONS RT,
APPS.PO_HEADERS_ALL PHA,
APPS.PO_LINES_ALL PLA,
APPS.PO_LINE_LOCATIONS_ALL PLL,
APPS.PO_VENDORS POV
WHERE
RT.ORGANIZATION_ID = 104
AND RT.TRANSACTION_DATE >= (SELECT START_DATE
FROM APPS.GL_PERIODS
WHERE PERIOD_TYPE = 'Month'
AND TRUNC(SYSDATE-:Days) BETWEEN START_DATE AND END_DATE)
AND RT.TRANSACTION_DATE < (SELECT END_DATE + 1
FROM APPS.GL_PERIODS
WHERE PERIOD_TYPE = 'Month'
AND TRUNC(SYSDATE-:Days) BETWEEN START_DATE AND END_DATE)
AND RT.TRANSACTION_TYPE = 'RECEIVE'
AND RT.PO_HEADER_ID = PLL.PO_HEADER_ID
AND RT.PO_LINE_LOCATION_ID = PLL.LINE_LOCATION_ID
AND RT.PO_LINE_ID = PLL.PO_LINE_ID
AND RT.ORGANIZATION_ID = PLL.SHIP_TO_ORGANIZATION_ID
AND PLA.PO_LINE_ID = PLL.PO_LINE_ID
AND PLA.PO_HEADER_ID = PLL.PO_HEADER_ID
AND PHA.PO_HEADER_ID = PLA.PO_HEADER_ID
AND PHA.VENDOR_ID = POV.VENDOR_ID
GROUP BY
DECODE(RT.ORGANIZATION_ID, 104, 'LPD', RT.ORGANIZATION_ID),
SUBSTR(POV.VENDOR_NAME, 1, 24),
DECODE(SUBSTR(PHA.SEGMENT1, 2, 1), 'E', 'EXPENSE', 'e', 'EXPENSE', 'P', 'PRODUCT', 'p', 'PRODUCT', ' OTHER'),
DECODE(SIGN(TRUNC(RT.TRANSACTION_DATE) - TRUNC(NVL(PLL.PROMISED_DATE - 3, PLL.NEED_BY_DATE - 3))), -1, 'LATE', 'ON TIME')
ORDER BY
ORG, VENDOR_NAME, PO_TYPE, PERFORMANCE
In crystal the formula is
SUM({query.LINE_COUNT},{query.PERFORMANCE}) % SUM({query.LINE_COUNT}, {query.PO_TYPE})
This cell basically is just calculating the percentage of on time deliveries and late ones.

how do i update column (selmanufacturers_id) from the query

SELECT
'S. J. & G. FAZUL ELLAHIE (PVT) LTD.' AS CompanyName,
'E - 46, S.I.T.E., KARACHI - 75700' AS CompanyAddress,
'admin' login_user, 'crm.sjg.local' selhost,
'admin' sellogin, 'sr_salereport' seltoprint,
'SALES REPORT' selreportTitle, '' selflagType,
'SI' selreportType, '' selfrmDate, '' seltoDate,
'' selfromProductId, '' selac_fromProductId_ac,
'' selbrandId, '' selcustomerId, '' selac_customerId_ac,
'' selgodownId, '' selsmId, '' selsalesmanId, '' selcity,
'' selarea, '' selorders_status, 'S' seldst,
'Invoice' selgroup1, '1' selcurrencyid, 'id' seltype,
'D' seldefUnit, '' selmanufacturers_id, 'Report' selyt0,
CAST(docTypeId as varchar(20)) + CAST(documentId as varchar(20)) groupId1,
[dbo].[ITL_DATE_TO_STRING_FOR_SORTING](refDate) groupTitle1,
'' groupId2, '' selgroup2,'' groupTitle2, *
FROM
[dbo].SR_Sale(null, null, 0, 0, 0, 0, N'', N'', 0, N'SI', 0, 0, 1, N'id', N'D') AS TR
WHERE
docType IN ('SI', 'GYM RECEIPT')
This is a general update statement
FROM dbo.SR_Sale
SET selmanufacturers_id ='xxx'
WHERE (TODO: add where-clause here to specify which row needs to be updated)
Assuming selmanufacturers_id does not exist in [dbo].SR_Sale function but contrary to products_id.
IF (OBJECT_ID('tempdb..#tmp_SR_SALE') IS NOT NULL )
BEGIN
DROP TABLE #tmp_SR_SALE
END
SELECT 'S. J. & G. FAZUL ELLAHIE (PVT) LTD.' AS CompanyName ,
'E - 46, S.I.T.E., KARACHI - 75700' AS CompanyAddress ,
'admin' login_user ,
'crm.sjg.local' selhost ,
'admin' sellogin ,
'sr_salereport' seltoprint ,
'SALES REPORT' selreportTitle ,
'' selflagType ,
'SI' selreportType ,
'' selfrmDate ,
'' seltoDate ,
'' selfromProductId ,
'' selac_fromProductId_ac ,
'' selbrandId ,
'' selcustomerId ,
'' selac_customerId_ac ,
'' selgodownId ,
'' selsmId ,
'' selsalesmanId ,
'' selcity ,
'' selarea ,
'' selorders_status ,
'S' seldst ,
'Invoice' selgroup1 ,
'1' selcurrencyid ,
'id' seltype ,
'D' seldefUnit ,
'' selmanufacturers_id ,
'Report' selyt0 ,
CAST(docTypeId AS VARCHAR(20)) + CAST(documentId AS VARCHAR(20)) groupId1 ,
[dbo].[ITL_DATE_TO_STRING_FOR_SORTING](refDate) groupTitle1 ,
'' groupId2 ,
'' selgroup2 ,
'' groupTitle2 ,
*
INTO #tmp_SR_SALE FROM [dbo].SR_Sale(NULL, NULL, 0, 0, 0, 0, N'', N'', 0, N'SI', 0, 0, 1,
N'id', N'D') AS TR
WHERE docType IN ( 'SI', 'GYM RECEIPT' )
UPDATE #tmp_SR_SALE
SET selmanufacturers_id = 'value here'
WHERE products_id = 9031
SELECT * FROM #tmp_SR_SALE