Syntax error in spatial query? - ruby-on-rails-3

I wrote a function to find all POIs around a track.
controller :
# Loads the requested track, the way and its start POI, then narrows the
# track's POIs with Way#poi_around_track_from.
def index
  @track = Track.friendly.find(params[:track_id])
  @tracks = Track.where(way_id: @track.id)
  # NOTE(review): the way id is hard-coded to 1 here.
  @way = Way.find(1)
  @poi_start = Poi.find(@way.point_start)
  @pois = @track.pois.sleepsAndtowns
  # 50000 is passed as the `dist` argument of poi_around_track_from.
  @pois = @way.poi_around_track_from(@poi_start, 50000, @pois)
end
way.rb
# Builds a raw SQL query over this way, one reference POI (pta) and the
# candidate POIs (ptb), and runs it through Poi.find_by_sql.
#
# poi  - record whose id selects the pta row
# dist - value interpolated into the start fraction given to ST_LineSubstring
# pois - collection whose ids are interpolated into the ptb.id filter
#
# NOTE(review): `#{pois.ids}` interpolates a Ruby array as "[1, 2, ...]",
# which matches the reported `syntax error at or near "["` for this query.
def poi_around_track_from(poi, dist, pois)
around_sql = <<-SQL
SELECT
ST_DWithin(
ST_LineSubstring(
way.path,
ST_LineLocatePoint(way.path, pta.lonlat::geometry) + #{dist} / ST_Length(way.path::geography),
ST_LineLocatePoint(way.path, pta.lonlat::geometry) + 100000 / ST_Length(way.path::geography)
),
ptb.lonlat,
2000) is true as pois
FROM ways way, pois pta, pois ptb
WHERE way.id = #{self.id}
and pta.id = #{poi.id}
and ptb.id = #{pois.ids}
SQL
# NOTE(review): .pois is called on the result of find_by_sql; presumably this
# is meant to read the `pois` column of each returned row - confirm.
Poi.find_by_sql(around_sql).pois
end
This function returns:
syntax error at or near "["
LINE 13: and ptb.id = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
What's wrong, how can I fix it ?

Since you are building the query with raw SQL (not ActiveRecord), you have to use the standard IN clause in the WHERE condition.
It looks like pois.ids returns an array, so you will have to turn it into a string in the format shown below:
[1,2] #=> (1,2)
Change,
WHERE way.id = #{self.id}
and pta.id = #{poi.id}
and ptb.id = #{pois.ids}
to
WHERE way.id = #{self.id}
and pta.id = #{poi.id}
and ptb.id IN (#{pois.ids.join(',')})

You can convert pois.ids to a string as @semeera207 wrote, or go another way and compare ptb.id to pois.ids as an array.
-- pois.ids must be rendered as a SQL array constructor; interpolating the Ruby
-- array directly yields "[1, 2, ...]", which PostgreSQL rejects.
WHERE way.id = #{self.id}
and pta.id = #{poi.id}
and array[ptb.id] && ARRAY[#{pois.ids.join(',')}]
To make it faster create gin index
Create index on pois using gin((array[id]));

Related

Applying a mask to a certain range in a pandas column

I'm currently trying to apply a mask to a column on a dataframe, in order to gain the mean from certain values. However, I don't want to do this over the whole column, just over a small range. This is my code at present:
# Sample frame with a single "test" column of 20 integers.
data = pd.DataFrame({"test":[12, 4, 5, 4, 1, 3, 2, 5, 10, 9, 4, 3, 2, 1, 4, 2, 2, 4, 2, 5]})
# Bounds of the positional row range of interest.
range_start = 5
range_finish = 17
# Row position modulo 4 for every row of the frame.
mask = np.arange(len(data)) %4
# NOTE(review): each .iloc call below is given three indexers (slice, boolean
# mask, column label); this is what raises "IndexingError: Too many indexers".
measured_stress_ratio_overload = data.iloc[range_start:range_finish, mask == 0, 'test'].mean()
measured_stress_ratio_baseline = data.iloc[range_start:range_finish, mask!= 0, 'test'].mean()
My expected output would be the average of the values at positions 8, 12 and 16 for measured_stress_ratio_overload, and the average of all the other values between 5 and 17 for measured_stress_ratio_baseline. However, when I try to run this code, I get this error:
IndexingError: Too many indexers
How do I use this range to properly index and retrieve the answer I'd like? Any help would be greatly appreciated!
You shouldn't put the mask inside iloc. Since you are using the divisor as the criterion to find your desired rows, you can first add a new column to your dataframe and then slice it.
# Tag every row with its position modulo 4 so the filter can be written on a column.
data['divisor'] = np.arange(len(data)) % 4
# Slice the positional range FIRST and filter inside it. Filtering before
# .iloc would renumber the positions and select different rows.
window = data.iloc[range_start:range_finish]
measured_stress_ratio_overload = window.loc[window['divisor'] == 0, 'test'].mean()
measured_stress_ratio_baseline = window.loc[window['divisor'] != 0, 'test'].mean()
or you can use df.where
# Same selection expressed with where(): the positional range is sliced first,
# then rows failing the condition are masked out (NaN by default per the pandas
# docs) and mean() skips them.
measured_stress_ratio_overload = data.iloc[range_start:range_finish].where(data['divisor'] == 0)['test'].mean()
measured_stress_ratio_baseline = data.iloc[range_start:range_finish].where(data['divisor'] != 0)['test'].mean()

How to write a cupy user-defined kernel function to calculate the segmented sum

I currently use the following function, but I don't think it works well, and I can't follow the description of how to define a cupy kernel. The function is very memory-intensive and time-consuming on huge data.
def cupy_sum(self, bins):
    """Return the sum of each consecutive segment of ``self``.

    ``bins`` gives the length of every segment, in order. It is used to build
    indices, so it is assumed to be an integer array whose entries are
    positive and add up to at most ``len(self)`` - TODO confirm with callers.
    """
    # Index of the last element of every segment (no trailing comma: the
    # original accidentally wrapped this in a 1-tuple).
    last_idx = cupy.cumsum(bins) - 1
    # Running total sampled at the segment ends; differencing against a
    # leading 0 recovers the per-segment sums.
    return cupy.diff(cupy.r_[0, cupy.cumsum(self)[last_idx]])
Referring to other examples, I wrote the following code, but I do not know whether there is a problem with it.
# Elementwise kernel producing one `out` value per index i. `bins` and `dats`
# are declared `raw`, so the kernel body indexes them explicitly: it adds up
# dats[j] for j from bins[i] up to (but excluding) bins[i+1].
sum_section_kernel = cp.ElementwiseKernel(
'raw T bins, raw T dats',
'float32 out',
'''
T bin_f = bins[i ];
T bin_l = bins[i+1];
T biv = 0;
for(size_t j=bin_f; j<bin_l; j++){
biv += dats[j];
}
out = biv;
''',
'summe')
# Segment lengths (a) and the flat data to be summed (b).
a = cp.array([4, 3, 5], dtype=cp.float32)
b = cp.array([1, 1, 1.1, 1, 2, 2, 2, 3, 3, 3, 3, 3], dtype=cp.float32)
# Output buffer with one slot per segment.
y = cp.empty(3, dtype=cp.float32)
# Prefix the cumulative lengths with 0 so that a[i]..a[i+1] delimit segment i.
a = cp.r_[0,a.cumsum()]
out = sum_section_kernel(a, b, y)
print(out)
> [ 4.100 6.000 15.000]
The example is shown above. The speed has not improved, but I think there is still the advantage of saving memory.

Adjusting intervals in Pandas

I created intervals in pandas for a frequency table. The first interval looks like this: (22, 29]
and is open on the left - I want just this first interval to be closed on both sides, like this: [22, 29]. I tried intervals[0].closed = "both", but it did not work.
# Bins of width 7 from 22 to 64; as noted in the question, the intervals come
# out open on the left, e.g. (22, 29].
intervals = pd.interval_range(start=22, end=64, freq=7)
# One zero-initialised counter per interval. The name now matches the
# `c_freq_table` used in the loop below (it was defined as `vek_freq_table`).
c_freq_table = pd.Series([0] * len(intervals), index=intervals)
# Count every non-null value of column "c" in the first interval containing it.
for x in df.loc[df.loc[:, "c"].notnull(), "c"]:
    for y in c_freq_table.index:
        if int(x) in y:
            c_freq_table.loc[y] += 1
            break
You have to construct your own interval index with a list comprehension (or loop):
# Rebuild the intervals as a plain list: the first one is closed on both
# sides, every other one keeps pd.Interval's default closure.
intervals = [
    pd.Interval(iv.left, iv.right, closed='both') if pos == 0
    else pd.Interval(iv.left, iv.right)
    for pos, iv in enumerate(intervals)
]
intervals
Output:
[Interval(22, 29, closed='both'),
Interval(29, 36, closed='right'),
Interval(36, 43, closed='right'),
Interval(43, 50, closed='right'),
Interval(50, 57, closed='right'),
Interval(57, 64, closed='right')]
Note: A simpler solution might seem just to change the first element like:
new_first_elem = pd.Interval(intervals[0].left, intervals[0].right, closed='both')
intervals[0] = new_first_elem
However, this code throws a TypeError:
TypeError: Index does not support mutable operations

BigQuery Using arrays in parameterized queries

I need to run parameterized queries using arrays.
Python Client Library for BigQuery API
# Ids to be passed to the query as a single array parameter.
id_pull = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
# `@get_id` is the named parameter bound through query_params below.
# NOTE: `id = @get_id` compares a scalar column with an ARRAY parameter; this
# is the expression the BadRequest error quoted below points at.
query = "SELECT column1 FROM `table1` WHERE id = @get_id;"
query_params = [
    bigquery.ArrayQueryParameter(
        'get_id', 'INT64', id_pull)
]
job_config = bigquery.QueryJobConfig()
job_config.query_parameters = query_params
query_job = client.query(query, location='US', job_config=job_config)  # API request - starts query
results = query_job.result()  # Waits for job to complete.
I followed instructions from the documentation, however, this error after execution appears:
raise self._exception google.api_core.exceptions.BadRequest: 400 No
matching signature for operator = for argument types: INT64,
ARRAY. Supported signatures: ANY = ANY at [1:67]
Does someone know what the problem is and how to fix it?
I think the issue is in your WHERE clause.
Instead of
WHERE id = @get_id
it should be something like
WHERE id IN UNNEST(@get_id)

PyQt exclusive OR in sql query

How can I make that if my first search shows results it doesn't do the second part of the query, but stops and displays results? I tried something like this, but it just gives me blank window and it's pretty chaotic:
# Runs one combined query against table `pacijent`, built from the text of
# self.lineEdit, and shows the result in a table view kept in a global.
# NOTE(review): the SQL text is assembled with % formatting from the line
# edit's text, so the input is not escaped.
# NOTE(review): `(prezime, 3) = metaphone(...)` has no function name before the
# parenthesised pair - possibly `metaphone(prezime, 3)` was intended; confirm.
def test_update(self):
projectModel = QSqlQueryModel()
projectModel.setQuery("""SELECT * FROM pacijent WHERE prezime = '%s' OR (prezime, 3) = metaphone('%s', 3) OR LEVENSHTEIN(LOWER(prezime), '%s') < 3 AND NOT (prezime = '%s' AND (prezime, 3) = metaphone('%s', 3) AND LEVENSHTEIN(LOWER(prezime), '%s') < 3)""" % (str(self.lineEdit.text()), str(self.lineEdit.text()), str(self.lineEdit.text()), str(self.lineEdit.text()), str(self.lineEdit.text()), str(self.lineEdit.text())))
global projectView
projectView = QtGui.QTableView()
projectView.setModel(projectModel)
projectView.show()
So, if it finds the exact value of the attribute "prezime" it should display it, but if it doesn't, it should fall back to more advanced search tactics, such as metaphone and levenshtein.
EDIT:
I got it working like this:
def search_data(self):
    """Show exact matches if there are any, otherwise fuzzy matches.

    The table name, column name and wanted value are read from the three
    search line edits. An exact-equality query runs first; only when it
    returns no rows is the METAPHONE / LEVENSHTEIN query run. The views are
    stored in module-level globals (`myQTableView`, `var`), as before.
    """
    global myQTableView
    global var

    table_name = str(self.search_from_table_lineEdit.text())
    column_name = str(self.search_where_lineEdit.text())
    wanted = str(self.search_has_value_lineEdit.text())

    # NOTE(security): both queries are built by string formatting from
    # user-entered text, so the input is not escaped. Binding `wanted` as a
    # query parameter would be safer; the identifiers cannot be bound and
    # should be validated against known table/column names.
    exact_model = QSqlQueryModel()
    myQTableView = QtGui.QTableView()
    exact_model.setQuery(""" SELECT * FROM "%s" WHERE "%s" = '%s' """ % (table_name, column_name, wanted))
    if exact_model.rowCount() > 0:
        myQTableView.setModel(exact_model)
        myQTableView.show()
        return

    # No exact match: fall back to phonetic / edit-distance matching.
    fuzzy_model = QSqlQueryModel()
    fuzzy_model.setQuery(""" SELECT * FROM "%s" WHERE METAPHONE("%s", 3) = METAPHONE('%s', 3) OR LEVENSHTEIN("%s", '%s') < 4 """ % (table_name, column_name, wanted, column_name, wanted))
    var = QtGui.QTableView()
    var.setModel(fuzzy_model)
    var.show()
After setting the query, you can check whether the model contains any rows with rowCount(). A for loop lets you try several queries in turn:
def testUpdate(self):
    """Run the candidate queries in order and show the first non-empty result.

    Each query is set on the same model; the first one that yields at least
    one row is shown in a table view. If none yields a row, a critical
    message box reports 'Not found'.
    """
    myQSqlQueryModel = QtSql.QSqlQueryModel()
    # Keep the view on the instance: a top-level widget referenced only by a
    # local variable is released as soon as this method returns.
    self._resultView = QtGui.QTableView()
    for queryCommand in ["YOUR QUERY 1", "YOUR QUERY 2"]:
        myQSqlQueryModel.setQuery(queryCommand)
        if myQSqlQueryModel.rowCount() > 0:
            self._resultView.setModel(myQSqlQueryModel)
            self._resultView.show()
            break
    else:
        # The loop finished without a break: no query returned rows.
        QtGui.QMessageBox.critical(self, 'Query error', 'Not found')