How to set the settings of scrapy-redis to connect the remote redis-server with password? - redis

I have tried two solutions:
1)`REDIS_HOST = '111.111.111.111'
REDIS_PORT = 12000
REDIS_PASSWORD = 'aaaaaaaa'`
but it will raise:
2017-11-23 15:03:13 [twisted] CRITICAL:
Traceback (most recent call last):
File "/home/yuyanggo/.local/lib/python3.6/site-packages/twisted/internet/defer.py", line 1386, in _inlineCallbacks
result = g.send(result)
File "/home/yuyanggo/.local/lib/python3.6/site-packages/scrapy/crawler.py", line 79, in crawl
yield self.engine.open_spider(self.spider, start_requests)
redis.exceptions.ResponseError: NOAUTH Authentication required.
2)
REDIS_URL = 'redis://:aaaaaaaa#111.111.111.111:12000/0'
but I find that the data of redis is saved into localhost,not the remote redis-server.

try to put the following code in your setting.py file
REDIS_URL = 'redis://:{psw}#{host}:{port}'.format(
host='xx.xx.xx.xx', # your server ip
port='xxx',
psw='xxxx',
)

Related

The security token included in the request is expired, when I try to update credentials

I am using Celery with SQS as a broker and I am trying to renew my credentials "AWS_ACCESS_KEY_ID" and "AWS_SECRET_ACCESS_KEY", before they expire, the first time I run the task and the result is success, but after 15 minutes it expires although credentials have been renewed, the function to update credentials is as follows:
import os
import boto3
from celery import Celery
from kombu.utils.url import safequote
def update_aws_credentials():
role_info = {
'RoleArn': f"arn:aws:iam::{os.environ['AWS_ACCOUNT_NUMER']}:role/my_role_execution",
'RoleSessionName': 'roleExecution',
'DurationSeconds': 900
}
sts_client = boto3.client('sts', region_name='eu-central-1')
credentials = sts_client.assume_role(**role_info)
aws_access_key_id = credentials["Credentials"]['AccessKeyId']
aws_secret_access_key = credentials["Credentials"]['SecretAccessKey']
aws_session_token = credentials["Credentials"]["SessionToken"]
os.environ["AWS_ACCESS_KEY_ID"] = aws_access_key_id
os.environ["AWS_SECRET_ACCESS_KEY"] = aws_secret_access_key
os.environ["AWS_DEFAULT_REGION"] = 'eu-central-1'
os.environ["AWS_SESSION_TOKEN"] = aws_session_token
return aws_access_key_id, aws_secret_access_key
def get_celery(aws_access_key_id, aws_secret_access_key):
broker = f"sqs://{safequote(aws_access_key_id)}:{safequote(aws_secret_access_key)}#"
backend = 'redis://redis-service:6379/0'
celery = Celery(f"my_task", broker=broker, backend=backend)
celery.conf["broker_transport_options"] = {
'polling_interval': 30,
'region': 'eu-central-1',
'predefined_queues': {
f"my_queue": {
'url': f"https://sqs.eu-central-1.amazonaws.com/{os.environ['AWS_ACCOUNT_NUMER']}/my_queue"
}
}
}
celery.conf["task_default_queue"] = f"my_queue"
return celery
def refresh_sqs_credentials():
access, secret = update_aws_credentials()
return get_celery(access, secret)
Running refresh_sqs_credentials, new credentials are created:
celery = worker.refresh_sqs_credentials()
And then I run my task with celery:
task = celery.send_task('my_task.code_of_my_task', args=[content], task_id=task_id)
All tasks that I run before 15 minutes finish successfully, but after 15 minutes the error is the following:
[2021-12-14 14:08:15,637] ERROR in app: Exception on /tasks/run [POST]
Traceback (most recent call last):
File "/api/app.py", line 87, in post
task = celery.send_task('glgt_ap35080_dev_sqs_runalgo.allocation_alg_task', args=[content], task_id=task_id)
File "/usr/local/lib/python3.6/site-packages/celery/app/base.py", line 717, in send_task
amqp.send_task_message(P, name, message, **options)
File "/usr/local/lib/python3.6/site-packages/celery/app/amqp.py", line 547, in send_task_message
**properties
File "/usr/local/lib/python3.6/site-packages/kombu/messaging.py", line 178, in publish
exchange_name, declare,
File "/usr/local/lib/python3.6/site-packages/kombu/connection.py", line 525, in _ensured
return fun(*args, **kwargs)
File "/usr/local/lib/python3.6/site-packages/kombu/messaging.py", line 200, in _publish
mandatory=mandatory, immediate=immediate,
File "/usr/local/lib/python3.6/site-packages/kombu/transport/virtual/base.py", line 605, in basic_publish
return self._put(routing_key, message, **kwargs)
File "/usr/local/lib/python3.6/site-packages/kombu/transport/SQS.py", line 294, in _put
c.send_message(**kwargs)
File "/usr/local/lib/python3.6/site-packages/botocore/client.py", line 337, in _api_call
File "/usr/local/lib/python3.6/site-packages/botocore/client.py", line 656, in _make_api_call
raise error_class(parsed_response, operation_name)
botocore.exceptions.ClientError: An error occurred (ExpiredToken) when calling the SendMessage operation: The security token included in the request is expired
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/usr/local/lib/python3.6/site-packages/flask/app.py", line 1813, in full_dispatch_request
rv = self.dispatch_request()
File "/usr/local/lib/python3.6/site-packages/flask/app.py", line 1799, in dispatch_request
return self.view_functions[rule.endpoint](**req.view_args)
File "/usr/local/lib/python3.6/site-packages/flask_restplus/api.py", line 325, in wrapper
resp = resource(*args, **kwargs)
File "/usr/local/lib/python3.6/site-packages/flask/views.py", line 88, in view
return self.dispatch_request(*args, **kwargs)
File "/usr/local/lib/python3.6/site-packages/flask_restplus/resource.py", line 44, in dispatch_request
resp = meth(*args, **kwargs)
File "/api/app.py", line 90, in post
abort(500)
File "/usr/local/lib/python3.6/site-packages/werkzeug/exceptions.py", line 774, in abort
return _aborter(status, *args, **kwargs)
File "/usr/local/lib/python3.6/site-packages/werkzeug/exceptions.py", line 755, in __call__
raise self.mapping[code](*args, **kwargs)
werkzeug.exceptions.InternalServerError: 500 Internal Server Error: The server encountered an internal error and was unable to complete your request. Either the server is overloaded or there is an error in the application.
10.142.95.217 - - [14/Dec/2021 14:08:15] "POST /tasks/run HTTP/1.1" 500 -
I'm storing the credentials in environment variables, I don't understand why it expires after 15 minutes, can someone help me please?
The versions of the packages used are:
boto3==1.14.54
celery==5.0.0
kombu==5.0.2
pycurl==7.43.0.6
Thank you

Selenium"Can't connect to HTTPS URL because the SSL module is not available

I have an anaconda environment with selenium installed. When I try to run I get this error:
Traceback (most recent call last):
File "c:\Users\Nick\Desktop\Code\product-scraper\sephora-scraper\scraper.py", line 31, in <module>
ChromeDriverManager().install(), options=options)
File "C:\Users\Nick\anaconda3\envs\web-scraper\lib\site-packages\webdriver_manager\chrome.py", line 34, in install
driver_path = self._get_driver_path(self.driver)
File "C:\Users\Nick\anaconda3\envs\web-scraper\lib\site-packages\webdriver_manager\manager.py", line 21, in _get_driver_path
driver_version = driver.get_version()
File "C:\Users\Nick\anaconda3\envs\web-scraper\lib\site-packages\webdriver_manager\driver.py", line 40, in get_version
return self.get_latest_release_version()
File "C:\Users\Nick\anaconda3\envs\web-scraper\lib\site-packages\webdriver_manager\driver.py", line 63, in get_latest_release_version
resp = requests.get(f"{self._latest_release_url}_{self.browser_version}")
File "C:\Users\Nick\anaconda3\envs\web-scraper\lib\site-packages\requests\api.py", line 76, in get
return request('get', url, params=params, **kwargs)
File "C:\Users\Nick\anaconda3\envs\web-scraper\lib\site-packages\requests\api.py", line 61, in request
return session.request(method=method, url=url, **kwargs)
File "C:\Users\Nick\anaconda3\envs\web-scraper\lib\site-packages\requests\sessions.py", line 542, in request
resp = self.send(prep, **send_kwargs)
File "C:\Users\Nick\anaconda3\envs\web-scraper\lib\site-packages\requests\sessions.py", line 655, in send
r = adapter.send(request, **kwargs)
File "C:\Users\Nick\anaconda3\envs\web-scraper\lib\site-packages\requests\adapters.py", line 514, in send
raise SSLError(e, request=request)
requests.exceptions.SSLError: HTTPSConnectionPool(host='chromedriver.storage.googleapis.com', port=443): Max retries exceeded with url: /LATEST_RELEASE_88.0.4324 (Caused by SSLError("Can't connect to HTTPS URL because the SSL module is not available."))
I'm new to anaconda so I don't know what else to provide. Please leave a comment if I need to anything and I will add it right away. Thanks.
Try to add this path to your environment variable:
..\Anaconda3
..\Anaconda3\scripts
..\Anaconda3\Library\bin
You might need to restart windows after set up environment path

s3fs timeout issue on an AWS Lambda function within a VPN

s3fs seems to fail from time to time when reading from an S3 bucket using an AWS Lambda function within a VPN. I am using s3fs==0.4.0 and pandas==1.0.1.
import s3fs
import pandas as pd
def lambda_handler(event, context):
bucket = event['Records'][0]['s3']['bucket']['name']
s3_file = event['Records'][0]['s3']['object']['key']
s3fs.S3FileSystem.connect_timeout = 1800
s3fs.S3FileSystem.read_timeout = 1800
with s3fs.S3FileSystem(anon=False).open(f"s3://{bucket}/{s3_file}", 'rb') as f:
self.data = pd.read_json(f, **kwargs)
The stacktrace is the following:
Traceback (most recent call last):
File "/var/task/urllib3/connection.py", line 157, in _new_conn
(self._dns_host, self.port), self.timeout, **extra_kw
File "/var/task/urllib3/util/connection.py", line 84, in create_connection
raise err
File "/var/task/urllib3/util/connection.py", line 74, in create_connection
sock.connect(sa)
TimeoutError: [Errno 110] Connection timed out
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/var/task/botocore/httpsession.py", line 263, in send
chunked=self._chunked(request.headers),
File "/var/task/urllib3/connectionpool.py", line 720, in urlopen
method, url, error=e, _pool=self, _stacktrace=sys.exc_info()[2]
File "/var/task/urllib3/util/retry.py", line 376, in increment
raise six.reraise(type(error), error, _stacktrace)
File "/var/task/urllib3/packages/six.py", line 735, in reraise
raise value
File "/var/task/urllib3/connectionpool.py", line 672, in urlopen
chunked=chunked,
File "/var/task/urllib3/connectionpool.py", line 376, in _make_request
self._validate_conn(conn)
File "/var/task/urllib3/connectionpool.py", line 994, in _validate_conn
conn.connect()
File "/var/task/urllib3/connection.py", line 300, in connect
conn = self._new_conn()
File "/var/task/urllib3/connection.py", line 169, in _new_conn
self, "Failed to establish a new connection: %s" % e
urllib3.exceptions.NewConnectionError: <botocore.awsrequest.AWSHTTPSConnection object at 0x7f4d578e3ed0>: Failed to establish a new connection: [Errno 110] Connection timed out
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/var/task/botocore/endpoint.py", line 200, in _do_get_response
http_response = self._send(request)
File "/var/task/botocore/endpoint.py", line 244, in _send
return self.http_session.send(request)
File "/var/task/botocore/httpsession.py", line 283, in send
raise EndpointConnectionError(endpoint_url=request.url, error=e)
botocore.exceptions.EndpointConnectionError: Could not connect to the endpoint URL: "https://my_bucket.s3.eu-west-1.amazonaws.com/?list-type=2&prefix=my_folder%2Fsomething%2F&delimiter=%2F&encoding-type=url"
Has someone faced this same issue? Why would it fail only sometimes? Is there a s3fs configuration that could help for this specific issue?
Actually there was no problem at all with s3fs. Seems like we were using a Lambda function with two Subnets within the VPC and one was working normally but the other one wasn't allowed to access S3 resources, therefore when a Lambda was spawned using the second network it wouldn't be able to connect at all.
Fixing this issue was as easy as removing the second subnet.
You could also use boto3 which is supported by AWS, in order to get json from S3.
import json
import boto3
def lambda_handler(event, context):
bucket = event['Records'][0]['s3']['bucket']['name']
key = event['Records'][0]['s3']['object']['key']
s3 = boto3.resource('s3')
file_object = s3_resource.Object(bucket, key)
json_content = json.loads(file_object.get()['Body'].read())

RQ Redis : Connection Refused after a successful connection

My redis server is running under ubuntu 16.04 and I have RQ Dashboard running to monitor the queue. The redis server has a password which I supply for the initial connection. Here's my code:
from rq import Queue, Connection, Worker
from redis import Redis
from dblogger import DbLogger
def _redisCon():
redis_host = "192.168.1.169"
redis_port = "6379"
redis_password = "SecretPassword"
return Redis(host=redis_host, port=redis_port, password=redis_password)
rcon = _redisCon()
if rcon is not None:
with Connection(rcon):
DbLogger.log("rqworker", 0, "Launching Worker", "launching an RQ Worker - default Queue")
worker = Worker(list(map(Queue, 'default'))) # this works - I see the worker registered in RQ dashboard
worker.work() # this eventually fails with the Connection error:
"""
16:28:49 RQ worker 'rq:worker:steve-imac.95379' started, version 0.12.0
16:28:49 *** Listening on default...
16:28:49 Cleaning registries for queue: default
Traceback (most recent call last):
File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/redis/connection.py", line 177, in _read_from_socket
raise socket.error(SERVER_CLOSED_CONNECTION_ERROR)
OSError: Connection closed by server.
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/redis/client.py", line 668, in execute_command
return self.parse_response(connection, command_name, **options)
File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/redis/client.py", line 680, in parse_response
response = connection.read_response()
File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/redis/connection.py", line 624, in read_response
response = self._parser.read_response()
File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/redis/connection.py", line 284, in read_response
response = self._buffer.readline()
File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/redis/connection.py", line 216, in readline
self._read_from_socket()
File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/redis/connection.py", line 191, in _read_from_socket
(e.args,))
redis.exceptions.ConnectionError: Error while reading from socket: ('Connection closed by server.',)
File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/redis/connection.py", line 489, in connect
raise ConnectionError(self._error_message(e))
redis.exceptions.ConnectionError: Error 61 connecting to 192.168.1.169:6379. Connection refused.
"""
I've tried removing the password and enabling the unixsocket in the redis.conf -- neither seemed to help. This seems to be happening in some sort of timeout, since in other testing the worker actually loads a task and executes it before eventually dying with this error.

gcloud auth login : Properties Parse Error

i installed google-cloud-sdk in ubuntu 14.04 and when tried to login,it is showing this error.
krish#jarvis:~$ gcloud auth login
Traceback (most recent call last):
File "/usr/bin/../lib/google-cloud-sdk/./lib/googlecloudsdk/gcloud/gcloud.py", line 87, in
from googlecloudsdk.calliope import base
File "/usr/bin/../lib/google-cloud-sdk/./lib/googlecloudsdk/calliope/base.py", line 8, in
from googlecloudsdk.core import log
File "/usr/bin/../lib/google-cloud-sdk/./lib/googlecloudsdk/core/log.py", line 413, in
_log_manager = _LogManager()
File "/usr/bin/../lib/google-cloud-sdk/./lib/googlecloudsdk/core/log.py", line 195, in init
self.console_formatter = _ConsoleFormatter(sys.stderr)
File "/usr/bin/../lib/google-cloud-sdk/./lib/googlecloudsdk/core/log.py", line 172, in init
use_color = not properties.VALUES.core.disable_color.GetBool()
File "/usr/bin/../lib/google-cloud-sdk/./lib/googlecloudsdk/core/properties.py", line 782, in GetBool
value = _GetBoolProperty(self, PropertiesFile.Load(), required)
File "/usr/bin/../lib/google-cloud-sdk/./lib/googlecloudsdk/core/properties.py", line 1141, in Load
PropertiesFile._PROPERTIES = PropertiesFile(paths)
File "/usr/bin/../lib/google-cloud-sdk/./lib/googlecloudsdk/core/properties.py", line 1160, in init
self.__Load(properties_path)
File "/usr/bin/../lib/google-cloud-sdk/./lib/googlecloudsdk/core/properties.py", line 1174, in __Load
raise PropertiesParseError(e.message)
googlecloudsdk.core.properties.PropertiesParseError: File contains no section headers.
file: /home/krish/.config/gcloud/properties, line: 1
'h4\xaf\xe3\xda^\xa6\xe8\xb2\xdb`$?\x11\x7f\xce\xc1\x1f\x88\xcd"\x82c\x13Bj\x07\xc3\xe3\x9ds\xdd d\xe1\n'
This does look like you have a corruppted gcloud config file /home/krish/.config/gcloud/properties. Check what's the context, may be it's just one line and you can fix it, or just move/remove it (you will need to set all configurations once again if you did any).