I have a Gmail account linked to my domain account.
AWS SES will send messages to my S3 bucket. From there, SNS will forward the message in a raw format to my Gmail address.
How do I automatically convert the raw message into a standard email format?
The raw message is in the standard email format. I think what you want to know is how to parse that standard raw email into an object that you can manipulate so that you can forward it to yourself and have it look like a standard email. AWS provides a tutorial on how to forward emails with a lambda function, through SES, by first storing them in your S3 bucket: https://aws.amazon.com/blogs/messaging-and-targeting/forward-incoming-email-to-an-external-destination/
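Just to illustrate the parsing idea before the full Lambda code below, here is a minimal, standalone sketch using Python's standard email module; the file name raw_message.eml is only a stand-in for the object you would read from your S3 bucket:

import email
from email import policy

# Stand-in for the raw message bytes that SES wrote to S3.
with open("raw_message.eml", "rb") as f:
    raw_bytes = f.read()

# Parse the raw RFC 822 text into a message object you can manipulate.
msg = email.message_from_bytes(raw_bytes, policy=policy.default)
print(msg["Subject"])
print(msg["From"])

# Walk the MIME parts to pull out the readable body.
for part in msg.walk():
    if part.get_content_type() == "text/plain":
        print(part.get_content())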
If you follow those instructions, you'll find that the email you receive comes as an attachment, not looking like a standard email. The following code is an alteration of the Python code provided by AWS that does what you're looking for (substitute this for the code provided in the tutorial):
# Copyright 2010-2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
# Altered from original by Adam Winter
#
# This file is licensed under the Apache License, Version 2.0 (the "License").
# You may not use this file except in compliance with the License. A copy of the
# License is located at
#
# http://aws.amazon.com/apache2.0/
#
# This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
# OF ANY KIND, either express or implied. See the License for the specific
# language governing permissions and limitations under the License.
import os
import boto3
import email
import re
import html
from botocore.exceptions import ClientError
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from email.mime.application import MIMEApplication
from email.mime.image import MIMEImage
region = os.environ['Region']
def get_message_from_s3(message_id):

    incoming_email_bucket = os.environ['MailS3Bucket']
    incoming_email_prefix = os.environ['MailS3Prefix']

    if incoming_email_prefix:
        object_path = (incoming_email_prefix + "/" + message_id)
    else:
        object_path = message_id

    object_http_path = (f"http://s3.console.aws.amazon.com/s3/object/{incoming_email_bucket}/{object_path}?region={region}")

    # Create a new S3 client.
    client_s3 = boto3.client("s3")

    # Get the email object from the S3 bucket.
    object_s3 = client_s3.get_object(Bucket=incoming_email_bucket,
                                     Key=object_path)
    # Read the content of the message.
    file = object_s3['Body'].read()

    file_dict = {
        "file": file,
        "path": object_http_path
    }

    return file_dict
def create_message(file_dict):

    stringMsg = file_dict['file'].decode('utf-8')

    # Create a MIME container.
    msg = MIMEMultipart('alternative')

    sender = os.environ['MailSender']
    recipient = os.environ['MailRecipient']

    # Parse the email body.
    mailobject = email.message_from_string(file_dict['file'].decode('utf-8'))
    #print(mailobject.as_string())

    # Get original sender for reply-to
    from_original = mailobject['Return-Path']
    from_original = from_original.replace('<', '')
    from_original = from_original.replace('>', '')
    print(from_original)

    # Create a new subject line.
    subject = mailobject['Subject']
    print(subject)

    if mailobject.is_multipart():

        index = stringMsg.find('Content-Type: multipart/')
        stringBody = stringMsg[index:]
        #print(stringBody)

        stringData = 'Subject: ' + subject + '\nTo: ' + sender + '\nreply-to: ' + from_original + '\n' + stringBody

        message = {
            "Source": sender,
            "Destinations": recipient,
            "Data": stringData
        }
        return message

        # Note: the code below is never reached because of the return above.
        for part in mailobject.walk():
            ctype = part.get_content_type()
            cdispo = str(part.get('Content-Disposition'))

            # case for each common content type
            if ctype == 'text/plain' and 'attachment' not in cdispo:
                bodyPart = MIMEText(part.get_payload(decode=True), 'plain', part.get_content_charset())
                msg.attach(bodyPart)

            if ctype == 'text/html' and 'attachment' not in cdispo:
                mt = MIMEText(part.get_payload(decode=True), 'html', part.get_content_charset())
                email.encoders.encode_quopri(mt)
                del mt['Content-Transfer-Encoding']
                mt.add_header('Content-Transfer-Encoding', 'quoted-printable')
                msg.attach(mt)

            if 'attachment' in cdispo and 'image' in ctype:
                mi = MIMEImage(part.get_payload(decode=True), ctype.replace('image/', ''))
                del mi['Content-Type']
                del mi['Content-Disposition']
                mi.add_header('Content-Type', ctype)
                mi.add_header('Content-Disposition', cdispo)
                msg.attach(mi)

            if 'attachment' in cdispo and 'application' in ctype:
                ma = MIMEApplication(part.get_payload(decode=True), ctype.replace('application/', ''))
                del ma['Content-Type']
                del ma['Content-Disposition']
                ma.add_header('Content-Type', ctype)
                ma.add_header('Content-Disposition', cdispo)
                msg.attach(ma)

    # not multipart - i.e. plain text, no attachments, keeping fingers crossed
    else:
        body = MIMEText(mailobject.get_payload(decode=True), 'UTF-8')
        msg.attach(body)

    # The file name to use for the attached message. Uses regex to remove all
    # non-alphanumeric characters, and appends a file extension.
    filename = re.sub('[^0-9a-zA-Z]+', '_', subject_original)

    # Add subject, from and to lines.
    msg['Subject'] = subject
    msg['From'] = sender
    msg['To'] = recipient
    msg['reply-to'] = mailobject['Return-Path']

    # Create a new MIME object.
    att = MIMEApplication(file_dict["file"], filename)
    att.add_header("Content-Disposition", 'attachment', filename=filename)

    # Attach the file object to the message.
    msg.attach(att)

    message = {
        "Source": sender,
        "Destinations": recipient,
        "Data": msg.as_string()
    }

    return message
def send_email(message):
    aws_region = os.environ['Region']

    # Create a new SES client.
    client_ses = boto3.client('ses', region)

    # Send the email.
    try:
        # Provide the contents of the email.
        response = client_ses.send_raw_email(
            Source=message['Source'],
            Destinations=[
                message['Destinations']
            ],
            RawMessage={
                'Data': message['Data']
            }
        )

    # Display an error if something goes wrong.
    except ClientError as e:
        print('send email ClientError Exception')
        output = e.response['Error']['Message']
    else:
        output = "Email sent! Message ID: " + response['MessageId']

    return output
def lambda_handler(event, context):
    # Get the unique ID of the message. This corresponds to the name of the file
    # in S3.
    message_id = event['Records'][0]['ses']['mail']['messageId']
    print(f"Received message ID {message_id}")

    # Retrieve the file from the S3 bucket.
    file_dict = get_message_from_s3(message_id)

    # Create the message.
    message = create_message(file_dict)

    # Send the email and print the result.
    result = send_email(message)
    print(result)
For those getting this error:
'bytes' object has no attribute 'encode'
In this line:
body = MIMEText(mailobject.get_payload(decode=True), 'UTF-8')
I was able to make it work. I am not an expert on this, so the code might need some improvement. Also, the email body includes HTML tags, but at least it got delivered.
If decoding the email still fails, the error message will appear in your CloudWatch log, and you will also receive an email with the error message.
payload = mailobject.get_payload(decode=True)
try:
    decodedPayload = payload.decode()
    body = MIMEText(decodedPayload, 'UTF-8')
    msg.attach(body)
except Exception as error:
    errorMsg = "An error occurred when decoding the email payload:\n" + str(error)
    print(errorMsg)
    body = errorMsg + "\nPlease download it manually from the S3 bucket."
    msg.attach(MIMEText(body, 'plain'))
It is up to you which information you want to add to the error email, like the subject or the from address.
Just another hint: with the above code you will get an error because subject_original is undefined. Just delete the following lines:
    # The file name to use for the attached message. Uses regex to remove all
    # non-alphanumeric characters, and appends a file extension.
    filename = re.sub('[^0-9a-zA-Z]+', '_', subject_original)

    # Create a new MIME object.
    att = MIMEApplication(file_dict["file"], filename)
    att.add_header("Content-Disposition", 'attachment', filename=filename)

    # Attach the file object to the message.
    msg.attach(att)
As far as I understand, this code is supposed to add the original email as an attachment, which was not what I wanted.
Related
I need to get the full path of the folders where a file is located in Google Drive. I'm getting the files themselves using the Google Drive API, but I need information about their parent folders.
I'm using the following code to get the list of spreadsheets in a Shared Drive:
from googleapiclient import discovery
from httplib2 import Http
from oauth2client import file, client, tools

# Change the value of SCOPES to 'https://www.googleapis.com/auth/drive'
# if you want to be able to read and write to the user's Google Drive.
SCOPES = 'https://www.googleapis.com/auth/drive'
store = file.Storage('storage.json')
creds = store.get()
if not creds or creds.invalid:
    flow = client.flow_from_clientsecrets('client_secret.json', SCOPES)
    creds = tools.run_flow(flow, store)
DRIVE = discovery.build('drive', 'v3', http=creds.authorize(Http()))

folder_id = "1Z1GzY-D3I3qwQu3oxIW-L1a9nXgD0PXl"
query = "mimeType='application/vnd.google-apps.spreadsheet'"
query += "and fullText contains 'CLAS' and trashed = false"
# query += " and parents in '" + folder_id + "'"

spreadsheets = []

# Initialize the page token
next_page_token = None

# Loop until all pages of results have been retrieved
while True:
    # Execute the list request
    response = DRIVE.files().list(
        q=query,
        corpora='drive',
        includeItemsFromAllDrives=True,
        driveId='0AEJNMySKcEzsUk9PVA',
        supportsAllDrives=True,
        # orderBy='folder',
        pageSize=1000,
        fields='nextPageToken, files(id, name, parents, mimeType, webViewLink)',
        pageToken=next_page_token,
    ).execute()

    # Append the results to the list
    spreadsheets.extend(response.get('files', []))

    # Check if there is another page of results
    next_page_token = response.get('nextPageToken', None)
    if next_page_token is None:
        break

    # Set the page token for the next iteration
    # parameters['pageToken'] = next_page_token

# Print the number of results
print(f'Last spreadsheet found: {spreadsheets[-1]["name"]}. Number of spreadsheets: {len(spreadsheets)}')
This returns a list of dictionaries with the specified fields. I would like to know the names of the parent folders for each file, for which I'm trying:
from googleapiclient.errors import HttpError

for item in spreadsheets:
    if 'parents' in item:
        parent_folders_list = []
        parent_id = item['parents'][0]
        try:
            while parent_id:
                folder = DRIVE.files().get(fileId=parent_id, fields='name, id, parents').execute()
                parent_folders_list.append(folder.get("parents", []))
                if parent_id:
                    parent_id = parent_id[0]
        except HttpError as error:
            print('An error occurred: %s' % error)
        print(f'{item["name"]} is in {parent_folders_list}')
I've been able to verify that parent_id is correctly retrieved and that I can access it, since I was able to open it in the browser. However, I get back 'File not found' errors for every parent_id. I wonder whether DRIVE.files().get(fileId=...) is the correct way to retrieve a folder through the API.
Any help would be greatly appreciated.
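One thing worth checking, sketched below with made-up variable names and under the assumption that the folders live in the same shared drive: files().get() appears to also need supportsAllDrives=True for shared-drive items, and the loop should advance parent_id from the parents field returned by each get() call, not from the previous ID. This is only a sketch, not a verified fix:

# Sketch: walk up the parent chain for one file, reusing DRIVE and spreadsheets from above.
item = spreadsheets[0]
parent_names = []
parent_id = item.get('parents', [None])[0]

while parent_id:
    folder = DRIVE.files().get(
        fileId=parent_id,
        fields='name, id, parents',
        supportsAllDrives=True,  # assumption: required because the items are in a shared drive
    ).execute()
    parent_names.append(folder['name'])
    # Move up one level; top-level shared-drive folders may have no 'parents' field.
    parents = folder.get('parents', [])
    parent_id = parents[0] if parents else None

print(f'{item["name"]} is in {" / ".join(reversed(parent_names))}')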
I know: automatic token refreshing is not a new topic.
This is the use case that generates my problem: let's say we want to extract data from Dropbox. Below you can find the code. The first time it works perfectly: 1) the user goes to the generated link; 2) after allowing the app, they copy and paste the authorization code into the input box.
The problem arises when, some hours later, the user wants to do the same operation. How can I avoid or bypass generating a new authorization code and go straight to the operation?
As you can see in the code, for a short period it is possible to re-inject the auth code (commented out in the code). But after an hour or more this is no longer possible.
Any help is welcome.
#!/usr/bin/env python3

import dropbox
from dropbox import DropboxOAuth2FlowNoRedirect
import pandas as pd  # assumed missing import: dropbox_list_files below builds a DataFrame

'''
Populate your app key in order to run this locally
'''
APP_KEY = ""

auth_flow = DropboxOAuth2FlowNoRedirect(APP_KEY, use_pkce=True, token_access_type='offline')

target = '/DVR/DVR/'

authorize_url = auth_flow.start()
print("1. Go to: " + authorize_url)
print("2. Click \"Allow\" (you might have to log in first).")
print("3. Copy the authorization code.")
auth_code = input("Enter the authorization code here: ").strip()
#auth_code="3NIcPps_UxAAAAAAAAAEin1sp5jUjrErQ6787_RUbJU"

try:
    oauth_result = auth_flow.finish(auth_code)
except Exception as e:
    print('Error: %s' % (e,))
    exit(1)

with dropbox.Dropbox(oauth2_refresh_token=oauth_result.refresh_token, app_key=APP_KEY) as dbx:
    dbx.users_get_current_account()
    print("Successfully set up client!")

    for entry in dbx.files_list_folder(target).entries:
        print(entry.name)

    def dropbox_list_files(path):
        try:
            files = dbx.files_list_folder(path).entries
            files_list = []
            for file in files:
                if isinstance(file, dropbox.files.FileMetadata):
                    metadata = {
                        'name': file.name,
                        'path_display': file.path_display,
                        'client_modified': file.client_modified,
                        'server_modified': file.server_modified
                    }
                    files_list.append(metadata)

            df = pd.DataFrame.from_records(files_list)
            return df.sort_values(by='server_modified', ascending=False)

        except Exception as e:
            print('Error getting list of files from Dropbox: ' + str(e))

    # function to get the list of files in a folder
    def create_links(target, csvfile):
        filesList = []
        print("creating links for folder " + target)
        files = dbx.files_list_folder('/' + target)
        filesList.extend(files.entries)
        print(len(files.entries))
        while files.has_more == True:
            files = dbx.files_list_folder_continue(files.cursor)
            filesList.extend(files.entries)
            print(len(files.entries))
        for file in filesList:
            if isinstance(file, dropbox.files.FileMetadata):
                filename = file.name + ',' + file.path_display + ',' + str(file.size) + ','
                link_data = dbx.sharing_create_shared_link(file.path_lower)
                filename += link_data.url + '\n'
                csvfile.write(filename)
                print(file.name)
            else:
                create_links(target + '/' + file.name, csvfile)

    # create links for all files in the folder belgeler
    create_links(target, open('links.csv', 'w', encoding='utf-8'))

    listing = dbx.files_list_folder(target)
    #todo: add implementation for files_list_folder_continue
    for entry in listing.entries:
        if entry.name.endswith(".pdf"):
            # note: this simple implementation only works for files in the root of the folder
            res = dbx.sharing_get_shared_links(
                target + entry.name)
            #f.write(res.content)
            print('\r', res)
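A possible way to skip the interactive step on later runs, sketched under the assumption that it is acceptable to persist oauth_result.refresh_token locally (the file name refresh_token.txt is only illustrative). The SDK then uses the stored refresh token to obtain fresh access tokens on its own:

import os
import dropbox
from dropbox import DropboxOAuth2FlowNoRedirect

APP_KEY = ""
TOKEN_FILE = "refresh_token.txt"  # illustrative storage location

if os.path.exists(TOKEN_FILE):
    # Later runs: reuse the stored refresh token, no browser step needed.
    with open(TOKEN_FILE) as f:
        refresh_token = f.read().strip()
else:
    # First run only: do the interactive flow and persist the refresh token.
    auth_flow = DropboxOAuth2FlowNoRedirect(APP_KEY, use_pkce=True, token_access_type='offline')
    print("1. Go to: " + auth_flow.start())
    auth_code = input("Enter the authorization code here: ").strip()
    refresh_token = auth_flow.finish(auth_code).refresh_token
    with open(TOKEN_FILE, "w") as f:
        f.write(refresh_token)

with dropbox.Dropbox(oauth2_refresh_token=refresh_token, app_key=APP_KEY) as dbx:
    dbx.users_get_current_account()
    print("Client ready without re-authorizing.")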
import os
import service
from google.auth.transport.requests import AuthorizedSession
from google.oauth2 import service_account
# Path to service account key file obtained from Google Cloud Console
service_account_file = os.environ.get("GOOGLE_APPLICATION_CREDENTIALS", "vdelodevepoper-bc77807e143f.json")
# Issuer ID obtained from Google Pay Business Console.
# Developer defined ID for the wallet class.
issuer_id = "3388000000022131935"
class_id = "6666"
resourceId = issuer_id + '.' + class_id
# [START auth]
credentials = service_account.Credentials.from_service_account_file(service_account_file, scopes=[
    "https://www.googleapis.com/auth/wallet_object.issuer"])
http_client = AuthorizedSession(credentials)
# [END auth]
# Get the specific Offer Object
offer_object = service.offerobject().get(resourceId=resourceId)
# Update the version, validTimeInterval.end, and add a message
offer_object['version'] = str(int(offer_object['version']) + 1)
offer_object['validTimeInterval'] = {
    'start': {'date': ''},
    'end': {'date': ''}
}
# Get the current messages
messages = offer_object['messages']
# Define new message
message = {
    'header': 'Test notification',
    'body': 'Your offer has been extended!',
    'kind': 'walletobjects#walletObjectMessage'
}
# Add the new message about updates to the Offer Object
messages.append(message)
offer_object['messages'] = messages
# Update the Offer Object
api_request = service.offerobject().update(resourceId=resourceId, body=offer_object)
api_response = api_request.execute()
I don't understand what offer_object = service.offerobject().get(resourceId=resourceId) is supposed to use. Where does service.offerobject() come from, and which libraries should be included?
When I run my code, I get an error:
offer_object = service.offerobject().get(resourceId=resourceId)
AttributeError: module 'service' has no attribute 'offerobject'
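A guess at what the sample intended, rather than a definitive answer: service there is presumably not the local service module imported at the top, but a Google Wallet API client built with google-api-python-client (the googleapiclient package). The 'walletobjects' discovery name and 'v1' version below are my assumptions, and note that discovery-based requests must be execute()d:

from googleapiclient.discovery import build
from google.oauth2 import service_account

# Reuses service_account_file and resourceId from the snippet above.
credentials = service_account.Credentials.from_service_account_file(
    service_account_file,
    scopes=["https://www.googleapis.com/auth/wallet_object.issuer"])

# Build a Wallet Objects API client; this is what would expose offerobject().
service = build('walletobjects', 'v1', credentials=credentials)

offer_object = service.offerobject().get(resourceId=resourceId).execute()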
boto3 documentation does not clearly specify how to update the user metadata of an already existing S3 Object.
It can be done using the copy_from() method:
import boto3
s3 = boto3.resource('s3')
s3_object = s3.Object('bucket-name', 'key')
s3_object.metadata.update({'id':'value'})
s3_object.copy_from(CopySource={'Bucket':'bucket-name', 'Key':'key'}, Metadata=s3_object.metadata, MetadataDirective='REPLACE')
You can do this using copy_from() on the resource (as the other answer mentions), but you can also use the client's copy_object() and specify the same source and destination. The methods are equivalent and invoke the same code underneath.
import boto3
s3 = boto3.client("s3")
src_key = "my-key"
src_bucket = "my-bucket"
s3.copy_object(Key=src_key, Bucket=src_bucket,
               CopySource={"Bucket": src_bucket, "Key": src_key},
               Metadata={"my_new_key": "my_new_val"},
               MetadataDirective="REPLACE")
The 'REPLACE' value specifies that the metadata passed in the request should overwrite the source metadata entirely. If you mean to only add new key-values, or delete only some keys, you'd have to first read the original data, edit it and call the update.
To replace only a subset of the metadata correctly (a short sketch follows below):
1. Retrieve the original metadata with head_object(Key=src_key, Bucket=src_bucket). Also take note of the ETag in the response.
2. Make the desired changes to the metadata locally.
3. Call copy_object as above to upload the new metadata, but pass CopySourceIfMatch=original_etag in the request to ensure the remote object has the metadata you expect before overwriting it. original_etag is the one you got in step 1. If the metadata (or the data itself) has changed since head_object was called (e.g. by another program running simultaneously), copy_object will fail with an HTTP 412 error.
Reference: boto3 issue 389
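A rough sketch of that read-modify-write flow, reusing the bucket and key names from the example above and adding one new metadata key:

import boto3

s3 = boto3.client("s3")
src_key = "my-key"
src_bucket = "my-bucket"

# 1. Read the current metadata and remember the ETag.
head = s3.head_object(Bucket=src_bucket, Key=src_key)
original_etag = head["ETag"]
metadata = head["Metadata"]

# 2. Edit the metadata locally.
metadata["my_new_key"] = "my_new_val"

# 3. Write it back, but only if the object is still the one we read (otherwise HTTP 412).
s3.copy_object(Key=src_key, Bucket=src_bucket,
               CopySource={"Bucket": src_bucket, "Key": src_key},
               CopySourceIfMatch=original_etag,
               Metadata=metadata,
               MetadataDirective="REPLACE")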
Similar to this answer, but with the existing metadata preserved while modifying only what is needed. Of the system-defined metadata, I've only preserved ContentType and ContentDisposition in this example. Other system-defined metadata can be preserved similarly.
import boto3
s3 = boto3.client('s3')
response = s3.head_object(Bucket=bucket_name, Key=object_name)
response['Metadata']['new_meta_key'] = "new_value"
response['Metadata']['existing_meta_key'] = "new_value"
result = s3.copy_object(Bucket=bucket_name, Key=object_name,
                        CopySource={'Bucket': bucket_name,
                                    'Key': object_name},
                        Metadata=response['Metadata'],
                        MetadataDirective='REPLACE', TaggingDirective='COPY',
                        ContentDisposition=response['ContentDisposition'],
                        ContentType=response['ContentType'])
You can update metadata either by adding something new or by replacing a current metadata value with a new one. Here is the piece of code I am using:
import sys
import os
import boto3
import pprint
from boto3 import client
from botocore.utils import fix_s3_host

param_1 = YOUR_ACCESS_KEY
param_2 = YOUR_SECRETE_KEY
param_3 = YOUR_END_POINT
param_4 = YOUR_BUCKET

# Create the S3 client
s3ressource = client(
    service_name='s3',
    endpoint_url=param_3,
    aws_access_key_id=param_1,
    aws_secret_access_key=param_2,
    use_ssl=True,
)

# Build a list of objects per bucket
def BuildObjectListPerBucket(variablebucket):
    global listofObjectstobeanalyzed
    listofObjectstobeanalyzed = []
    extensions = ['.jpg', '.png']
    for key in s3ressource.list_objects(Bucket=variablebucket)["Contents"]:
        #print(key['Key'])
        onemoreObject = key['Key']
        if onemoreObject.endswith(tuple(extensions)):
            listofObjectstobeanalyzed.append(onemoreObject)
            #print(listofObjectstobeanalyzed)
        else:
            s3ressource.delete_object(Bucket=variablebucket, Key=onemoreObject)
    return listofObjectstobeanalyzed

# for a given existing object, create metadata
def createmetdata(bucketname, objectname):
    s3ressource.upload_file(objectname, bucketname, objectname, ExtraArgs={"Metadata": {"metadata1": "ImageName", "metadata2": "ImagePROPERTIES", "metadata3": "ImageCREATIONDATE"}})

# for a given existing object, add new metadata
def ADDmetadata(bucketname, objectname):
    s3_object = s3ressource.get_object(Bucket=bucketname, Key=objectname)
    k = s3ressource.head_object(Bucket=bucketname, Key=objectname)
    m = k["Metadata"]
    m["new_metadata"] = "ImageNEWMETADATA"
    s3ressource.copy_object(Bucket=bucketname, Key=objectname, CopySource=bucketname + '/' + objectname, Metadata=m, MetadataDirective='REPLACE')

# for a given existing object, update a metadata entry with a new value
def CHANGEmetadata(bucketname, objectname):
    s3_object = s3ressource.get_object(Bucket=bucketname, Key=objectname)
    k = s3ressource.head_object(Bucket=bucketname, Key=objectname)
    m = k["Metadata"]
    m.update({'watson_visual_rec_dic': 'ImageCREATIONDATEEEEEEEEEEEEEEEEEEEEEEEEEE'})
    s3ressource.copy_object(Bucket=bucketname, Key=objectname, CopySource=bucketname + '/' + objectname, Metadata=m, MetadataDirective='REPLACE')

def readmetadata(bucketname, objectname):
    ALLDATAOFOBJECT = s3ressource.get_object(Bucket=bucketname, Key=objectname)
    ALLDATAOFOBJECTMETADATA = ALLDATAOFOBJECT['Metadata']
    print(ALLDATAOFOBJECTMETADATA)

# create the list of objects on a per-bucket basis
BuildObjectListPerBucket(param_4)

# Call functions to see the results
for objectitem in listofObjectstobeanalyzed:
    # CALL the function you want
    readmetadata(param_4, objectitem)
    ADDmetadata(param_4, objectitem)
    readmetadata(param_4, objectitem)
    CHANGEmetadata(param_4, objectitem)
    readmetadata(param_4, objectitem)
I have a file at this address:
http://s3.amazonaws.com/bucket-name/sdile_pr_2_1_1/pr/0/2/1/1/dile_0_2_1_1.nc
in an S3 bucket that I want to make accessible via a Flask app.
To do so I created a function that looks like this:
@app.route('/select/dile')
def select_dile_by_uri():
    uri = request.args.get('uri')
    if uri is not None:
        if uri.startswith("http://s3.amazonaws.com/"):
            path = uri.replace("http://s3.amazonaws.com/", "")
            bname, kstr = path.split("/", 1)  # split the bname from the key string
            conn = S3Connection(AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY)
            try:
                bucket = conn.get_bucket(bname)
            except:
                print "BUCKET NOT FOUND"
                return str("ERROR: bucket " + bname + " not found")
            else:
                print "BUCKET CONNECTED"
                try:
                    key = bucket.get_key(kstr)
                    print "KEY: ", key
                except:
                    print "KEY NOT FOUND"
                    return str("ERROR: key " + kstr + " not found")
                else:
                    try:
                        key.open_read()  # opens the file
                        headers = dict(key.resp.getheaders())  # request the headers
                        return Response(key, headers=headers)  # return a response
                    except S3ResponseError as e:
                        return Response(e.body, status=e.status, headers=key.resp.getheaders())
    abort(400)
The download works, but the name of the downloaded file appears to be only "dile" instead of dile_0_2_1_1.nc.
How come? Is there something I needed to set?
What I needed to do was add a field to the headers, specifically:
headers["Content-Disposition"] = "inline; filename=myfilename"
where myfilename is the name you want the file to have.
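In the context of the question's code, that could look roughly like this (an untested sketch that uses the last segment of the key string as the filename):

key.open_read()
headers = dict(key.resp.getheaders())
# Tell the browser which filename to use, e.g. dile_0_2_1_1.nc instead of "dile".
headers["Content-Disposition"] = "inline; filename=" + kstr.split("/")[-1]
return Response(key, headers=headers)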