attach excel files to separate email IDs from a folder - pandas

Below is the code that separates the data as per the requirement. It executes fine up to the point where the files are split per country into Excel workbooks, but the final mail attachment fails with this error:
com_error: (-2147352567, 'Exception occurred.', (4096, 'Microsoft Outlook', 'Cannot find this file. Verify the path and file name are correct.', None, 0, -2147024894), None)
import os, datetime, pathlib, schedule
import csv
import pickle
from time import sleep
from pathlib import Path

import pandas as pd
import win32com.client as win32  # pip install pywin32
from PIL import ImageGrab

workbook_path = r'C:\Users\cb\OneDrive\final.xlsx'
# C:\Users\cb\Desktop\Automate
EXCEL_FILE_PATH = Path.cwd() / "final_new.xlsx"
ATTACHMENT_DIR = Path.cwd() / "Attachments"
ATTACHMENT_DIR.mkdir(exist_ok=True)

data = pd.read_excel(workbook_path, sheet_name="Sheet1")
data.head()

# Query/Filter the data frame and export the filtered data frame as an Excel file
column_name = "COUNTRY"                     # the data is split by the COUNTRY column used below
unique_values = data[column_name].unique()  # one output workbook per country
for unique_value in unique_values:
    data_output = data.query(f"{column_name} == @unique_value")
    output_path = ATTACHMENT_DIR / f"{unique_value}.xlsx"
    data_output.to_excel(output_path, sheet_name=unique_value, index=False)

# EXCEL_FILE_PATH = r'C:\Users\cb\OneDrive\paste this as people.csv'
email_list = pd.read_excel('yeah.xlsx')
email_list.tail()

outlook = win32.dynamic.Dispatch("outlook.Application")  # .GetNameSpace("MAPI")
for index, row in email_list.iterrows():
    mail = outlook.CreateItem(0)
    mail.To = row["Email ID"]
    mail.CC = row["CC"]
    mail.Subject = f"issue is for Country - {row['COUNTRY']}"
    mail.HTMLBody = f"""
    Hi {row['Contact Name']},
    Please find attached the report for {row['COUNTRY']}.
    This is a Test email
    Best Regards,
    team Auto
    """
    attachment_path = str(ATTACHMENT_DIR / f"{row['COUNTRY']}.xlsx")
    mail.Attachments.Add(Source=attachment_path)
    mail.Display()

'Exception occurred.', (4096, 'Microsoft Outlook', 'Cannot find this file. Verify the path and file name are correct.'
It seems the problem is related to attaching a file:
attachment_path = str(ATTACHMENT_DIR / f"{row['COUNTRY']}.xlsx")
mail.Attachments.Add(Source=attachment_path)
The Attachments.Add method creates a new attachment in the Attachments collection. The source of the attachment can be a file (represented by the full file system path with the file name) or an Outlook item that constitutes the attachment. You need to make sure that such a file actually exists on disk. Try copying the resulting file path and opening the file from the shell.
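A hedged sketch of how to surface the problem, using the variables from the loop in the question (the skip message is just illustrative):

import os

attachment_path = str(ATTACHMENT_DIR / f"{row['COUNTRY']}.xlsx")
if os.path.exists(attachment_path):
    mail.Attachments.Add(attachment_path)  # full file system path, as required
else:
    # common causes: the COUNTRY value in yeah.xlsx does not exactly match an exported
    # file name (extra spaces, different casing), or the export step was skipped
    print(f"Attachment not found, skipping: {attachment_path!r}")

If the printed path looks correct, comparing it character by character with the files actually present in the Attachments folder usually reveals the mismatch.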

Related

wait until the blob storage folder is created

I would like to download a picture into a blob folder.
Before that I need to create the folder first.
The code below is what I am doing.
The issue is that the folder needs time to be created: when the code reaches with open(abs_file_name, "wb") as f:, it cannot find the folder.
I am wondering whether there is an 'await'-like way to know that the folder creation has completed, and only then do the write operation.
import requests  # 'data' is an existing DataFrame and lake_root an existing mount path

for index, row in data.iterrows():
    url = row['Creatives']
    file_name = url.split('/')[-1]
    r = requests.get(url)
    abs_file_name = lake_root + file_name
    dbutils.fs.mkdirs(abs_file_name)
    if r.status_code == 200:
        with open(abs_file_name, "wb") as f:
            f.write(r.content)
The final sub folder will not be created as expected when using dbutils.fs.mkdirs() on blob storage with a path that ends in a file name.
It creates an entry with that final name which is then treated as a directory, so it cannot be opened as a file. Look at the following demonstration:
dbutils.fs.mkdirs('/mnt/repro/s1/s2/s3.csv')
When I try to open this file, the error says that this is a directory.
This might be the issue with the code. So, try using the following code instead:
for index, row in data.iterrows():
    url = row['Creatives']
    file_name = url.split('/')[-1]
    r = requests.get(url)
    abs_file_name = lake_root + 'fail'  # creates the fake directory (to counter the problem we are facing above)
    dbutils.fs.mkdirs(abs_file_name)
    if r.status_code == 200:
        with open(lake_root + file_name, "wb") as f:
            f.write(r.content)
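A minimal sketch of the same idea with the directory kept separate from the file name (the mount path and URL are hypothetical, and lake_root is assumed to be a FUSE path such as /dbfs/mnt/...):

import os
import requests

lake_root = '/dbfs/mnt/repro/creatives/'   # hypothetical FUSE mount path
os.makedirs(lake_root, exist_ok=True)      # create the directory only, never a path ending in a file name

url = 'https://example.com/banner_01.png'  # hypothetical URL
r = requests.get(url)
if r.status_code == 200:
    file_name = url.split('/')[-1]
    with open(lake_root + file_name, "wb") as f:
        f.write(r.content)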

Use URLs from List to save zip file

Trying to use urllib.request to read a list of URLs from a shapefile, then download the zips from all those URLs. So far I have my list of URLs, but I am unable to pass all of them through. The error is "expected string or bytes-like object", meaning there's probably an issue with the URL. As a side note, I also need to download them and name them by their file name/number. Need help! Code below.
import arcpy
import urllib.request
import os

os.chdir('C:\\ProgInGIS\\FinalExam\\Final')
lidar_shp = 'C:\\ProgInGIS\\FinalExam\\Final\\lidar-2013.shp'
zip_file_download = 'C:\\ProgInGIS\\FinalExam\\Final\\file1.zip'

data = []
with arcpy.da.SearchCursor(lidar_shp, "*") as cursor:
    for row in cursor:
        data.append(row)
data.sort(key=lambda tup: tup[2])

i = 0
with arcpy.da.UpdateCursor(lidar_shp, "*") as cursor:
    for row in cursor:
        row = data[i]
        i += 1
        cursor.updateRow(row)

counter = 0
url_list = []
with arcpy.da.UpdateCursor(lidar_shp, ['geotiff_ur']) as cursor:
    for row in cursor:
        url_list.append(row)
        counter += 1
        if counter == 18:
            break

for item in url_list:
    print(item)
    urllib.request.urlretrieve(item)
I understand your question this way: you want to download a zip file for each record in a shapefile, from a URL defined in a certain field.
It's easier to use the requests package, which is also recommended in the urllib.request documentation:
The Requests package is recommended for a higher-level HTTP client interface.
Here is an example:
import arcpy, arcpy.da
import shutil
import requests

SHAPEFILE = "your_shapefile.shp"

with arcpy.da.SearchCursor(SHAPEFILE, ["name", "url"]) as cursor:
    for name, url in cursor:
        response = requests.get(url, stream=True)
        if response.status_code == 200:
            with open(f"{name}.zip", "wb") as file:
                response.raw.decode_content = True
                shutil.copyfileobj(response.raw, file)
There is another example on GIS StackExchange:
https://gis.stackexchange.com/a/392463/21355
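If the shapefile has no separate name field, a hedged variation is to derive the file name from the URL itself; only the geotiff_ur field name and the shapefile path are taken from the question, the rest is illustrative:

import os

import arcpy, arcpy.da
import requests

SHAPEFILE = 'C:\\ProgInGIS\\FinalExam\\Final\\lidar-2013.shp'  # path from the question

with arcpy.da.SearchCursor(SHAPEFILE, ["geotiff_ur"]) as cursor:
    for (url,) in cursor:
        file_name = os.path.basename(url)   # name the zip after the last part of the URL
        response = requests.get(url)
        if response.status_code == 200:
            with open(file_name, "wb") as f:
                f.write(response.content)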

I want to get the Excel file from the data frame created in the code, which updates automatically

I have tried two methods and both write the file to a different location than the one I gave.
apikey = 'abcd'

import pandas as pd
from alpha_vantage.timeseries import TimeSeries
import time

ts = TimeSeries(key=apikey, output_format='pandas')
data, metadata = ts.get_intraday(symbol='name', interval='1min', outputsize='full')
data

while True:
    data, metadata = ts.get_intraday(symbol='TCS', interval='1min', outputsize='full')
    data.to_excel('livedat.xlsx')
    time.sleep(60)
The code is running properly but I don't know how to get the data file in Excel.
Important: the method should fetch the file as it is updated, i.e. every 1 minute, automatically.
Also, I am using IBM Watson Studio to write the code.
I am not familiar with the alpha_vantage wrapper that you are using, however this is how I would approach your question. The code works and I have included comments.
To get the file in the Python script I would do pd.read_excel(filepath).
import requests
import pandas as pd
import time
import datetime

# Your API KEY and the URL we will request from
API_KEY = "YOUR API KEY"
url = "https://www.alphavantage.co/query?"

def Generate_file(symbol="IBM", interval="1min"):
    # URL parameters
    parameters = {"function": "TIME_SERIES_INTRADAY",
                  "symbol": symbol,
                  "interval": interval,
                  "apikey": API_KEY,
                  "outputsize": "compact"}

    # get the JSON response from AlphaVantage
    response = requests.get(url, params=parameters)
    data = response.json()

    # filter the response to only get the time series data we want
    time_series_interval = f"Time Series ({interval})"
    prices = data[time_series_interval]

    # convert the filtered response to a Pandas DataFrame
    df = pd.DataFrame.from_dict(prices, orient="index").reset_index()
    df = df.rename(columns={"index": time_series_interval})

    # create a timestamp for our Excel file, so that the file does not get overwritten with new data each time
    current_time = datetime.datetime.now()
    file_timestamp = current_time.strftime("%Y%m%d_%H.%M")
    filename = f"livedat_{file_timestamp}.xlsx"
    df.to_excel(filename)

# set a limit on the number of calls we make to prevent an infinite loop
call_limit = 3
number_of_calls = 0
while number_of_calls < call_limit:
    Generate_file()  # our function
    number_of_calls += 1
    time.sleep(60)
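To then get the data back into Python, as mentioned above, reading one of the generated files is just pd.read_excel; a small usage sketch (the file name is a hypothetical example of the timestamped output):

import pandas as pd

df = pd.read_excel("livedat_20230101_10.30.xlsx", index_col=0)  # hypothetical timestamped output file
print(df.head())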

How to load a zip file (containing shp) from an S3 bucket into GeoPandas?

I zipped the name.shp, name.shx, and name.dbf files and uploaded them into an AWS S3 bucket. Now I want to load this zip file and convert the contained shapefile into a geopandas GeoDataFrame.
I can do it perfectly if the file is a zipped GeoJSON instead of a zipped shapefile.
import io
import boto3
import geopandas as gpd
import zipfile

cliente = boto3.client("s3", aws_access_key_id=ak, aws_secret_access_key=sk)
bucket_name = 'bucketname'
object_key = 'myfolder/locations.zip'

bytes_buffer = io.BytesIO()
cliente.download_fileobj(Bucket=bucket_name, Key=object_key, Fileobj=bytes_buffer)
geojson = bytes_buffer.getvalue()

with zipfile.ZipFile(bytes_buffer) as zi:
    with zi.open("locations.shp") as file:
        print(gpd.read_file(file.read().decode('ISO-8859-9')))
I got this error:
ç­¤íEÀ¡ËÆ3À: No such file or directory
Basically, the geopandas package allows reading files directly from S3. And, as mentioned in the answer above, it can read zip files as well. So below you can see the code which reads a zip file from S3 without downloading it. You need to put zip+s3:// at the beginning and then add the path in S3.
geopandas.read_file('zip+s3://bucket-name/file.zip')
You can read the zip directly, there is no need to use zipfile. You need all parts of the shapefile, not just the .shp itself; that is why it works with GeoJSON. You just need to pass the path with the zip:// scheme. So instead of
gpd.read_file('path/file.shp')
you go with
gpd.read_file('zip:///path/file.zip')
I am not familiar enough with boto3 to know at which point you actually have this path, but I think it will help.
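If you already have the zip downloaded into a BytesIO as in the question, one hedged option is to write it to a temporary file and let geopandas open it through the same zip:// scheme (bucket and key names are the ones from the question, the rest is illustrative and assumes a Unix-like temp directory):

import io
import tempfile

import boto3
import geopandas as gpd

cliente = boto3.client("s3")  # credentials as in the question
bytes_buffer = io.BytesIO()
cliente.download_fileobj(Bucket='bucketname', Key='myfolder/locations.zip', Fileobj=bytes_buffer)

with tempfile.NamedTemporaryFile(suffix=".zip") as tmp:
    tmp.write(bytes_buffer.getvalue())
    tmp.flush()
    gdf = gpd.read_file(f"zip://{tmp.name}")  # fiona reads all the shapefile parts from inside the zip
print(gdf.head())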
I do not know if it can be of any help, but I faced a similar problem recently, though I only wanted to read the .shp with fiona. I ended up, like others, zipping the relevant shp, dbf, cpg and shx on the bucket.
And to read from the bucket, I do it like so:
from io import BytesIO
from pathlib import Path
from typing import List
from typing import Union

import boto3
import fiona
from fiona.io import ZipMemoryFile
from pydantic import BaseSettings
from shapely.geometry import Point
from shapely.geometry import Polygon


class S3Configuration(BaseSettings):
    """
    S3 configuration class
    """
    s3_access_key_id: str = ''
    s3_secret_access_key: str = ''
    s3_region_name: str = ''
    s3_endpoint_url: str = ''
    s3_bucket_name: str = ''
    s3_use: bool = False


S3_CONF = S3Configuration()
S3_STR = 's3'
S3_SESSION = boto3.session.Session()
S3 = S3_SESSION.resource(
    service_name=S3_STR,
    aws_access_key_id=S3_CONF.s3_access_key_id,
    aws_secret_access_key=S3_CONF.s3_secret_access_key,
    endpoint_url=S3_CONF.s3_endpoint_url,
    region_name=S3_CONF.s3_region_name,
    use_ssl=True,
    verify=True,
)
BUCKET = S3_CONF.s3_bucket_name

CordexShape = Union[Polygon, List[Polygon], List[Point]]

ZIP_EXT = '.zip'


def get_shapefile_data(file_path: Path, s3_use: bool = S3_CONF.s3_use) -> CordexShape:
    """
    Retrieves the shapefile content associated to the passed file_path (either on disk or on S3).
    file_path is a .shp file.
    """
    if s3_use:
        return load_zipped_shp(get_s3_object(file_path.with_suffix(ZIP_EXT)), file_path)
    return load_shp(file_path)


def get_s3_object(file_path: Path) -> bytes:
    """
    Retrieve as bytes the content associated to the passed file_path
    """
    return S3.Object(bucket_name=BUCKET, key=forge_key(file_path)).get()['Body'].read()


def forge_key(file_path: Path) -> str:
    """
    Edit this code at your convenience to forge the bucket key out of the passed file_path
    """
    return str(file_path.relative_to(*file_path.parts[:2]))


def load_shp(file_path: Path) -> CordexShape:
    """
    Retrieve a list of Polygons stored at file_path location
    """
    with fiona.open(file_path) as shape:
        parsed_shape = list(shape)
    return parsed_shape


def load_zipped_shp(zipped_data: bytes, file_path: Path) -> CordexShape:
    """
    Retrieve a list of Polygons stored at file_path location
    """
    with ZipMemoryFile(BytesIO(zipped_data)) as zip_memory_file:
        with zip_memory_file.open(file_path.name) as shape:
            parsed_shape = list(shape)
    return parsed_shape
There is quite a lot of code, but the first part is very helpful for easily using a MinIO proxy for local development (you just have to change the .env).
The key to solving the issue for me was ZipMemoryFile from fiona, which is not so well documented (in my opinion) but was a life saver (in my case :)).
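Stripped of the configuration class, the core of the ZipMemoryFile approach boils down to something like this (bucket, key and layer names are hypothetical):

from io import BytesIO

import boto3
import geopandas as gpd
from fiona.io import ZipMemoryFile

s3 = boto3.resource("s3")
zipped = s3.Object(bucket_name="bucketname", key="myfolder/locations.zip").get()['Body'].read()

with ZipMemoryFile(BytesIO(zipped)) as zip_memory_file:
    with zip_memory_file.open("locations.shp") as collection:
        # build a GeoDataFrame straight from the fiona collection held in memory
        gdf = gpd.GeoDataFrame.from_features(collection, crs=collection.crs)
print(gdf.head())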

Read and parse CSV file in S3 without downloading the entire file using Python

So, I want to read a large CSV file from an S3 bucket, but I don't want that file to be completely downloaded into memory; what I want to do is somehow stream the file in chunks and then process it.
So far this is what I have done, but I don't think it is going to solve the problem.
import logging
import boto3
import codecs
import os
import csv

LOGGER = logging.getLogger()
LOGGER.setLevel(logging.INFO)

s3 = boto3.client('s3')


def lambda_handler(event, context):
    # retrieve bucket name and file_key from the S3 event
    bucket_name = event['Records'][0]['s3']['bucket']['name']
    file_key = event['Records'][0]['s3']['object']['key']
    chunk, chunksize = [], 1000
    if file_key.endswith('.csv'):
        LOGGER.info('Reading {} from {}'.format(file_key, bucket_name))
        # get the object
        obj = s3.get_object(Bucket=bucket_name, Key=file_key)
        file_object = obj['Body']
        count = 0
        for i, line in enumerate(file_object):
            count += 1
            if (i % chunksize == 0 and i > 0):
                process_chunk(chunk)
                del chunk[:]
            chunk.append(line)


def process_chunk(chuck):
    print(len(chuck))
This will do what you want to achieve. It won't download the whole file into memory; instead it will download it in chunks, process them, and proceed:
from smart_open import smart_open
import csv


def get_s3_file_stream(s3_path):
    """
    This function will return a stream of the s3 file.
    The s3_path should be of the format: '<bucket_name>/<file_path_inside_the_bucket>'
    """
    # This is the full path with credentials:
    complete_s3_path = 's3://' + aws_access_key_id + ':' + aws_secret_access_key + '@' + s3_path
    return smart_open(complete_s3_path, encoding='utf8')


def download_and_process_csv(s3_path):
    datareader = csv.DictReader(get_s3_file_stream(s3_path))
    for row in datareader:
        yield process_csv(row)  # write a function to do whatever you want to do with the CSV
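For completeness, a hedged alternative that stays within boto3: the StreamingBody returned by get_object can be iterated line by line, so only small pieces are held in memory at a time (bucket, key and the per-row processing are placeholders):

import csv

import boto3

s3 = boto3.client("s3")
body = s3.get_object(Bucket="my-bucket", Key="big-file.csv")["Body"]  # botocore StreamingBody

# iter_lines() yields byte lines as they are downloaded rather than loading the whole object
lines = (line.decode("utf-8") for line in body.iter_lines())
for row in csv.DictReader(lines):
    print(row)  # replace with the real per-row processing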
Did you try AWS Athena (https://aws.amazon.com/athena/)?
It's extremely good: serverless and pay-as-you-go. Without downloading the file, it does everything you want.
BlazingSQL is open source and is also useful for big data problems.