How can I scrape data from a new tab? - selenium

browser.get("http://event.ybu.edu.tr/kulupler/") #main url
time.sleep(1)
browser.execute_script("window.open('http://event.ybu.edu.tr/kulup/afak', 'new window')") #open new tab
for i in range(1):
browser.execute_script('window.scrollTo(0,document.body.scrollHeight)') #sayfayı aşağıya doğru çekmek için
time.sleep(1)
kulupnames = browser.find_elements_by_xpath("/html/body/div[2]/div[2]/section/div/div[2]/div/div[1]/div/div[1]/table/tbody/tr[1]/td[2]")
kulupList=[]
for kulupname in kulupnames:
kulupList.append(kulupname.text)
mails = browser.find_elements_by_css_selector("#bilgiler > a.btn.bg-orange.btn-social")
MailList=[]
for mail in mails:
MailList.append(mail.text)
FacebookAdresses = browser.find_elements_by_css_selector("#bilgiler > a.btn.bg-blue.btn-social")
FacebookList=[]
for FacebookAdress in FacebookAdresses:
FacebookList.append(FacebookAdress.text)
TwitterAdresses = browser.find_elements_by_css_selector("#bilgiler > a.btn.btn-social.bg-aqua")
TwitterList=[]
for TwitterAdress in TwitterAdresses:
TwitterList.append(TwitterAdress.text)
InstagramAdresses = browser.find_elements_by_css_selector("#bilgiler > a.btn.btn-social.bg-light-blue")
InstagramList=[]
for InstagramAdress in InstagramAdresses:
InstagramList.append(InstagramAdress.text)
AkademikDanismanlar = browser.find_elements_by_xpath("/html/body/div[2]/div[2]/section/div/div[2]/div/div[1]/div/div[1]/table/tbody/tr[2]/td[2]")
DanismanList=[]
for AkademikDanisman in AkademikDanismanlar:
DanismanList.append(AkademikDanisman.text)
KulupBaskanlari = browser.find_elements_by_xpath("/html/body/div[2]/div[2]/section/div/div[2]/div/div[1]/div/div[1]/table/tbody/tr[3]/td[2]")
BaskanList=[]
for KulupBaskani in KulupBaskanlari:
BaskanList.append(KulupBaskani.text)
ToplamUyeler = browser.find_elements_by_xpath("/html/body/div[2]/div[2]/section/div/div[2]/div/div[1]/div/div[1]/table/tbody/tr[4]/td[2]")
UyeList=[]
for Uye in ToplamUyeler:
UyeList.append(Uye.text)
Etkinlikler = browser.find_elements_by_xpath("/html/body/div[2]/div[2]/section/div/div[2]/div/div[1]/div/div[1]/table/tbody/tr[5]/td[2]")
EtkinlikList=[]
for Etkinlik in Etkinlikler:
EtkinlikList.append(Etkinlik.text)
time.sleep(2)
browser.quit()
#DataFile = csv.writer(open('AYBU.csv','w'))
#DataFile.writerow(['KulupAdi','MailAdresi','FacebookAdresi','TwitterAdresi','InstagramAdresi','AkademikDanisman','KulupBaskani','ToplamUyeSayisi','ToplamEtkinlikSayisi'])
#DataFile.to_csv("AYBU.csv", index = False, encoding='utf-8-sig')
liste = {'kulupList':kulupList,'MailList':MailList,'FacebookList':FacebookList,'TwitterList':TwitterList,'InstagramList':InstagramList,'DanismanList':DanismanList,'BaskanList':BaskanList,'UyeList':UyeList,'EtkinlikList':EtkinlikList}
df = pd.DataFrame(data = liste)
df.to_csv("AYBU.csv", index=False, encoding='utf-8-sig')

The general process:
browser.get("http://event.ybu.edu.tr/kulupler/") #main url
# Open a new window
browser.execute_script("window.open('');")
# Switch to the new window
browser.switch_to.window(browser.window_handles[1])
browser.get('http://event.ybu.edu.tr/kulup/afak')
# ... scrape what you need from the club page here ...
# close the active tab
browser.close()
time.sleep(3)
# Switch back to the first tab
browser.switch_to.window(browser.window_handles[0])
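
Putting those pieces together for this page, here is a minimal sketch of the whole flow. It keeps the Selenium 3 `find_element(s)_by_*` calls used above; the CSS selector for the club links and the plain `webdriver.Chrome()` setup are assumptions, so adjust them to the real markup and environment:

```
import time
import pandas as pd
from selenium import webdriver

browser = webdriver.Chrome()  # add the chromedriver path if it is not on PATH
browser.get("http://event.ybu.edu.tr/kulupler/")  # main url
time.sleep(1)

# Collect the club page URLs from the list page first (selector is an assumption).
club_links = [a.get_attribute("href")
              for a in browser.find_elements_by_css_selector("a[href*='/kulup/']")]

rows = []
for link in club_links:
    # Open the club page in a new tab and switch the driver to it.
    browser.execute_script("window.open('');")
    browser.switch_to.window(browser.window_handles[1])
    browser.get(link)
    time.sleep(1)

    # Scrape whatever is needed from the club page (same XPath as above for the name).
    name = browser.find_element_by_xpath(
        "/html/body/div[2]/div[2]/section/div/div[2]/div/div[1]/div/div[1]/table/tbody/tr[1]/td[2]").text
    rows.append({"KulupAdi": name, "Url": link})

    # Close the tab and switch back to the list page before the next club.
    browser.close()
    browser.switch_to.window(browser.window_handles[0])

browser.quit()
pd.DataFrame(rows).to_csv("AYBU.csv", index=False, encoding="utf-8-sig")
```

The important detail is that `window_handles` only tells you which tabs exist; the driver keeps talking to whichever handle you last passed to `switch_to.window`, so every open and close needs a matching switch.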

Related

WebDriverException: When trying to scrape Amazon for product title and price using Selenium

I'm attempting to scrape Amazon for iPhone 11 names and prices, but when I run the code I get a WebDriverException.
My code is the following:
```
#First project
# imports implied by the snippet
import csv
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager

class CrawledInfo:
    def __init__(self, product_name, product_price, cust_name=None, cust_location=None, rating=None, review=None, review_date=None) -> None:
        self.cust_name = cust_name
        self.cust_location = cust_location
        self.product_name = product_name
        self.product_price = product_price
        self.rating = rating
        self.review = review
        self.review_date = review_date

class CrawlerBot:
    def item(self, name):
        count = 1
        page = 1
        pageIncrement = 1
        maxRetrieves = 100
        url = 'https://www.amazon.co.uk/s?k=' + name + '&page=' + str(page)
        l = []
        #Declaring options
        options = Options()
        options.headless = False
        options.add_experimental_option('detach', True)
        browser = webdriver.Chrome(ChromeDriverManager().install(), options=options)
        browser.maximize_window()
        browser.get(url)
        browser.set_page_load_timeout(10)
        while True:
            try:
                if pageIncrement * page > maxRetrieves:
                    break
                if count > pageIncrement:
                    page += 1
                    count = 1
                #Capture item name
                xPathTitle = '//*[@id="search"]/div[1]/div[2]/div/span[3]/div[2]/div[' + str(count) + ']/div/span/div/div/div[2]/div[2]/div/div[1]/div/div/div[1]/h2/a/span'
                title = browser.find_element_by_xpath(xPathTitle)
                titleText = title.get_attribute('innerHTML').splitLines()[0]
                title.click()
                #Capture item price
                xPathPrice = '//*[@id="price_inside_buybox"]'
                price = browser.find_element_by_xpath(xPathPrice)
                priceText = price.get_attribute('innerHTML').splitLines()
                #Return to the search page
                url = 'https://www.amazon.co.uk/s?k=' + name + '&page=' + str(page)
                browser.get(url)
                browser.set_page_load_timeout(10)
                #Send the results to class CrawledInfo
                info = CrawledInfo(titleText, priceText)
                l.append(info)
                count += 1
            except Exception as e:
                print('Exception: ', e)
                count += 1
                if pageIncrement * page > maxRetrieves:
                    break
                if count > pageIncrement:
                    page += 1
                    count = 1
                #Return to the search page
                url = 'https://www.amazon.co.uk/s?k=' + name + '&page=' + str(page)
                browser.get(url)
                browser.set_page_load_timeout(10)
        browser.close()
        return l

#Creating the object
start_crawler = CrawlerBot()
with open('results', 'w', newline='', encoding='utf-8') as fileWriter:
    dataWriter = csv.writer(fileWriter, delimiter=';', quotechar='"', quoting=csv.QUOTE_MINIMAL)
    for dat in start_crawler.item('iphone 11'):
        dataWriter.writerow([dat.product_name, dat.product_price])
```
Anyone who has an idea of what's wrong?
When my code is working right, I expect it to create a CSV file with the names of the iPhone 11 listings together with their prices.

Selenium scraping div table, getting duplicate rows

I wrote a script that scrolls through an infinitely loading table on a site and scrapes the entries, but instead of being a table, the entire thing is made up of div elements. I can't scroll all the way through and then scrape, since new elements are loaded as it scrolls (it shows about 6-8 at a time), so it scrolls, scrapes, appends to a dataframe, then repeats. It works great for the first few hundred rows, then it starts to get duplicate rows. Any idea what I'm doing wrong?
def scrapenotis():
    driver.get("NOTIFICATIONS");
    WebDriverWait(driver, 120).until(EC.presence_of_element_located((By.CSS_SELECTOR, "div.vue-recycle-scroller__item-view")));
    tbltitles = ["Datetime","Username","User Link","Description","Category","Desc Link"];
    tablelist = [];
    starttime = datetime.now()
    while driver.title == "WEBSITE TITLE":
        try:
            # gets list of all entries
            entries = driver.find_elements(By.CSS_SELECTOR, "div.vue-recycle-scroller__item-view");
            if len(entries) == 0:
                break;
            # iterates through entries
            for x in entries:
                # checking for those elements that persist for some ungodly reason
                if x.get_attribute("style") == "transform: translateY(-9999px);":
                    continue;
                #each entry is 83 pixels long with about 6 on screen at a time
                driver.execute_script("window.scrollBy(0, 300);");
                # entries need to load after scroll, they load twice within a second(?)
                time.sleep(1.5);
                # datedesc = driver.find_element(By.XPATH, "//*[@id='content']/div[1]/div[1]/div/div[3]/div/div[1]//div/div/div[5]/span/span").get_attribute("title");
                datedesc = driver.find_element(By.CSS_SELECTOR, "span.b-notifications__list__item__actions__item.g-date span").get_attribute("title");
                username = driver.find_element(By.CSS_SELECTOR, "div.b-username-wrapper div.g-user-name").text;
                userlink = driver.find_element(By.CSS_SELECTOR, "div.b-username-wrapper a").get_attribute("href");
                description = driver.find_element(By.CSS_SELECTOR, "div.b-notifications__list__item__text div.g-truncated-text").text;
                #sorting them out for categories
                if "ubscribed" in description:
                    cat = "New Sub";
                    desclink = "N/A";
                elif "iked your" in description:
                    cat = "Like";
                    desclink = driver.find_element(By.CSS_SELECTOR, "div.b-notifications__list__item__text div.g-truncated-text a").get_attribute("href");
                elif "restarted their monthly subscription" in description:
                    cat = "Sub Renewal";
                    desclink = "N/A";
                elif "purchased your" in description:
                    cat = "Purchase";
                    desclink = driver.find_element(By.CSS_SELECTOR, "div.b-notifications__list__item__text div.g-truncated-text a").get_attribute("href");
                elif any(x in description for x in ["eplied","esponded"]):
                    cat = "Comment";
                    desclink = driver.find_element(By.CSS_SELECTOR, "div.b-notifications__list__item__text div.g-truncated-text a").get_attribute("href");
                elif "tip" in description:
                    cat = "Tip";
                    desclink = "N/A";
                dict1 = [datedesc,username,userlink,description,cat,desclink];
                tablelist.append(dict1);
                #specify stop time in seconds
                if (datetime.now()-starttime).seconds >= 14400: #3600(1 hour) * 4 = 14400
                    break;
        except:
            break
    #convert list to df
    msgbox(tablelist);
    df = pd.DataFrame(tablelist,columns=tbltitles);
    df.drop_duplicates(subset=tbltitles, inplace=True, keep='first');
    #save to csv
    path = filesavebox("Save your updated data file","","",["*.txt","*.csv"]);
    if path == None:
        return;
    df.to_csv(path + ".csv");
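
The script already calls `drop_duplicates` once at the very end; an alternative, sketched below, is to skip rows whose key has already been collected while scrolling, so rows that are still on screen after a 300-pixel scroll are not captured twice. It assumes the combination of date, username and description uniquely identifies a notification, which is an assumption about this site, not something the page guarantees:

```
# Sketch: de-duplicate while collecting instead of only at the end.
# Assumes (datedesc, username, description) is unique per notification.
seen = set()
tablelist = []

def add_row(datedesc, username, userlink, description, cat, desclink):
    key = (datedesc, username, description)
    if key in seen:
        return False  # row was already captured on an earlier pass
    seen.add(key)
    tablelist.append([datedesc, username, userlink, description, cat, desclink])
    return True
```

Inside the loop, `tablelist.append(dict1)` would then become `add_row(datedesc, username, userlink, description, cat, desclink)`.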

How can I fix 'list' object has no attribute 'to_csv' issue?

from selenium import webdriver
import time
from selenium.webdriver.common.keys import Keys # so we can press Enter via Keys
browser = webdriver.Chrome("C:/Users/EMRE/Desktop/SCRAPE/chromedriver_win32/chromedriver.exe")
import pandas as pd
browser.get("http://event.ybu.edu.tr/kulupler/")
import csv
#browser.fullscreen_window()
#time.sleep(2)
#for i in range(6):
#    browser.execute_script('window.scrollTo(0,document.body.scrollHeight)') # to scroll the page down
#    time.sleep(1)
Kulup_button = browser.find_element_by_xpath("/html/body/div[2]/div[2]/div[1]/a/div/div[1]/div") # for the first club; change this later
Kulup_button.click()
time.sleep(1)
for i in range(1):
    browser.execute_script('window.scrollTo(0,document.body.scrollHeight)') # to scroll the page down
    time.sleep(1)
kulupnames = browser.find_elements_by_xpath("/html/body/div[2]/div[2]/section/div/div[2]/div/div[1]/div/div[1]/table/tbody/tr[1]/td[2]")
kulupList = []
for kulupname in kulupnames:
    kulupList.append(kulupname.text)
mails = browser.find_elements_by_css_selector("#bilgiler > a.btn.bg-orange.btn-social")
MailList = []
for mail in mails:
    MailList.append(mail.text)
FacebookAdresses = browser.find_elements_by_css_selector("#bilgiler > a.btn.bg-blue.btn-social")
FacebookList = []
for FacebookAdress in FacebookAdresses:
    FacebookList.append(FacebookAdress.text)
TwitterAdresses = browser.find_elements_by_css_selector("#bilgiler > a.btn.btn-social.bg-aqua")
TwitterList = []
for TwitterAdress in TwitterAdresses:
    TwitterList.append(TwitterAdress.text)
InstagramAdresses = browser.find_elements_by_css_selector("#bilgiler > a.btn.btn-social.bg-light-blue")
InstagramList = []
for InstagramAdress in InstagramAdresses:
    InstagramList.append(InstagramAdress.text)
AkademikDanismanlar = browser.find_elements_by_xpath("/html/body/div[2]/div[2]/section/div/div[2]/div/div[1]/div/div[1]/table/tbody/tr[2]/td[2]")
DanismanList = []
for AkademikDanisman in AkademikDanismanlar:
    DanismanList.append(AkademikDanisman.text)
KulupBaskanlari = browser.find_elements_by_xpath("/html/body/div[2]/div[2]/section/div/div[2]/div/div[1]/div/div[1]/table/tbody/tr[3]/td[2]")
BaskanList = []
for KulupBaskani in KulupBaskanlari:
    BaskanList.append(KulupBaskani.text)
ToplamUyeler = browser.find_elements_by_xpath("/html/body/div[2]/div[2]/section/div/div[2]/div/div[1]/div/div[1]/table/tbody/tr[4]/td[2]")
UyeList = []
for Uye in ToplamUyeler:
    UyeList.append(Uye.text)
Etkinlikler = browser.find_elements_by_xpath("/html/body/div[2]/div[2]/section/div/div[2]/div/div[1]/div/div[1]/table/tbody/tr[5]/td[2]")
EtkinlikList = []
for Etkinlik in Etkinlikler:
    EtkinlikList.append(Etkinlik.text)
time.sleep(5)
browser.quit()
DataFile = csv.writer(open('AYBU.csv','w'))
DataFile.writerow(['KulupAdi','MailAdresi','FacebookAdresi','TwitterAdresi','InstagramAdres','AkademikDanisman','KulupBaskani','ToplamUyeSayisi','ToplamEtkinlikSayisi'])
DataFile.to_csv("AYBU.csv", index = False, encoding='utf-8-sig')
liste = ['kulupList','MailList','FacebookList','TwitterList','InstagramList','DanismanList','BaskanList','UyeList','EtkinlikList']
df = pd.DataFrame(data = liste)
liste.to_csv("AYBU.csv", index = False, encoding='utf-8-sig')
I am trying to save my variable lists as a dataframe to CSV.
You have a couple flaws in your code that I can see.
I took your code and made it work and I'll explain how:
import csv
import time
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.keys import Keys # so we can press Enter via Keys

browser = webdriver.Chrome()
time.sleep(5)
browser.get("http://event.ybu.edu.tr/kulupler/")
Kulup_button = browser.find_element_by_xpath("/html/body/div[2]/div[2]/div[1]/a/div/div[1]/div") # for the first club; change this later
Kulup_button.click()
time.sleep(1)
for _ in range(1):
    browser.execute_script('window.scrollTo(0,document.body.scrollHeight)') # to scroll the page down
    time.sleep(1)
kulupnames = browser.find_elements_by_xpath("/html/body/div[2]/div[2]/section/div/div[2]/div/div[1]/div/div[1]/table/tbody/tr[1]/td[2]")
kulupList = [kulupname.text for kulupname in kulupnames]
mails = browser.find_elements_by_css_selector("#bilgiler > a.btn.bg-orange.btn-social")
MailList = [mail.text for mail in mails]
FacebookAdresses = browser.find_elements_by_css_selector("#bilgiler > a.btn.bg-blue.btn-social")
FacebookList = [FacebookAdress.text for FacebookAdress in FacebookAdresses]
TwitterAdresses = browser.find_elements_by_css_selector("#bilgiler > a.btn.btn-social.bg-aqua")
TwitterList = [TwitterAdress.text for TwitterAdress in TwitterAdresses]
InstagramAdresses = browser.find_elements_by_css_selector("#bilgiler > a.btn.btn-social.bg-light-blue")
InstagramList = [InstagramAdress.text for InstagramAdress in InstagramAdresses]
AkademikDanismanlar = browser.find_elements_by_xpath("/html/body/div[2]/div[2]/section/div/div[2]/div/div[1]/div/div[1]/table/tbody/tr[2]/td[2]")
DanismanList = [
    AkademikDanisman.text for AkademikDanisman in AkademikDanismanlar
]
KulupBaskanlari = browser.find_elements_by_xpath("/html/body/div[2]/div[2]/section/div/div[2]/div/div[1]/div/div[1]/table/tbody/tr[3]/td[2]")
BaskanList = [KulupBaskani.text for KulupBaskani in KulupBaskanlari]
ToplamUyeler = browser.find_elements_by_xpath("/html/body/div[2]/div[2]/section/div/div[2]/div/div[1]/div/div[1]/table/tbody/tr[4]/td[2]")
UyeList = [Uye.text for Uye in ToplamUyeler]
Etkinlikler = browser.find_elements_by_xpath("/html/body/div[2]/div[2]/section/div/div[2]/div/div[1]/div/div[1]/table/tbody/tr[5]/td[2]")
EtkinlikList = [Etkinlik.text for Etkinlik in Etkinlikler]
time.sleep(5)
browser.quit()
with open('AYBU.csv','w') as datafile:
    DataFile = csv.writer(datafile)
    DataFile.writerow(['KulupAdi','MailAdresi','FacebookAdresi','TwitterAdresi','InstagramAdres','AkademikDanisman','KulupBaskani','ToplamUyeSayisi','ToplamEtkinlikSayisi'])
liste = [kulupList,MailList,FacebookList,TwitterList,InstagramList,DanismanList,BaskanList,UyeList,EtkinlikList]
df = pd.DataFrame(data = liste)
df.to_csv("AYBU.csv", index = False, encoding='utf-8-sig')
The key changes here are at the bottom (don't mind the clean-up of the append loops into list comprehensions).
DataFile = csv.writer(open('AYBU.csv','w'))
DataFile.writerow(['KulupAdi','MailAdresi','FacebookAdresi','TwitterAdresi','InstagramAdres','AkademikDanisman','KulupBaskani','ToplamUyeSayisi','ToplamEtkinlikSayisi'])
DataFile.to_csv("AYBU.csv", index = False, encoding='utf-8-sig')
liste = ['kulupList','MailList','FacebookList','TwitterList','InstagramList','DanismanList','BaskanList','UyeList','EtkinlikList']
df = pd.DataFrame(data = liste)
liste.to_csv("AYBU.csv", index = False, encoding='utf-8-sig')
This code doesn't work.
with open('AYBU.csv','w') as datafile:
    DataFile = csv.writer(datafile)
    DataFile.writerow(['KulupAdi','MailAdresi','FacebookAdresi','TwitterAdresi','InstagramAdres','AkademikDanisman','KulupBaskani','ToplamUyeSayisi','ToplamEtkinlikSayisi'])
liste = [kulupList,MailList,FacebookList,TwitterList,InstagramList,DanismanList,BaskanList,UyeList,EtkinlikList]
df = pd.DataFrame(data = liste)
df.to_csv("AYBU.csv", index = False, encoding='utf-8-sig')
You had the lists as strings ('kulupList' instead of kulupList, and so on).
A pandas DataFrame is able to use .to_csv, but a csv.writer object is not.
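
To make the distinction concrete, here is a minimal sketch (the file names and rows are just placeholders): csv.writer only exposes writerow/writerows on a file you have opened yourself, while to_csv belongs to the pandas DataFrame built from the real list objects, not from their names as strings.

```
import csv
import pandas as pd

rows = [["Club A", "a@example.com"], ["Club B", "b@example.com"]]  # placeholder data

# csv.writer: you open the file and write each row yourself.
with open("clubs_writer.csv", "w", newline="", encoding="utf-8-sig") as f:
    writer = csv.writer(f)
    writer.writerow(["KulupAdi", "MailAdresi"])
    writer.writerows(rows)

# pandas: build a DataFrame from the actual lists and let .to_csv handle the file.
df = pd.DataFrame(rows, columns=["KulupAdi", "MailAdresi"])
df.to_csv("clubs_pandas.csv", index=False, encoding="utf-8-sig")
```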

controlling filename download selenium

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

path_to_download = '/home/dev/'
options = Options()
options.add_experimental_option("prefs", {
    "download.default_directory": path_to_download,
    "download.prompt_for_download": False,
    "download.directory_upgrade": True,
    "safebrowsing.enabled": True
})
options.add_argument('start-maximized')
driver = webdriver.Chrome(executable_path='/home/dev/Downloads/chromedriver_linux64/chromedriver',
                          options=options)
#long logic
elem2 = WebDriverWait(driver, 10).until(EC.visibility_of_element_located((By.XPATH, "//*[contains(text(), 'Excel')]")))
elem2.click() #downloads the file (export to excel)
For now I am stuck with putting a time.sleep(5) and then os.rename(f'{path_to_download}/exported.xlsx', f'{path_to_download}/{my_id}.xlsx').
Is there a way of controlling the filename while or before the file is downloaded?
You can achieve that by checking for the file's existence in your download directory and looping until the file appears.
You can do something like this:
import os
import time

# check the size of the file and return 0 if it doesn't exist
def getSize(filename):
    if os.path.isfile(filename):
        st = os.stat(filename)
        return st.st_size
    else:
        return 0

def wait_download(file_path):
    current_size = getSize(file_path)
    printed = False
    start_time = time.time()
    # loop over and over until the current_size stops changing
    while current_size != getSize(file_path) or getSize(file_path) == 0:
        current_size = getSize(file_path)
        # print something to the screen to show that we are waiting for the file
        while printed == False:
            print("Downloading file...")
            print("Waiting for download to complete...")
            printed = True
        # here we add an exit so we don't loop forever
        if (time.time() - start_time) > 15:
            return -1
    return 0

# In your code you can call the function like this
files = wait_download(f'{path_to_download}/exported.xlsx')
if files == 0:
    pass  # do something
else:
    pass  # the file didn't download
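
Chrome's download preferences set the directory but not the name of an individual download, so one workable pattern, sketched below, is exactly what the question hints at: wait until the download has finished (using wait_download above) and only then rename it. exported.xlsx and my_id are taken from the question and are assumptions about the real export:

```
import os

elem2 = WebDriverWait(driver, 10).until(
    EC.visibility_of_element_located((By.XPATH, "//*[contains(text(), 'Excel')]")))
elem2.click()  # triggers the "export to Excel" download

downloaded = f'{path_to_download}/exported.xlsx'  # name the site gives the file (assumption)
if wait_download(downloaded) == 0:
    # download finished: give the file the name we actually want
    os.rename(downloaded, f'{path_to_download}/{my_id}.xlsx')
else:
    print("Download did not complete within the timeout")
```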

Email body cut off when adding to dataframe

I am trying to parse email data into a dataframe, but the majority of the email body seems to disappear when I view the dataframe.
I have tried printing the body before adding it to the dataframe and it appears to be parsed correctly, but when I use iloc to add it to the dataframe it is cut off.
from bs4 import BeautifulSoup
from html2text import HTML2Text
import pandas as pd
import easyimap
import getpass
import email
import base64
import os
import mimetypes
from datetime import datetime
from email.utils import parsedate_to_datetime

def to_text(html, rehtml=False):
    parser = HTML2Text()
    parser.wrap_links = False
    parser.skip_internal_links = True
    parser.inline_links = True
    parser.ignore_anchors = True
    parser.ignore_images = True
    parser.ignore_emphasis = True
    parser.ignore_links = True
    text = parser.handle(html)
    text = text.strip(' \t\n\r')
    if rehtml:
        text = text.replace('\n', '<br/>')
        text = text.replace('\\', '')
    return text

imap_password = getpass.getpass()
user = 'pmccabe@tradevela.com\edc-notifications'
host = 'outlook.office365.com'
password = imap_password
#'
folders = ('"INBOX/Americas/Not Raised"', '"INBOX/Americas/Raised"', '"INBOX/APAC/Not Raised"', '"INBOX/APAC/Raised"',
           '"INBOX/Consolidated/Not Raised"', '"INBOX/Consolidated/Raised"', '"INBOX/EMEA"', '"INBOX/EMEA/Not Raised"', '"INBOX/EMEA/Raised"')
df = pd.DataFrame(columns=['Subject','Sender','From','To','Body','References','content_type', 'local_date_time',
                           'Classification', 'in_reply_to','return_path', 'mime_version', 'message_id', 'folder_name'])
for mailbox in folders:
    #Connect to mailbox read_only = True to ensure the mail is not marked as read.
    imapper = easyimap.connect(host, user, password, mailbox, read_only=True)
    #fetch each mail up to limit and return email data and add to a dataframe
    for mail_id in imapper.listids(limit=5000):
        try:
            mail = imapper.mail(mail_id, include_raw=True)
            #convert body to text using to_text function and add to dataframe
            df.loc[mail_id, ['Body']] = to_text(mail.body, rehtml=False)
            #return mail features to dataframe
            df.loc[mail_id, ['Subject']] = mail.title
            df.loc[mail_id, ['Sender']] = mail.sender
            df.loc[mail_id, ['From']] = mail.from_addr
            df.loc[mail_id, ['To']] = mail.to
            df.loc[mail_id, ['References']] = mail.references
            df.loc[mail_id, ['content_type']] = mail.content_type
            #converting the date to datetime and taking account of time difference changes
            date_ = mail.date
            df.loc[mail_id, ['local_date_time']] = datetime.fromtimestamp(parsedate_to_datetime(date_).timestamp()).strftime('%Y-%m-%d %H:%M:%S')
            #parsing the keyword data from the raw mail data to provide the classification
            raw_data = mail.raw
            email_message = email.message_from_bytes(raw_data)
            df.loc[mail_id, ['Classification']] = email_message['Keywords']
            df.loc[mail_id, ['in_reply_to']] = mail.in_reply_to
            df.loc[mail_id, ['return_path']] = mail.return_path
            df.loc[mail_id, ['mime_version']] = mail.mime_version
            df.loc[mail_id, ['message_id']] = mail.message_id
            df.loc[mail_id, ['folder_name']] = mailbox
        except:
            #if error print email to file
            counter = 1
            for part in email_message.walk():
                if part.get_content_maintype() == "multipart":
                    continue
                filename = part.get_filename()
                content_type = part.get_content_type()
                if not filename:
                    ext = mimetypes.guess_extension(content_type)
                    if not ext:
                        ext = '.bin'
                    if 'text' in content_type:
                        ext = '.txt'
                    elif 'html' in content_type:
                        ext = '.html'
                    filename = 'msg-part-%08d%s' % (counter, ext)
                    counter += 1
                #save file
                date_ = datetime.fromtimestamp(parsedate_to_datetime(date_).timestamp()).strftime('%Y-%m-%d %H:%M:%S')
                save_path = os.path.join(os.getcwd(), "emails", date_, mail.title)
                if not os.path.exists(save_path):
                    os.makedirs(save_path)
                with open(os.path.join(save_path, filename), 'wb') as fp:
                    fp.write(part.get_payload(decode=True))
                counter += 1
The data frame should contain all the email body content.
Updating the Jupyter notebook fixed this issue.
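
One related thing worth ruling out when a Body cell only looks truncated: pandas shortens long strings when it prints a DataFrame, even though the full text is stored. A quick check, assuming a reasonably recent pandas, is to widen the display limit or inspect a single cell directly before concluding that data was lost:

```
import pandas as pd

pd.set_option("display.max_colwidth", None)   # print full cell contents
print(df.loc[some_mail_id, "Body"])           # some_mail_id: any id already in the frame (hypothetical name)
print(len(df.loc[some_mail_id, "Body"]))      # the length shows whether the text is intact
```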