PyQt4 & BeautifulSoup setting the Browser Window Size - beautifulsoup

I'm attempting to use PyQt4 to do some web scraping, but the site I'm attempting to scrape keeps thinking I'm a mobile device, and is not presenting the dataset available to a desktop or laptop (even though I'm using a Mozilla/5.0 user agent).
To try to find out why I'm setting my URL to "whatsmyuseragent.com". And I notice that it's telling me that although my Screen Resolution is 1920px x 1080px my Browser Window Size is 0px x 0px, so could this be the problem?
Here is my code below. Any suggestions on what I need to change to convince the site I'm scraping to believe I'm a desktop or laptop (rather than a mobile) would be appreciated. Thanks.
import sys
from PyQt4 import QtGui
from PyQt4.QtGui import QApplication
from PyQt4.QtCore import QUrl
from PyQt4.QtWebKit import QWebPage
from PyQt4.QtNetwork import QNetworkRequest
import bs4 as bs
import urllib.request
class Client(QWebPage):
def __init__ (self, url):
self.app = QApplication(sys.argv)
QWebPage.__init__(self)
self.loadFinished.connect(self.on_page_load)
self.request = QNetworkRequest()
self.request.setUrl(QUrl(url))
self.request.setRawHeader("User-Agent",'Mozilla/5.0')
self.mainFrame().load(self.request)
self.app.exec_()
def on_page_load (self):
self.app.quit()
url = 'http://www.whatsmyuseragent.com'
client_response = Client(url)
source = client_response.mainFrame().toHtml()
soup = bs.BeautifulSoup(source, 'lxml')
print(soup.prettify())

Try to set the Viewport Size:
import sys
import re
from PyQt4 import QtGui
from PyQt4.QtGui import QApplication
from PyQt4.QtCore import QUrl, QSize
from PyQt4.QtWebKit import QWebPage
from PyQt4.QtNetwork import QNetworkRequest
import bs4 as bs
class Client(QWebPage):
def __init__ (self, url):
self.app = QApplication(sys.argv)
QWebPage.__init__(self)
# good ol'size
size = QSize(640, 480)
self.setViewportSize(size)
self.loadFinished.connect(self.on_page_load)
self.request = QNetworkRequest()
self.request.setUrl(QUrl(url))
self.request.setRawHeader("User-Agent",'Mozilla/5.0')
self.mainFrame().load(self.request)
self.app.exec_()
def on_page_load (self):
self.app.quit()
url = 'http://www.whatsmyuseragent.com'
client_response = Client(url)
source = client_response.mainFrame().toHtml()
soup = bs.BeautifulSoup(source, 'lxml')
# some meat from the soup
print(re.sub('\s+', ' ', soup.find(class_='browser-window').text))
This produce the following for me:
625 px x 465 px

Related

How to track pyqt5 crash errors?

I use QWebEngineView to log in to the telegram web.
I would like to be able to save the session in the profile.
When I try to load a profile with an authorized session, my application abruptly terminates.
Is there anything I can do to track the errors that occurred during startup?
import sys
from PyQt5.QtWidgets import *
from PyQt5.QtWebEngineWidgets import *
from PyQt5.QtCore import *
class Window(QMainWindow):
def __init__(self):
super(Window,self).__init__()
self.browser = QWebEngineView()
profile = QWebEngineProfile(f"profile", self.browser)
webpage = QWebEnginePage(profile, self.browser)
self.browser.setPage(webpage)
self.browser.load(QUrl('https://web.telegram.org/z'))
self.browser.show()
if __name__ == "__main__":
MyApp = QApplication(sys.argv)
window = Window()
MyApp.exec_()

how to make multi window using pyqt5

everybody!
Here's what I'm trying to do:
First, I created a file called basic.ui with a pushbutton.
Press the pushbutton to set the path and select the picture file in the set path to want the picture to appear in a new window(another window).
Let me tell you about the part where I am having difficulty.
The command to blow up the file is executed. Selecting the file causes an error.
I wonder what the cause is the problem.
How do I get the picture to appear in a new window(another window) after pressing the button?????
MainActivity.python
import sys
try:
from PyQt5.QtCore import Qt, QT_VERSION_STR
from PyQt5.QtGui import QImage
from PyQt5.QtWidgets import QApplication, QFileDialog
except ImportError:
try:
from PyQt4.QtCore import Qt, QT_VERSION_STR
from PyQt4.QtGui import QImage, QApplication, QFileDialog
except ImportError:
raise ImportError("Requires PyQt5 or PyQt4.")
from QtImageViewer import QtImageViewer
from PyQt5 import QtWidgets
from PyQt5 import QtGui
from PyQt5 import uic
from PyQt5.QtCore import pyqtSlot
class From(QtWidgets.QDialog):
def __init__(self, parent=None):
QtWidgets.QDialog.__init__(self, parent)
self.ui=uic.loadUi("basic.ui",self)
self.ui.show()
#pyqtSlot()
def add_number(self):
app = QApplication(sys.argv)
viewer=QtImageViewer()
fileName, dummy = QFileDialog.getOpenFileName(None, "Open image file...")
image = QImage(fileName)
viewer.setImage(image)
viewer.show()
sys.exit(app.exec_())
if __name__=='__main__':
app=QtWidgets.QApplication(sys.argv)
w=From()
sys.exit(app.exec())
Signals & Slots
Signals and slots are used for communication between objects. The signals and slots mechanism is a central feature of Qt and probably the part that differs most from the features provided by other frameworks. Signals and slots are made possible by Qt's meta-object system.
http://doc.qt.io/qt-5/signalsandslots.html
Try it:
import sys
from os import getcwd
from PyQt5 import QtWidgets
from PyQt5 import QtGui
from PyQt5 import uic
from PyQt5.QtCore import Qt, pyqtSlot
class From(QtWidgets.QDialog):
def __init__(self, parent=None):
super(From, self).__init__(parent)
self.ui=uic.loadUi("basic.ui",self)
self.ui.pushButton.clicked.connect(self.add_number) # +++
# ^^^^^^^^^^ vvvvvvvvvv
# <widget class="QPushButton" name="pushButton"> <---> "basic.ui"
self.ui.labelImagen = QtWidgets.QLabel()
self.ui.show()
#pyqtSlot()
def add_number(self):
fileName, dummy = QtWidgets.QFileDialog.getOpenFileName(
self, "Open image file...", getcwd(),
"Image (*.png *.jpg)",
options=QtWidgets.QFileDialog.Options())
if fileName:
pix = QtGui.QPixmap(fileName)
self.ui.labelImagen.setPixmap(pix)
self.ui.labelImagen.show()
if __name__=='__main__':
app=QtWidgets.QApplication(sys.argv)
w=From()
sys.exit(app.exec())

How save a Matplotlib figure with Pickle in a Pyqt5 environment?

I'm facing an issue and I cannot get rid of it.
I'm trying to use the Pickle package in order to save a matplotlib figure to replot it if i want to.
So far I have the below code which open a Qt window and plot some curves in it if the 'if' condition in lfpViewer.__Init__() is 1 (I put 0 only to check the pickle load function).
So I added, to the toolbar, two buttons where I can save a .pickle of the current figure or load a .pickle from a previous figure.
import pickle
from PyQt5.QtGui import *
from PyQt5.QtCore import *
from PyQt5.QtWidgets import *
import sys
import os
import matplotlib
import numpy as np
matplotlib.use('Qt5Agg')
import matplotlib.patches as patches
from matplotlib.figure import Figure
import matplotlib.pyplot as plt
from matplotlib.backends.backend_qt5agg import FigureCanvasQTAgg as FigureCanvas
from matplotlib.backends.backend_qt5agg import NavigationToolbar2QT as NavigationToolbar
class SurfViewer(QMainWindow):
def __init__(self, parent=None):
super(SurfViewer, self).__init__()
self.parent = parent
self.centralWidget = QWidget()
self.color = self.centralWidget.palette().color(QPalette.Background)
self.setCentralWidget(self.centralWidget)
self.plotview = QGroupBox(" ")
self.layout_plotview = QVBoxLayout()
self.mascenelfp = lfpViewer(self)
self.layout_plotview.addWidget(self.mascenelfp)
self.centralWidget.setLayout(self.layout_plotview)
class lfpViewer(QGraphicsView):
def __init__(self, parent=None):
super(lfpViewer, self).__init__(parent)
self.parent=parent
self.scene = QGraphicsScene(self)
self.setScene(self.scene)
self.setBackgroundBrush(QBrush(self.parent.color))# self.setBackgroundBrush(QBrush(QColor(200, 200, 200)))
self.figure = plt.figure(facecolor=[self.parent.color.red()/255,self.parent.color.green()/255,self.parent.color.blue()/255]) #Figure()
self.canvas = FigureCanvas(self.figure)
self.toolbar = NavigationToolbar(self.canvas, self)
self.save_button = QPushButton()
self.save_button.setIcon(QIcon(os.path.join('icons','SaveData.png')))
self.save_button.setToolTip("Save Figure Data")
self.toolbar.addWidget(self.save_button)
self.save_button.clicked.connect(self.saveFigData)
self.load_button = QPushButton()
self.load_button.setIcon(QIcon(os.path.join('icons','LoadData.png')))
self.load_button.setToolTip("Load Figure Data")
self.toolbar.addWidget(self.load_button)
self.load_button.clicked.connect(self.loaddatapickle)
if 0:
t=np.arange(1000)
self.axes_l=self.figure.add_subplot(311)
self.axes_l.plot(t, np.sin(2*3.14*100*t))
self.axes_Y=self.figure.add_subplot(312)
self.axes_Y.plot(t, np.cos(2*3.14*100*t))
self.axes_Yi=self.figure.add_subplot(313)
self.axes_Yi.plot(t, np.tan(2*3.14*100*t))
self.canvas.setGeometry(0, 0, 1600, 500 )
layout = QVBoxLayout()
layout.addWidget(self.toolbar)
layout.addWidget(self.canvas)
self.setLayout(layout)
def loaddatapickle(self):
fileName = QFileDialog.getOpenFileName(self,'Load Data', '', 'pickle (*.pickle)')
if (fileName[0] == '') :
return
fileName = str(fileName[0])
filehandler = open(fileName , 'rb')
self.figure = pickle.load(filehandler)
filehandler.close()
self.canvas.draw()
self.parent.parent.processEvents()
return
def saveFigData(self):
fileName = QFileDialog.getSaveFileName(self,'Save Figure Data', '', 'pickle (*.pickle)')
if (fileName[0] == '') :
return
fileName = str(fileName[0])
file_pi = open(fileName, 'wb')
pickle.dump(self.figure, file_pi, -1)
file_pi.close()
return
def main():
app = QApplication(sys.argv)
ex = SurfViewer(app)
ex.showMaximized()
sys.exit(app.exec_())
if __name__ == '__main__':
main()
The save seems works (well, at least, I have a file), but the load button do absolutly nothing !
Even If I have a .pickle file, I don't know if pickle save the correct binary of the figure because when I load the pickle file in debug mode, I get lot of red stuff.
Look for the below image :
If I do the code without PyQt5, it works fine, for instance, with the below code:
import pickle
import matplotlib
import numpy as np
matplotlib.use('Qt5Agg')
import matplotlib.pyplot as plt
def loaddatapickle():
filehandler = open('test.pickle' , 'rb')
figure = pickle.load(filehandler )
filehandler.close()
return figure
def saveFigData(figure):
file_pi = open('test.pickle', 'wb')
pickle.dump(figure , file_pi, 1)
file_pi.close()
return
figure = plt.figure( ) #Figure()
Save= 0
if Save==1:
t=np.arange(1000)
axes_l=figure.add_subplot(311)
axes_l.plot(t, np.sin(2*3.14*100*t))
axes_Y=figure.add_subplot(312)
axes_Y.plot(t, np.cos(2*3.14*100*t))
axes_Yi=figure.add_subplot(313)
axes_Yi.plot(t, np.tan(2*3.14*100*t))
saveFigData(figure)
else:
figure=loaddatapickle()
plt.show()
If somebody have an idea of what is going on here, please tell me !
Have a nice day.
I can't be sure if this solves the problem you are facing, but let's give it a try. I need to mention, I don't have QT5 available and I'm working with python 2.7 and matplotlib 2.0.0. But the solution here might be valid for general cases.
When adapting the program to pyqt4 and running it, I found out that the pickling works fine. Also the unpickling did not throw any error, so I suspected that there might be a problem of displaying the unpickled figure.
What turns out to allow loading the figure is to not only load the figure into self.figure but to recreate the canvas with this unpickled figure and newly add it to the layout:
def loaddatapickle(self):
#needed to change some stuff here, since in Qt4 the dialog directly returns a string
fileName = QFileDialog.getOpenFileName(self,'Load Data', '' )
if (fileName == '') :
return
fileName = str(fileName)
filehandler = open(fileName , 'rb')
self.figure = pickle.load(filehandler)
filehandler.close()
# remove the old canvas
self.layout().removeWidget(self.canvas)
# create a new canvas
self.canvas = FigureCanvas(self.figure)
# add the new canvas at the position of the old one
self.layout().addWidget(self.canvas, 1)
self.canvas.draw()
self.parent.parent.processEvents()
return
Of course it would be better to directly update the canvas with the new figure, but I haven't found any way to do that.

How to change ipython qtconsole input

I'm making a guide with pyqt and I'm including an ipython qtconsole widget.
try:
from qtconsole.rich_jupyter_widget import RichJupyterWidget as ipythonWidget
from qtconsole.inprocess import QtInProcessKernelManager
except:
from IPython.qt.console.rich_ipython_widget import RichIPythonWidget as ipythonWidget
from IPython.qt.inprocess import QtInProcessKernelManager
I want to modify the qtconsole input from my code but is not working. I've tried the set_next_input function but it doesn't work and I can't find another function I can use to acomplish what I want. Is even possible to achieve what I want? and if so, how can I do it?
Here is my code:
try:
from qtconsole.rich_jupyter_widget import RichJupyterWidget as ipythonWidget
from qtconsole.inprocess import QtInProcessKernelManager
except:
from IPython.qt.console.rich_ipython_widget import RichIPythonWidget as ipythonWidget
from IPython.qt.inprocess import QtInProcessKernelManager
import sys
from PyQt4 import QtGui
class sympyIpython(QtGui.QWidget):
def __init__(self):
super().__init__()
self.ipython = IpythonWidget()
v = QtGui.QVBoxLayout(self)
button = QtGui.QPushButton('append to input')
v.addWidget(self.ipython)
v.addWidget(button)
button.clicked.connect(self.symClicked)
def symClicked(self):
self.ipython.kernel.shell.set_next_input(' appended text')
class IpythonWidget(ipythonWidget):
def __init__(self):
super().__init__()
self.kernel_manager = QtInProcessKernelManager()
self.kernel_manager.start_kernel()
self.kernel = self.kernel_manager.kernel
self.kernel.gui = 'qt4'
self.kernel_client = self.kernel_manager.client()
self.kernel_client.start_channels()
if __name__ == '__main__':
app = QtGui.QApplication(sys.argv)
m = sympyIpython()
m.show()
sys.exit(app.exec_())
Reposting as an answer:
To change the text at the prompt in the Qt console, set input_buffer on the widget object:
jupyter_widget.input_buffer = 'text'

Spider to download images does not seem to work though i have pil installed

I am a newbie to scrapy.I am trying to write a spider to download images.for using the image pipeline,is installing PIL sufficient?My PIL is located in
/usr/lib/python2.7/dist-packages/PIL
How do I include it in my Scrapy project?
settings file:
BOT_NAME = 'paulsmith'
BOT_VERSION = '1.0'
ITEM_PIPELINES = ['scrapy.contrib.pipeline.images.ImagesPipeline']
IMAGE_STORE = '/home/jay/Scrapy/paulsmith/images'
SPIDER_MODULES = ['paulsmith.spiders']
NEWSPIDER_MODULE = 'paulsmith.spiders'
DEFAULT_ITEM_CLASS = 'paulsmith.items.PaulsmithItem'
USER_AGENT = '%s/%s' % (BOT_NAME, BOT_VERSION)
Items file:
from scrapy.item import Item, Field
class PaulsmithItem(Item):
image_urls=Field()
image = Field()
pass
Spider code
from scrapy.spider import BaseSpider
from scrapy.selector import HtmlXPathSelector
from paulsmith.items import PaulsmithItem
class PaulSmithSpider(BaseSpider):
name="Paul"
allowed_domains=["http://www.paulsmith.co.uk/uk-en/shop/mens"]
start_urls=["http://www.paulsmith.co.uk/uk-en/shop/mens/jeans"]
def parse(self,response):
item= PaulsmithItem()
#open('paulsmith.html','wb').write(response.body)
hxs=HtmlXPathSelector(response)
#sites=hxs.select('//div[#class="category-products"]')
item['image_urls']=hxs.select("//div[#class='category-products']//a/img/#src").extract()
#for site in sites:
#print site.extract()
#image = site.select('//a/img/#src').extract()
return item
SPIDER = PaulSmithSpider()
You may have not set IMAGES_STORE = '/path/to/valid/dir'
morever, try to use a Custom Images pipeline like this:
from scrapy.contrib.pipeline.images import ImagesPipeline
from scrapy.exceptions import DropItem
from scrapy.http import Request
class MyImagesPipeline(ImagesPipeline):
def get_media_requests(self, item, info):
for image_url in item['image_urls']:
yield Request(image_url)
def item_completed(self, results, item, info):
image_paths = [x['path'] for ok, x in results if ok]
if not image_paths:
raise DropItem("Item contains no images")
item['image_paths'] = image_paths
return item
You can check whether image_urls are requested from method "get_media_requests"
reference: http://doc.scrapy.org/en/latest/topics/images.html