QWebEngineView and background transparency - pyqt5

I'm using python 3.6.5 and PyQt 5.10.1.
I was trying with this simple code to get background transparency, with the goal of later adding this feature to qutebrowser.
https://github.com/Rhylx/browser_bg_transparency
But it doesn't work: I get a webpage with a white background. Does anyone have an idea or a clue that could help me fix it?
Do you think it could be done with QWebEnginePage?

Try it:
import sys
import argparse
from PyQt5.QtCore import Qt
from PyQt5.QtCore import QUrl
from PyQt5.QtWidgets import QApplication
from PyQt5.QtWebEngineWidgets import QWebEngineView

def parse_args():
    """Parse commandline arguments."""
    parser = argparse.ArgumentParser()
    parser.add_argument('url', help='The URL to open')
    return parser.parse_known_args()[0]

if __name__ == '__main__':
    args = parse_args()
    app = QApplication(sys.argv)
    wv = QWebEngineView()
    wv.loadStarted.connect(lambda: print("Loading started"))
    wv.loadProgress.connect(lambda p: print("Loading progress: {}%".format(p)))
    wv.loadFinished.connect(lambda: print("Loading finished"))
    wv.setWindowFlags(Qt.FramelessWindowHint)
    wv.setAttribute(Qt.WA_TranslucentBackground, True)
    #wv.setStyleSheet("background:transparent;")
    wv.load(QUrl.fromUserInput(args.url))
    wv.setWindowOpacity(0.6)  # +++
    wv.setWindowFlags(Qt.WindowStaysOnBottomHint)  # +++ note: this call replaces the FramelessWindowHint set above
    wv.show()
    app.exec_()
test.html
<!DOCTYPE html>
<html>
<head>
<style>
    body {
        background-color: rgba(255, 0, 0, 0.5); /* CSS alpha runs 0-1, not 0-255; 255 is clamped to fully opaque */
        color: blue;
        font-size: 36px;
    }
</style>
</head>
<body>
HELLO WORLD !!
</body>
</html>
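One thing worth trying: besides the widget attributes, the page itself keeps an opaque white background unless told otherwise. As far as I know, QWebEnginePage has exposed setBackgroundColor() since Qt 5.6, so a minimal sketch (assuming PyQt5 >= 5.6; the file path handling is illustrative) would be:
import os
import sys
from PyQt5.QtCore import Qt, QUrl
from PyQt5.QtGui import QColor
from PyQt5.QtWidgets import QApplication
from PyQt5.QtWebEngineWidgets import QWebEngineView

if __name__ == '__main__':
    app = QApplication(sys.argv)
    wv = QWebEngineView()
    wv.setWindowFlags(Qt.FramelessWindowHint)
    wv.setAttribute(Qt.WA_TranslucentBackground, True)
    # Ask Chromium to compose the page over a transparent base color;
    # without this the page paints its default opaque white background.
    wv.page().setBackgroundColor(QColor(Qt.transparent))
    wv.load(QUrl.fromLocalFile(os.path.abspath('test.html')))  # the test page above
    wv.show()
    sys.exit(app.exec_())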

Related

TinyMCE ERR_ABORTED 404 (Not Found) skins (vue)

I'm facing a console error issue with TinyMCE on Vue.js. It works correctly on localhost, however the editor throws errors on the server (it doesn't throw errors on localhost).
[screenshot of console errors]
Here is my import section
[I've tried importing the files both dynamically and statically, however the errors persist]
<script src="https://cdnjs.cloudflare.com/ajax/libs/vue/2.5.17/vue.js">
import Editor from "@tinymce/tinymce-vue";
import "tinymce/tinymce";
// Theme
import "../../../../../../node_modules/tinymce/themes/silver/theme";
// Skins
import "../../../../../../node_modules/tinymce/skins/ui/oxide/skin.min.css";
import "../../../../../../node_modules/tinymce/skins/ui/oxide/content.min.css";
import "../../../../../../node_modules/tinymce/skins/content/default/content.min.css";
// Plugins
import "../../../../../../node_modules/tinymce/plugins/fullscreen";
import "../../../../../../node_modules/tinymce/plugins/paste";
import "../../../../../../node_modules/tinymce/plugins/autoresize";
import "../../../../../../node_modules/tinymce/icons/default";
</script>
I solved my issue by adding the :init="{content_css: false, skin: false}" property to the editor tag.

Is there a way to run code after reactor.run() in scrapy?

I am working on a Scrapy API. One of my issues was that the Twisted reactor wasn't restartable; I fixed this by using CrawlerRunner as opposed to CrawlerProcess. My spider extracts links from a website and validates them. My issue is that if I add the validation code after reactor.run(), it doesn't work. This is my code:
from scrapy.spiders import CrawlSpider, Rule
from scrapy.linkextractors import LinkExtractor
from scrapy.crawler import CrawlerRunner
from scrapy.utils.log import configure_logging
from twisted.internet import reactor
import urllib.parse
from urllib.parse import urlparse

list = set([])  # note: shadows the built-in list
list_validate = set([])
runner = CrawlerRunner()

class Crawler(CrawlSpider):
    name = "Crawler"
    start_urls = ['https://www.example.com']
    allowed_domains = ['www.example.com']
    rules = [Rule(LinkExtractor(), callback='parse_links', follow=True)]
    configure_logging({'LOG_FORMAT': '%(levelname)s: %(message)s'})

    def parse_links(self, response):
        base_url = urlparse(response.url).netloc  # was `url`, an undefined name
        href = response.xpath('//a/@href').getall()
        list.add(urllib.parse.quote(response.url, safe=':/'))
        for link in href:
            if base_url not in link:
                list.add(urllib.parse.quote(response.urljoin(link), safe=':/'))
        for link in list:
            if base_url in link:
                list_validate.add(link)

runner.crawl(Crawler)
reactor.run()
If I add the code that validates the links after reactor.run(), it doesn't get executed. And if I put the code before reactor.run(), nothing happens, because the spider hasn't yet finished crawling all the links. What should I do? The code that validates the links is fine; I've used it before and it works.
We can do that with d.addCallback(<callback_function>) and d.addErrback(<errback_function>)
...
runner = CrawlerRunner()
d = runner.crawl(MySpider)

def finished(d):
    print("finished :D")

def spider_error(err):
    print("Spider error :/")

d.addCallback(finished)
d.addErrback(spider_error)
reactor.run()
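The missing piece for "code after reactor.run()" is that the reactor has to be stopped once the crawl's Deferred fires; otherwise reactor.run() blocks forever. A minimal sketch of that pattern, reusing the Crawler spider and list_validate set from the question:
from twisted.internet import reactor
from scrapy.crawler import CrawlerRunner

runner = CrawlerRunner()
d = runner.crawl(Crawler)
# Stop the reactor whether the crawl succeeded or failed,
# so execution continues past reactor.run().
d.addBoth(lambda _: reactor.stop())
reactor.run()  # blocks here until reactor.stop() is called

# This only runs after the crawl has finished:
for link in list_validate:
    print(link)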
For your ScraperApi you can use Klein.
Klein is a micro-framework for developing production-ready web services with Python. It is 'micro' in that it has an incredibly small API similar to Bottle and Flask.
...
import scrapy
from scrapy.crawler import CrawlerRunner
from klein import Klein

app = Klein()

@app.route('/')
async def hello(request):
    status = list()

    class TestSpider(scrapy.Spider):
        name = 'test'
        start_urls = [
            'https://quotes.toscrape.com/',
            'https://quotes.toscrape.com/page/2/',
            'https://quotes.toscrape.com/page/3/',
            'https://quotes.toscrape.com/page/4/'
        ]

        def parse(self, response):
            """parse"""
            status.append(response.status)

    runner = CrawlerRunner()
    d = await runner.crawl(TestSpider)
    content = str(status)
    return content

@app.route('/h')
def index(request):
    return 'Index Page'

app.run('localhost', 8080)
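With the Klein service running, every request to / kicks off a fresh crawl and returns the collected statuses. A quick way to check from another process (a sketch, assuming the service above is listening on localhost:8080):
import urllib.request

# Each GET triggers a new crawl in the Klein handler above
# and returns the list of HTTP statuses it collected.
with urllib.request.urlopen('http://localhost:8080/') as resp:
    print(resp.read().decode())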

Having difficulty parsing website with selenium and python

Background:
I am trying to scrape information from a link but I cannot seem to get the HTML source code to further parse it.
Link:
https://www.realestate.com.au/buy/property-house-in-vaucluse,+nsw+2030/list-1?source=refinement
Code:
import time
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

chrome_options = webdriver.ChromeOptions()
preferences = {"safebrowsing.enabled": "false"}
chrome_options.add_experimental_option("prefs", preferences)
chrome_options.add_argument('--disable-gpu')
browser = webdriver.Chrome('link_to_chrome_driver.exe', chrome_options=chrome_options)
url = property_link  # the listing URL shown above
print(url)
browser.get(url)
delay = 20  # seconds
try:
    WebDriverWait(browser, delay).until(
        EC.element_to_be_clickable((By.CSS_SELECTOR, 'rui-button-brand pagination__link-next')))
    time.sleep(10)
except:
    pass
html = browser.page_source
soup = BeautifulSoup(html, 'html.parser')
print(soup)
Output:
<html lang="en"><head>
<meta charset="utf-8"/>
<link href="about:blank" rel="shortcut icon"/>
</head>
<body>
<script src="/149e9513-01fa-4fb0-aad4-566afd725d1b/2d206a39-8ed7-437e-a3be-862e0f06eea3/j.js"></script>
<script src="/149e9513-01fa-4fb0-aad4-566afd725d1b/2d206a39-8ed7-437e-a3be-862e0f06eea3/f.js"></script>
<script src="/149e9513-01fa-4fb0-aad4-566afd725d1b/2d206a39-8ed7-437e-a3be-862e0f06eea3/fingerprint/script/kpf.js?url=/149e9513-01fa-4fb0-aad4-566afd725d1b/2d206a39-8ed7-437e-a3be-862e0f06eea3/fingerprint&token=d33b4707-4c3a-5fbb-8de6-b6889ed26c7d"></script><div></div>
</body></html>
Question:
I don't understand what is going wrong. When I manually load the site in any browser, the HTML is significantly different. However, parsing the site with Selenium/BeautifulSoup is far too problematic. What am I doing wrong?
Your CSS selector is incorrect.
Try editing the CSS selector as below:
.rui-button-brand.pagination__link-next
Refer to: https://www.w3schools.com/cssref/css_selectors.asp
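Applied to the code from the question, the wait would then look like this (only the selector string changes; the leading dots make it match by class rather than by tag name):
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

delay = 20  # seconds
# '.rui-button-brand.pagination__link-next' selects an element carrying both
# classes; the original string was parsed as two unknown tag names instead.
WebDriverWait(browser, delay).until(
    EC.element_to_be_clickable(
        (By.CSS_SELECTOR, '.rui-button-brand.pagination__link-next')))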

Selenium skips clicking on an element for no apparent reason if I place a different image in the src attribute of the img tag?

This is ridiculous. Why does it happen??
HTML source code:
<!DOCTYPE html>
<html>
<head>
<title>WTF</title>
<meta charset="utf-8" />
</head>
<body id="b">
<map name="Map" id="Map">
<area
id="clickhereyoustupidselenium" alt="" title=""
href="javascript:document.getElementById('b').innerHTML = 'adsf'"
shape="poly" coords="51,29,155,25,247,87,156,129,52,132,23,78,84,56,104,35" />
<img usemap="#Map" src="http://placehold.it/350x150" alt="350 x 150 pic">
</map>
</body>
</html>
Selenium test code:
from django.contrib.staticfiles.testing import StaticLiveServerTestCase
from selenium.webdriver.firefox.webdriver import WebDriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support.expected_conditions import text_to_be_present_in_element
from selenium.webdriver.common.by import By

class SeleniumTest(StaticLiveServerTestCase):
    @classmethod
    def setUpClass(cls):
        super(SeleniumTest, cls).setUpClass()
        cls.selenium = WebDriver()

    @classmethod
    def tearDownClass(cls):
        cls.selenium.quit()
        super(SeleniumTest, cls).tearDownClass()

    def test_wtf(self):
        self.selenium.get('%s%s' % (self.live_server_url, '/'))
        self.selenium.find_element_by_id('clickhereyoustupidselenium').click()
        WebDriverWait(self.selenium, 100).until(
            text_to_be_present_in_element((By.TAG_NAME, "body"), "adsf"))
        self.assertEqual(self.selenium.find_element_by_tag_name('body').text, 'adsf')
The test passes beautifully.
OK, so now let's replace src="http://placehold.it/350x150" with a different image, let's say this one: src="https://upload.wikimedia.org/wikipedia/commons/thumb/b/bf/POL_location_map.svg/500px-POL_location_map.svg.png":
<!DOCTYPE html>
<html>
<head>
<title>WTF</title>
<meta charset="utf-8" />
</head>
<body id="b">
<map name="Map" id="Map">
<area
id="clickhereyoustupidselenium" alt="" title=""
href="javascript:document.getElementById('b').innerHTML = 'adsf'"
shape="poly" coords="51,29,155,25,247,87,156,129,52,132,23,78,84,56,104,35" />
<img usemap="#Map" src="https://upload.wikimedia.org/wikipedia/commons/thumb/b/bf/POL_location_map.svg/500px-POL_location_map.svg.png" alt="350 x 150 pic">
</map>
</body>
</html>
Let's not touch the Selenium code, not even a teeny tiny bit.
Result? Selenium raises: selenium.common.exceptions.TimeoutException
And indeed, the Firefox window that shows up still shows the map of Poland, not 'adsf'. If I click on this area myself in that Firefox window before the 100-second timeout passes, Selenium immediately concludes the test has passed. But it was Selenium that was supposed to click on this element!!
What is happening and how to stop this madness?
Geckodriver 0.18.0. Selenium 3.5.0. Firefox 55.0.2. Python 3.5.2. And, if this matters, the dev server is Django 1.11.4.
The root cause is that the size of the <area> element reported by GeckoDriver is incorrect. Selenium WebDriver tries to click at the middle of the element, but the reported size of the area equals that of the whole map image, so Selenium clicks at the wrong position.
You can calculate the position and force Selenium to click at the position. See code below.
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# `driver` is an already-created WebDriver instance
area = driver.find_element_by_id('clickhereyoustupidselenium')
coords = area.get_attribute("coords").split(',')
coords_numbers = [int(p) for p in coords]
# coords alternate x,y pairs, so x values sit at even indices, y at odd ones
xs = coords_numbers[0::2]
ys = coords_numbers[1::2]
# centre of the polygon's bounding box, as an offset from the element's top-left
middle_x = (max(xs) + min(xs)) // 2
middle_y = (max(ys) + min(ys)) // 2
action = webdriver.common.action_chains.ActionChains(driver)
action.move_to_element_with_offset(area, middle_x, middle_y)
action.click()
action.perform()
WebDriverWait(driver, 100).until(
    EC.text_to_be_present_in_element((By.TAG_NAME, "body"), "adsf"))
print("Message found")
driver.quit()
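An alternative that sidesteps the offset arithmetic entirely (not from the original answer, and it depends on the browser honouring a synthetic click on an <area>) is to dispatch the click through JavaScript:
area = driver.find_element_by_id('clickhereyoustupidselenium')
# Dispatch a DOM click directly, bypassing WebDriver's
# geometry-based native click positioning.
driver.execute_script("arguments[0].click();", area)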

How to parse extJS using Jsoup?

I am trying to parse the HTML of this link "http://dev.sencha.com/extjs/5.0.0/examples/desktop/index.html" using Jsoup, but all it gives me is
<html>
<head>
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<title>Desktop</title>
<!-- The line below must be kept intact for Sencha Cmd to build your application -->
<script type="text/javascript">
<!-- here it shows some script -->
</script>
</head>
<body>
</body>
</html>
How do I extract the attributes of the notepad icon there so that I can click on it using WebDriver?
Jsoup only fetches the static HTML and never executes JavaScript, which is why the body comes back empty: the ExtJS desktop is rendered at runtime. Use a real browser through WebDriver instead:
import org.openqa.selenium.By;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
import org.openqa.selenium.firefox.FirefoxDriver;

public class Main {
    public static void main(String[] args) throws Exception {
        WebDriver driver = new FirefoxDriver();
        driver.get("http://dev.sencha.com/extjs/5.0.0/examples/desktop/index.html");
        WebElement notepadIcon = driver.findElement(By.id("Notepad-shortcut"));
        notepadIcon.click();
        WebElement notepadDiv = driver.findElement(By.id("notepad"));
        driver.switchTo().frame(notepadDiv.findElement(By.id("notepad-editor-inputCmp-iframeEl")));
        // Now you have the notepad DOM
        System.out.println("Page source is: " + driver.getPageSource());
        driver.quit();
    }
}
Side note:
I tried using HtmlUnitDriver but as you can see here there seems to be a problem.
Quoting user nlotz:
The JavaScript engine used by HtmlUnit chokes on the :first-child selector (used in >= 2.2.1).
Workaround:
Ext.override(Ext.Button, { buttonSelector: 'button:first' }); (reverts to the selector used in <= 2.2)
Update
The problem with ChromeDriver was that it was too fast. I had the same exception (NoSuchElement) until I turned the debugger on; then everything worked, because the breakpoint gave the web app time to finish executing the JavaScript it needed. After some research, this is what I came up with:
import org.openqa.selenium.By;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
import org.openqa.selenium.chrome.ChromeDriver;
import org.openqa.selenium.chrome.ChromeOptions;
import org.openqa.selenium.support.ui.ExpectedConditions;
import org.openqa.selenium.support.ui.WebDriverWait;

public class Main {
    public static void main(String[] args) throws Exception {
        System.setProperty("webdriver.chrome.driver", "PATH_TO_CHROMEDRIVER.EXE");
        ChromeOptions options = new ChromeOptions();
        options.addArguments("test-type");
        WebDriver driver = new ChromeDriver(options);
        driver.get("http://dev.sencha.com/extjs/5.0.0/examples/desktop/index.html");
        // This sets an expected condition for the icon: it has to be clickable
        // within 5 seconds, otherwise a TimeoutException is raised.
        WebElement notepadIcon = (new WebDriverWait(driver, 5))
                .until(ExpectedConditions.elementToBeClickable(By.id("Notepad-shortcut")));
        notepadIcon.click();
        WebElement notepadDiv = driver.findElement(By.id("notepad"));
        driver.switchTo().frame(notepadDiv.findElement(By.id("notepad-editor-inputCmp-iframeEl")));
        // Now you have the notepad DOM
        System.out.println("Page source: " + driver.getPageSource());
        driver.quit();
    }
}