I am running a webscraper with selenium to get some data on the NBA. I have urls to get to the websites for each of the 30 teams, but when I run the code it only gets through a few of the urls and then crashes with the errors below being shown:
#web scraper
from bs4 import BeautifulSoup
import requests
from selenium import webdriver
import pandas as pd
import os
class NBAScraper:
def __init__(self):
#part 1
url = "https://www.nba.com/teams"
HTML = requests.get(url)
soup = BeautifulSoup(HTML.text, 'html.parser')
text = str(soup.find_all("a", "Anchor_anchor__cSc3P TeamFigureLink_teamFigureLink__uqnNO"))
ids = []
for i in range(0, 30):
hr = text.find("stats")
ids.append(text[(hr+11):(hr+21)])
text = text[(hr+22):]
#part 2
names = []
for j in range(0, 30):
url2 = "https://www.nba.com/stats/team/"+str(ids[j])+"/advanced"
HTML2 = requests.get(url2)
soup2 = BeautifulSoup(HTML2.text, 'html.parser')
##div class="TeamHeader_name__MmHlP
name = str(soup2.find("div", "TeamHeader_name__MmHlP"))
ni = name.find("div>")
ni2 = name.find("<!")
name1 = name[(ni+4):ni2]
name = name[ni2:]
ni3 = name.find("<div>")
name = name[(ni3+5):]
ni4 = name.find("</div>")
name2 = name[:ni4]
n = name1 + " " + name2
names.append(n)
##tbody class="Crom_body__UYOcU"
#part 3
offrtg = []
defrtg = []
reb = []
tov = []
efg = []
for k in range(0, 30):
self.driver = webdriver.Chrome()
url3 = "https://www.nba.com/stats/team/"+str(ids[k])+"/advanced"
self.driver.get(url3)
rndrhtml = self.driver.page_source
self.driver.close()
#self.driver.quit()
soup3 = BeautifulSoup(rndrhtml, 'html.parser')
ovrall = str(soup3.find("tbody", "Crom_body__UYOcU").find_all("td"))
for d in range(0, 13):
di = ovrall.find("<td>")
ovrall = ovrall[(di+4):]
#conditions
if d == 2:
di2 = ovrall.find("</td>")
offrtg.append(float(ovrall[:di2]))
elif d == 3:
di2 = ovrall.find("</td>")
defrtg.append(float(ovrall[:di2]))
elif d == 10:
di2 = ovrall.find("</td>")
reb.append(float(ovrall[:di2]))
elif d == 11:
di2 = ovrall.find("</td>")
tov.append(float(ovrall[:di2]))
elif d == 12:
di2 = ovrall.find("</td>")
efg.append(float(ovrall[:di2]))
#writing to excel
os.remove(r"C:\Users\jackm\OneDrive\Desktop\NBA\NBASTATS.xlsx")
d = {'Name': names, 'OFFRTG': offrtg, 'DEFRTG': defrtg, 'REB': reb,
'TOV': tov, 'EFG': efg}
df = pd.DataFrame(data=d)
df.to_excel(r"C:\Users\jackm\OneDrive\Desktop\NBA\NBASTATS.xlsx", sheet_name="STATS")
NBAScraper()
I tried to play around with the closing and quitting functions for the driver, or put the driver in a separate function and run it outside the class, but none of that worked. I realized through some testing that even if it's not inside a loop, selenium will throw the error for a url but run it fine the second time. I tried using implicit waits to solve this but to no avail.
Traceback (most recent call last):
File "C:\Program Files\Spyder\pkgs\spyder_kernels\py3compat.py", line 356, in compat_exec
exec(code, globals, locals)
File "c:\users\jackm\spyder\nba.py", line 104, in <module>
NBAScraper()
File "c:\users\jackm\spyder\nba.py", line 71, in __init__
ovrall = str(soup3.find("tbody", "Crom_body__UYOcU").find_all("td"))
AttributeError: 'NoneType' object has no attribute 'find_all'
Hope you help me, I want to get flow info from switch and that by sending a request every 10s and the switch reply with the info but I get the following error when the controller receive the reply by using a flow request reply handler The error is occuring because of flow matching 'eth_type'
CollectTrainingStatsApp: Exception occurred during handler processing. Backtrace from offending handler [_flow_stats_reply_handler] servicing event [EventOFPFlowStatsReply] follows.
Traceback (most recent call last):
File "/usr/local/lib/python2.7/dist-packages/ryu/base/app_manager.py", line 290, in _event_loop
handler(ev)
File "/home/guenfaf/Documents/Training ryu/data_to_csv/data_to_csv.py", line 59, in _flow_stats_reply_handler
for stat in sorted([flow for flow in body if (flow.priority == 1) ], key=lambda flow:
File "/home/guenfaf/Documents/Training ryu/data_to_csv/data_to_csv.py", line 60, in <lambda>
(flow.match['eth_type'],flow.match['ipv4_src'],flow.match['ipv4_dst'],flow.match['ip_proto'])):
File "/usr/local/lib/python2.7/dist-packages/ryu/ofproto/ofproto_v1_3_parser.py", line 904, in __getitem__
return dict(self._fields2)[key]
KeyError: 'eth_type'
Here is my code :
from ryu.app import simple_switch_13
from ryu.controller import ofp_event
from ryu.controller.handler import MAIN_DISPATCHER, DEAD_DISPATCHER
from ryu.controller.handler import set_ev_cls
from ryu.lib import hub
from time import time
# class CollectTrainingStatsApp(simple_switch_13.SimpleSwitch13):
class CollectTrainingStatsApp(simple_switch_13.SimpleSwitch13):
def __init__(self, *args, **kwargs):
super(CollectTrainingStatsApp, self).__init__(*args, **kwargs)
self.datapaths = {}
self.monitor_thread = hub.spawn(self.monitor)
file0 = open("FlowStatsfile.csv","w")
file0.write('datapath_id,flow_id,ip_src,tp_src,ip_dst,tp_dst,ip_proto,flow_duration_sec,flow_duration_nsec,idle_timeout,hard_timeout,flags,packet_count,byte_count,packet_count_per_second,packet_count_per_nsecond,byte_count_per_second,byte_count_per_nsecond,label\n')
file0.close()
#Asynchronous message
#set_ev_cls(ofp_event.EventOFPStateChange,[MAIN_DISPATCHER, DEAD_DISPATCHER])
def state_change_handler(self, ev):
datapath = ev.datapath
if ev.state == MAIN_DISPATCHER:
if datapath.id not in self.datapaths:
self.logger.debug('register datapath: %016x', datapath.id)
self.datapaths[datapath.id] = datapath
elif ev.state == DEAD_DISPATCHER:
if datapath.id in self.datapaths:
self.logger.debug('unregister datapath: %016x', datapath.id)
del self.datapaths[datapath.id]
def monitor(self):
while True:
for dp in self.datapaths.values():
self.request_stats(dp)
hub.sleep(10)
def request_stats(self, datapath):
self.logger.debug('send stats request: %016x', datapath.id)
parser = datapath.ofproto_parser
req = parser.OFPFlowStatsRequest(datapath)
datapath.send_msg(req)
#set_ev_cls(ofp_event.EventOFPFlowStatsReply, MAIN_DISPATCHER)
def _flow_stats_reply_handler(self, ev):
#timestamp = time.time()
tp_src = 0
tp_dst = 0
file0 = open("FlowStatsfile.csv","a+")
body = ev.msg.body
for stat in sorted([flow for flow in body if (flow.priority == 1) ], key=lambda flow:
(flow.match['eth_type'],flow.match['ipv4_src'],flow.match['ipv4_dst'],flow.match['ip_proto'])):
ip_src = stat.match['ipv4_src']
ip_dst = stat.match['ipv4_dst']
ip_proto = stat.match['ip_proto']
if stat.match['ip_proto'] == 1:
icmp_code = stat.match['icmpv4_code']
icmp_type = stat.match['icmpv4_type']
elif stat.match['ip_proto'] == 6:
tp_src = stat.match['tcp_src']
tp_dst = stat.match['tcp_dst']
elif stat.match['ip_proto'] == 17:
tp_src = stat.match['udp_src']
tp_dst = stat.match['udp_dst']
flow_id = str(ip_src) + str(tp_src) + str(ip_dst) + str(tp_dst) + str(ip_proto)
try:
packet_count_per_second = stat.packet_count/stat.duration_sec
packet_count_per_nsecond = stat.packet_count/stat.duration_nsec
except:
packet_count_per_second = 0
packet_count_per_nsecond = 0
try:
byte_count_per_second = stat.byte_count/stat.duration_sec
byte_count_per_nsecond = stat.byte_count/stat.duration_nsec
except:
byte_count_per_second = 0
byte_count_per_nsecond = 0
file0.write("{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{}\n"
.format(ev.msg.datapath.id, flow_id, ip_src, tp_src,ip_dst, tp_dst,
stat.match['ip_proto'],
stat.duration_sec, stat.duration_nsec,
stat.idle_timeout, stat.hard_timeout,
stat.flags, stat.packet_count,stat.byte_count,
packet_count_per_second,packet_count_per_nsecond,
byte_count_per_second,byte_count_per_nsecond,0))
file0.close()
Good day,
I'm developing a deep learning model for wireless signal detection. Below is the snippet of the function that computes the model accuracy and bit error rate (BER):
from chainer.datasets import TupleDataset
import numpy as np
from chainer import cuda
from chainer import function
def get_idp_acc(model, dataset_tuple, comp_ratio, profile = None, batchsize = 128, gpu = -1):
chainer.config.train = True
xp = np if gpu < 0 else cuda.cupy
x, indices, x_zf, HtH, Hty = dataset_tuple._datasets[0], dataset_tuple._datasets[1], dataset_tuple._datasets[2], dataset_tuple._datasets[3], dataset_tuple._datasets[4]
accs = 0
BERs = 0
model.train = False
for j in range(0, len(x), batchsize):
x_batch = xp.array(x[j:j + batchsize])
indices_batch = xp.array(indices[j:j + batchsize])
x_zf_batch = xp.array(x_zf[j:j + batchsize])
HtH_batch = xp.array(HtH[j:j + batchsize])
Hty_batch = xp.array(Hty[j:j + batchsize])
if profile == None:
acc_data = model(x_batch, indices_batch, x_zf_batch, HtH_batch, Hty_batch, comp_ratio = comp_ratio,
ret_param = 'acc')
else:
acc_data = model(x_batch, indices_batch, x_zf_batch, HtH_batch, Hty_batch, comp_ratio = comp_ratio,
ret_param = 'acc', profile = profile)
acc_data.to_cpu()
acc = acc_data.data
BER = 1.0 - acc
accs += acc * len(x_batch)
BERs += BER * len(x_batch)
return (accs / len(x)) * 100.
When the code is run, I get the following error below despite having imported all the required chainer modules. I really need your help on this issue as I'm stuck for nearly two months without making any headways in my project.
Traceback (most recent call last):
File "/Users/mac/Documents/idp_detnet/examples/run_mlp.py", line 14, in <module>
mlp.run(args)
File "/Users/mac/Documents/idp_detnet/examples/mlp.py", line 39, in run
acc_dict[name], BER_dict[name] = util.sweep_idp(model, test, comp_ratios, args)
File "/Users/mac/Documents/idp_detnet/examples/util.py", line 107, in sweep_idp
batchsize=args.batchsize, profile=profile))
File "/Users/mac/Documents/idp_detnet/examples/util.py", line 83, in get_idp_acc
acc_data.to_cpu()
AttributeError: 'numpy.float32' object has no attribute 'to_cpu'
Below is the additional information providing codes for model definition:
K = 10
num_layers = 3*K
def lin_soft_sign(x, t):
'''Linear soft sign activation function from the original paper Eq. (11)'''
y = -1 + F.relu(x + t)/ F.absolute(t) - F.relu(- t)/ F.absolute(t)
return y
def accuracy(x, y):
'''Computes the fraction of elements for which x and y are equal'''
return np.mean(np.equal(x, y)).astype(np.float32)
class MLP(chainer.Chain):
def __init__(self, K, coeff_generator, profiles = None, z_dims = 8*K, v_dims = 2*K):
super(MLP, self).__init__()
if profiles == None:
profiles = [(0, 10)]
self.coeff_generator = coeff_generator
self.z_dims = z_dims
self.v_dims = v_dims
self.K = K
self.profiles = profiles
self.profile = 0
with self.init_scope():
self.p0_l1 = IncompleteLinear(None, self.z_dims)
self.p1_l1 = IncompleteLinear(None, self.z_dims)
self.p2_l1 = IncompleteLinear(None, self.z_dims)
self.p0_lv = IncompleteLinear(None, self.v_dims)
self.p1_lv = IncompleteLinear(None, self.v_dims)
self.p2_lv = IncompleteLinear(None, self.v_dims)
self.p0_l3 = IncompleteLinear(None, self.K)
self.p1_l3 = IncompleteLinear(None, self.K)
self.p2_l3 = IncompleteLinear(None, self.K)
def __call__(self, x, indices, x_zf, HtH, Hty, ret_param = 'loss', profile = None, comp_ratio = None):
if profile == None:
profile = self.profile
# Form Zero-forcing detection
err_rel = F.sum((x - x_zf)**2, axis = 1)
params = layer_profile(self.coeff_generator,
*self.profiles[profile], self.z_dims,
self.v_dims, comp_ratio)
def detnet_layer(x_d, x_logit, v, z_dims, v_dims):
HtH_x = np.matmul(HtH, np.expand_dims(x_d.data, axis = 2).astype(np.float32))
HtH_x = F.squeeze(HtH_x, axis = -1)
#x_concat = np.concatenate([Hty, x, HtH_x, v], axis=1)
x_concat = F.concat([Hty, x_d, HtH_x, v], axis = 1)
if profile == 0:
z = F.relu(self.p0_l1(x_concat))
v += self.p0_lv(z, *params)
x_logit += self.p0_l3(z, *params)
x = lin_soft_sign(x_logit, F.broadcast_to(np.ones(1).astype(np.float32), x_logit.shape))
elif profile == 1:
z = F.relu(self.p1_l1(x_concat))
v += self.p1_lv(z, *params)
x_logit += self.p1_l3(z, *params)
x = lin_soft_sign(x_logit, F.broadcast_to(np.ones(1).astype(np.float32), x_logit.shape))
elif profile == 2:
z = F.relu(self.p2_l1(x_concat))
v += self.p2_lv(z, *params)
x_logit += self.p2_l3(z, *params)
x = lin_soft_sign(x_logit, F.broadcast_to(np.ones(1).astype(np.float32), x_logit.shape))
return x, x_logit, v
x_k = np.zeros((Hty.shape[0], self.K), dtype = np.float32)
x_k_logit = np.zeros((Hty.shape[0], self.K), dtype = np.float32)
v = np.zeros((Hty.shape[0], self.v_dims), dtype = np.float32)
loss = 0
mod = sg.Modulator('BPSK', K)
for k in range(1, num_layers + 1):
x_k, x_k_logit, v = detnet_layer(x_k, x_k_logit, v, self.z_dims, self.v_dims)
err = F.sum((x - x_k)**2, 1)
loss += (np.log(k)).astype(np.float32) * F.mean(err/err_rel)
report = {'loss': loss, 'acc': accuracy(mod.demodulate(x_k.data), indices)}
reporter.report(report, self)
return report[ret_param]
def report_params(self):
return ['validation/main/acc']
def param_names(self):
if len(self.profiles) > 1:
return 'IDPDETNET_{}_{}_{}_p{}'.format(self.z_dims, self.v_dims, self.coeff_generator.__name__, len(self.profiles))
return 'IDPDETNET_{}_{}_{}'.format(self.z_dims, self.v_dims, self.coeff_generator.__name__)
import os
import sys
sys.path.insert(0, os.path.abspath(
os.path.join(os.path.dirname(__file__), '..')))
import numpy as np
import visualize as vz
import idp.coeffs_generator as cg
from net import MLP
import util
K = 10
N = 4
v_dims = 2*K
z_dims = 8*K
SNR_dB_tmin = -4
SNR_dB_tmax = 24
SNR_dB_test = np.linspace(SNR_dB_tmin, SNR_dB_tmax, 8)
num_snr_test = len(SNR_dB_test)
def run(args):
train, test = util.get_dataset(args.modeltype)
names = ['all-one (standard)', 'linear']
colors = [vz.colors.all_one_lg, vz.colors.linear_lg]
models = [
MLP.MLP(K, cg.uniform, z_dims = 8*K, v_dims = 2*K),
MLP.MLP(K, cg.linear, z_dims = 8*K, v_dims = 2*K)
]
comp_ratios = np.linspace(0.1, 1.0, 20)
acc_dict = {}
BER_dict = {}
ratios_dict = {}
for i in range(num_snr_test):
for name, model in zip(names, models):
util.load_or_train_model(model, train, test, args)
acc_dict[name], BER_dict[name] = util.sweep_idp(model, test, comp_ratios, args)
ratios_dict[name] = [100. * cr for cr in comp_ratios]
filename = "IDPDETNET1_{}".format(args.modeltype)
vz.plot(ratios_dict, acc_dict, names, filename, colors = colors,
folder = args.figure_path, ext=args.ext,
title = 'IDPDETNET (BPSK)',
xlabel = 'IDP (%)',
ylabel = 'Test Accuracy (%)', ylim = (0, 100))
filename = "IDPDETNET2_{}".format(args.modeltype)
vz.plot(ratios_dict, BER_dict, names, filename, colors = colors,
folder=args.figure_path, ext=args.ext,
title='IDPDETNET (BPSK)',
xlabel='IDP (%)',
ylabel='BER (bits/sec)')
filename = "IDPDETNET3_{}".format(args.modeltype)
vz.plot(num_snr_test, BER_dict, names, filename, colors = colors,
folder = args.figure_path, ext = args.ext,
title = 'IDPDETNET (BPSK)',
xlabel = 'SNR (dB)',
ylabel = ' BER (bits/sec)')
if __name__ == '__main__':
args = util.default_parser('IDPDETNET Example').parse_args()
run(args)
Hi Seiya Tokui. Thank you for your kind input. Here is the model definition based on the above code:
model = MLP.MLP(K, cg.uniform, z_dims = 8*K, v_dims = 2*K)
OR
model = MLP.MLP(K, cg.linear, z_dims = 8*K, v_dims = 2*K)
Hi #BloodyD. Thank for your brilliant contributions. The model started training, but then later returned the following error:
1 nan nan 0.50108 5.85448
Traceback (most recent call last):
File "run_mlp.py", line 14, in <module>
mlp.run(args)
File "/Users/mac/Documents/idp_detnet/examples/mlp.py", line 38, in run
util.load_or_train_model(model, train, test, args)
File "/Users/mac/Documents/idp_detnet/examples/util.py", line 204, in load_or_train_model
train_model(model, train, test, args)
File "/Users/mac/Documents/idp_detnet/examples/util.py", line 184, in train_model
return eval(fp.read().replace('\n', ''))
File "<string>", line 1, in <module>
NameError: name 'NaN' is not defined
The error occurs in the last line of this snippet code below:
name = model.param_names()
save_model(model, os.path.join(args.model_path, name))
chainer.config.train = False
with open(os.path.join(args.out, 'log'), 'r') as fp:
return eval(fp.read().replace('\n', ''))
My Code.
#!/usr/bin/env python
#coding: utf-8
userid="NicoNicoCreate#gmail.com"
passwd="********"
import sys, re, cgi, urllib, urllib.request, urllib.error, http.cookiejar, xml.dom.minidom, time, urllib.parse
import simplejson as json
def getToken():
html = urllib.request.urlopen("http://www.nicovideo.jp/my/mylist").read()
for line in html.splitlines():
mo = re.match(r'^\s*NicoAPI\.token = "(?P<token>[\d\w-]+)";\s*',line)
if mo:
token = mo.group('token')
break
assert token
return token
def mylist_create(name):
cmdurl = "http://www.nicovideo.jp/api/mylistgroup/add"
q = {}
q['name'] = name.encode("utf-8")
q['description'] = ""
q['public'] = 0
q['default_sort'] = 0
q['icon_id'] = 0
q['token'] = token
cmdurl += "?" + urllib.parse.urlencode(q).encode("utf-8")
j = json.load( urllib.request.urlopen(cmdurl), encoding='utf-8')
return j['id']
def addvideo_tomylist(mid,smids):
for smid in smids:
cmdurl = "http://www.nicovideo.jp/api/mylist/add"
q = {}
q['group_id'] = mid
q['item_type'] = 0
q['item_id'] = smid
q['description'] = u""
q['token'] = token
cmdurl += "?" + urllib.parse.urlencode(q).encode("utf-8")
j = json.load( urllib.request.urlopen(cmdurl), encoding='utf-8')
time.sleep(0.5)
#Login
opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(http.cookiejar.CookieJar()))
urllib.request.install_opener(opener)
urllib.request.urlopen("https://secure.nicovideo.jp/secure/login",
urllib.parse.urlencode( {"mail":userid, "password":passwd}) ).encode("utf-8")
#GetToken
token = getToken()
#MakeMylist&AddMylist
mid = mylist_create(u"Testlist")
addvideo_tomylist(mid, ["sm9","sm1097445", "sm1715919" ] )
MyError.
Traceback (most recent call last):
File "Nico3.py", line 48, in <module>
urllib.parse.urlencode( {"mail":userid, "password":passwd}) ).encode("utf-8")
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/urllib/request.py", line 162, in urlopen
return opener.open(url, data, timeout)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/urllib/request.py", line 463, in open
req = meth(req)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/urllib/request.py", line 1170, in do_request_
raise TypeError(msg)
TypeError: POST data should be bytes or an iterable of bytes. It cannot be of type str.
I've tried encode but it did not help.
I'm japanese accademic students.
It was not able to be settled by my knowledge.
I am aware of this similar question, TypeError: POST data should be bytes or an iterable of bytes. It cannot be str, but am too new for the answer to be much help.
You paren is in the wrong place so you are not actually encoding:
.urlencode({"mail":userid, "password":passwd}).encode("utf-8")) # <- move inside