TypeError: POST data should be bytes or an iterable of bytes. It cannot be of type str

My code:
#!/usr/bin/env python
#coding: utf-8
userid = "NicoNicoCreate#gmail.com"
passwd = "********"

import sys, re, cgi, urllib, urllib.request, urllib.error, http.cookiejar, xml.dom.minidom, time, urllib.parse
import simplejson as json

def getToken():
    html = urllib.request.urlopen("http://www.nicovideo.jp/my/mylist").read()
    for line in html.splitlines():
        mo = re.match(r'^\s*NicoAPI\.token = "(?P<token>[\d\w-]+)";\s*', line)
        if mo:
            token = mo.group('token')
            break
    assert token
    return token

def mylist_create(name):
    cmdurl = "http://www.nicovideo.jp/api/mylistgroup/add"
    q = {}
    q['name'] = name.encode("utf-8")
    q['description'] = ""
    q['public'] = 0
    q['default_sort'] = 0
    q['icon_id'] = 0
    q['token'] = token
    cmdurl += "?" + urllib.parse.urlencode(q).encode("utf-8")
    j = json.load(urllib.request.urlopen(cmdurl), encoding='utf-8')
    return j['id']

def addvideo_tomylist(mid, smids):
    for smid in smids:
        cmdurl = "http://www.nicovideo.jp/api/mylist/add"
        q = {}
        q['group_id'] = mid
        q['item_type'] = 0
        q['item_id'] = smid
        q['description'] = u""
        q['token'] = token
        cmdurl += "?" + urllib.parse.urlencode(q).encode("utf-8")
        j = json.load(urllib.request.urlopen(cmdurl), encoding='utf-8')
        time.sleep(0.5)

#Login
opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(http.cookiejar.CookieJar()))
urllib.request.install_opener(opener)
urllib.request.urlopen("https://secure.nicovideo.jp/secure/login",
                       urllib.parse.urlencode({"mail": userid, "password": passwd})).encode("utf-8")
#GetToken
token = getToken()
#MakeMylist&AddMylist
mid = mylist_create(u"Testlist")
addvideo_tomylist(mid, ["sm9", "sm1097445", "sm1715919"])
My error:
Traceback (most recent call last):
  File "Nico3.py", line 48, in <module>
    urllib.parse.urlencode( {"mail":userid, "password":passwd}) ).encode("utf-8")
  File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/urllib/request.py", line 162, in urlopen
    return opener.open(url, data, timeout)
  File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/urllib/request.py", line 463, in open
    req = meth(req)
  File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/urllib/request.py", line 1170, in do_request_
    raise TypeError(msg)
TypeError: POST data should be bytes or an iterable of bytes. It cannot be of type str.
I've tried encode() but it did not help.
I'm a Japanese academic student, and I could not solve this with my own knowledge.
I am aware of this similar question, TypeError: POST data should be bytes or an iterable of bytes. It cannot be str, but I am too new for the answer there to be much help.

Your closing paren is in the wrong place, so you are not actually encoding the POST data:
.urlencode({"mail":userid, "password":passwd}).encode("utf-8"))  # <- move inside
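Applied to the login call in the question, a minimal sketch of the fix (same URL and variables as above, nothing else changed) would be:

# encode the urlencoded string, not the urlopen() result, so the POST body is bytes
post_data = urllib.parse.urlencode({"mail": userid, "password": passwd}).encode("utf-8")
urllib.request.urlopen("https://secure.nicovideo.jp/secure/login", post_data)

Note that a similar str/bytes mix-up appears in mylist_create() and addvideo_tomylist(), where the bytes result of .encode("utf-8") is concatenated onto the cmdurl str; leaving urllib.parse.urlencode(q) as a plain str there is likely what you want.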

Related

How to pass columns as a JSON record to the API method using Pyspark?

I have a dataset where I split the records into 4,000-row batches of a DataFrame and then try to hit the API. The first part works fine. The problem occurs when I try to hit the API: I want to send the records as JSON, so I pass a struct of the column values converted to JSON to the call_to_cust_bulk_api method. Inside that method I build a custRequestBody and send that data to the API. The code is below:
def call_to_cust_bulk_api(url, payload):
    print("Calling Bulk API")
    try:
        print(payload)
        token = get_token(tokenUrl, tokenBody)
        custRequestBody = {
            "Token": token,
            "CustomerName": "",
            "Object": "",
            "Data": payload
        }
        # TODO: write code...
        headers = {'content-type': 'application/json'}
        print(":::::::jsn load::::")
        response = requests.post(url, data=custRequestBody, headers=headers)
        print(":::Response::::", response)
        data = response.json()
        return data
    except Exception as e:
        print('ExceptionInPushing ' + str(e))

df = spark.read.option("header", "true").csv(
    ".csv",
    sep="~")
df = df.withColumn("uniqueID", lit("1"))
df = df.withColumn("row_num", row_number().over(
    Window.partitionBy(col("uniqueID")).orderBy(col("uniqueID"))
))

total_count = df.count()
i = 1
while i < total_count:
    rangeNum = i + 3999
    print("Range Num:::")
    print(rangeNum)
    df1 = df.filter((col("row_num") >= i) & (col("row_num") <= rangeNum))
    finalDF = df1.drop("row_num", "edl_timestamp", "uniqueID")
    colsListToBePassed = finalDF.columns
    print("finalDF count:::", finalDF.count())
    finalDF = finalDF.repartition(finalDF.rdd.getNumPartitions()).withColumn("status_for_batch",
        call_to_cust_bulk_api(policyUrl,
                              to_json(
                                  struct(*colsListToBePassed))))
It gives me the below error:
Traceback (most recent call last):
  File "/home/lumiq/IdeaProjects/pyspark_python_test/com/apitest3.py", line 116, in <module>
    finalDF = finalDF.repartition(finalDF.rdd.getNumPartitions()).withColumn("status_for_batch",
  File "/home/lumiq/Downloads/spark-3.2.1-bin-hadoop3.2/python/pyspark/sql/dataframe.py", line 2477, in withColumn
    raise TypeError("col should be Column")
TypeError: col should be Column
I want to track the API response as a new column and write this data frame out later.
So, where am I going wrong?
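A hedged reading of the traceback: withColumn() requires a Column as its second argument, but call_to_cust_bulk_api() is a plain Python function that runs on the driver and returns a dict, so no Column is ever produced. One possible sketch (assuming call_to_cust_bulk_api and policyUrl are importable on the executors, and that storing the response as a JSON string is acceptable) is to wrap the call in a UDF:

from pyspark.sql.functions import udf, to_json, struct
from pyspark.sql.types import StringType
import json

# hypothetical wrapper: calls the question's API function for one row's JSON payload
def call_api_for_payload(payload_json):
    result = call_to_cust_bulk_api(policyUrl, payload_json)
    return json.dumps(result)

call_api_udf = udf(call_api_for_payload, StringType())

finalDF = finalDF.withColumn(
    "status_for_batch",
    call_api_udf(to_json(struct(*colsListToBePassed)))
)

Be aware that this calls the API once per row rather than once per 4,000-row batch, so it only illustrates the Column requirement, not the intended batching.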

How to move on to the next line when there is an error while getting the headers of all the URLs in a file

I used pass, but the except clause does not ignore the error and move on to the next line:
import argparse
import requests
import re
import requests

parser = argparse.ArgumentParser(description='put the right url')
parser.add_argument('-l', '--file', type=argparse.FileType('r'))
parser.add_argument('-u', '--url', help="Check a url for straight quotes", type=str)
args = parser.parse_args()
url1 = args.url

def analyseur(url1):
    def formaturl(url1):
        if not re.match('(?:http|ftp|https)://', url1):
            return 'https://{}'.format(url1)
        return url1
    #print(formaturl(url1))
    response = requests.get(formaturl(url1))
    print(response.headers["Server"])

if args.url:
    analyseur(url1)
elif args.file:
    try:
        with args.file as file:
            count = 0
            while True:
                count += 1
                # Get next line from file
                line = file.readline()
                # if line is empty, end of file is reached
                if not line:
                    break
                print("Line{}: {}".format(count, line.strip()))
                url1 = line.strip()
                analyseur(url1)
    except ValueError:
        pass
        #print("erreur url")
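A hedged sketch of one way to actually skip a failing URL and continue with the next line: put the try/except around the per-line call, inside the loop, and catch the exceptions that requests raises (requests.exceptions.RequestException) rather than ValueError around the whole loop. The KeyError branch is an assumption, covering responses that have no "Server" header:

elif args.file:
    with args.file as file:
        for count, line in enumerate(file, start=1):
            url1 = line.strip()
            if not url1:
                continue
            print("Line{}: {}".format(count, url1))
            try:
                analyseur(url1)
            except (requests.exceptions.RequestException, KeyError):
                # connection/HTTP failure or missing "Server" header: skip this URL
                print("erreur url")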

Getting flow infos from switch and copy the info in csv file (Ryu controller)

I hope you can help me. I want to get flow info from the switch by sending a request every 10 s; the switch replies with the info, but I get the following error when the controller receives the reply in the flow stats reply handler. The error occurs because of the 'eth_type' flow match field.
CollectTrainingStatsApp: Exception occurred during handler processing. Backtrace from offending handler [_flow_stats_reply_handler] servicing event [EventOFPFlowStatsReply] follows.
Traceback (most recent call last):
  File "/usr/local/lib/python2.7/dist-packages/ryu/base/app_manager.py", line 290, in _event_loop
    handler(ev)
  File "/home/guenfaf/Documents/Training ryu/data_to_csv/data_to_csv.py", line 59, in _flow_stats_reply_handler
    for stat in sorted([flow for flow in body if (flow.priority == 1) ], key=lambda flow:
  File "/home/guenfaf/Documents/Training ryu/data_to_csv/data_to_csv.py", line 60, in <lambda>
    (flow.match['eth_type'],flow.match['ipv4_src'],flow.match['ipv4_dst'],flow.match['ip_proto'])):
  File "/usr/local/lib/python2.7/dist-packages/ryu/ofproto/ofproto_v1_3_parser.py", line 904, in __getitem__
    return dict(self._fields2)[key]
KeyError: 'eth_type'
Here is my code:
from ryu.app import simple_switch_13
from ryu.controller import ofp_event
from ryu.controller.handler import MAIN_DISPATCHER, DEAD_DISPATCHER
from ryu.controller.handler import set_ev_cls
from ryu.lib import hub
from time import time


class CollectTrainingStatsApp(simple_switch_13.SimpleSwitch13):

    def __init__(self, *args, **kwargs):
        super(CollectTrainingStatsApp, self).__init__(*args, **kwargs)
        self.datapaths = {}
        self.monitor_thread = hub.spawn(self.monitor)
        file0 = open("FlowStatsfile.csv", "w")
        file0.write('datapath_id,flow_id,ip_src,tp_src,ip_dst,tp_dst,ip_proto,flow_duration_sec,flow_duration_nsec,idle_timeout,hard_timeout,flags,packet_count,byte_count,packet_count_per_second,packet_count_per_nsecond,byte_count_per_second,byte_count_per_nsecond,label\n')
        file0.close()

    #Asynchronous message
    @set_ev_cls(ofp_event.EventOFPStateChange, [MAIN_DISPATCHER, DEAD_DISPATCHER])
    def state_change_handler(self, ev):
        datapath = ev.datapath
        if ev.state == MAIN_DISPATCHER:
            if datapath.id not in self.datapaths:
                self.logger.debug('register datapath: %016x', datapath.id)
                self.datapaths[datapath.id] = datapath
        elif ev.state == DEAD_DISPATCHER:
            if datapath.id in self.datapaths:
                self.logger.debug('unregister datapath: %016x', datapath.id)
                del self.datapaths[datapath.id]

    def monitor(self):
        while True:
            for dp in self.datapaths.values():
                self.request_stats(dp)
            hub.sleep(10)

    def request_stats(self, datapath):
        self.logger.debug('send stats request: %016x', datapath.id)
        parser = datapath.ofproto_parser
        req = parser.OFPFlowStatsRequest(datapath)
        datapath.send_msg(req)

    @set_ev_cls(ofp_event.EventOFPFlowStatsReply, MAIN_DISPATCHER)
    def _flow_stats_reply_handler(self, ev):
        #timestamp = time.time()
        tp_src = 0
        tp_dst = 0
        file0 = open("FlowStatsfile.csv", "a+")
        body = ev.msg.body
        for stat in sorted([flow for flow in body if (flow.priority == 1)], key=lambda flow:
                (flow.match['eth_type'], flow.match['ipv4_src'], flow.match['ipv4_dst'], flow.match['ip_proto'])):
            ip_src = stat.match['ipv4_src']
            ip_dst = stat.match['ipv4_dst']
            ip_proto = stat.match['ip_proto']
            if stat.match['ip_proto'] == 1:
                icmp_code = stat.match['icmpv4_code']
                icmp_type = stat.match['icmpv4_type']
            elif stat.match['ip_proto'] == 6:
                tp_src = stat.match['tcp_src']
                tp_dst = stat.match['tcp_dst']
            elif stat.match['ip_proto'] == 17:
                tp_src = stat.match['udp_src']
                tp_dst = stat.match['udp_dst']
            flow_id = str(ip_src) + str(tp_src) + str(ip_dst) + str(tp_dst) + str(ip_proto)
            try:
                packet_count_per_second = stat.packet_count/stat.duration_sec
                packet_count_per_nsecond = stat.packet_count/stat.duration_nsec
            except:
                packet_count_per_second = 0
                packet_count_per_nsecond = 0
            try:
                byte_count_per_second = stat.byte_count/stat.duration_sec
                byte_count_per_nsecond = stat.byte_count/stat.duration_nsec
            except:
                byte_count_per_second = 0
                byte_count_per_nsecond = 0
            file0.write("{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{}\n"
                .format(ev.msg.datapath.id, flow_id, ip_src, tp_src, ip_dst, tp_dst,
                        stat.match['ip_proto'],
                        stat.duration_sec, stat.duration_nsec,
                        stat.idle_timeout, stat.hard_timeout,
                        stat.flags, stat.packet_count, stat.byte_count,
                        packet_count_per_second, packet_count_per_nsecond,
                        byte_count_per_second, byte_count_per_nsecond, 0))
        file0.close()
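A hedged note on the KeyError: flows whose match carries no 'eth_type' field (for example the table-miss entry that SimpleSwitch13 installs) raise KeyError as soon as the sort key reads flow.match['eth_type']. One possible guard, assuming OFPMatch supports membership tests on field names through its dict-like interface, is to keep only the flows that carry every field used below:

# hedged sketch: drop flows that lack the match fields the handler needs
flows = [flow for flow in body
         if flow.priority == 1
         and 'eth_type' in flow.match
         and 'ipv4_src' in flow.match
         and 'ipv4_dst' in flow.match
         and 'ip_proto' in flow.match]
for stat in sorted(flows, key=lambda flow: (flow.match['eth_type'],
                                            flow.match['ipv4_src'],
                                            flow.match['ipv4_dst'],
                                            flow.match['ip_proto'])):
    ...  # same per-flow body as in _flow_stats_reply_handler above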

TypeError: '<' not supported between instances of 'str' and 'int' Doc2Vec

Any ideas why this error is being thrown?
"TypeError: '<' not supported between … 'str' and 'int'" when a doc-tag is not present for most_similar()
I have a list of .txt documents stored in my data folder and want to compare one doc to another through my Flask app on localhost.
Traceback (most recent call last):
  File "C:\Users\ibrahimm\AppData\Local\Continuum\anaconda3\lib\site-packages\flask\app.py", line 2463, in __call__
    return self.wsgi_app(environ, start_response)
  File "C:\Users\ibrahimm\AppData\Local\Continuum\anaconda3\lib\site-packages\flask\app.py", line 2449, in wsgi_app
    response = self.handle_exception(e)
  File "C:\Users\ibrahimm\AppData\Local\Continuum\anaconda3\lib\site-packages\flask\app.py", line 1866, in handle_exception
    reraise(exc_type, exc_value, tb)
  File "C:\Users\ibrahimm\AppData\Local\Continuum\anaconda3\lib\site-packages\flask\_compat.py", line 39, in reraise
    raise value
  File "C:\Users\ibrahimm\AppData\Local\Continuum\anaconda3\lib\site-packages\flask\app.py", line 2446, in wsgi_app
    response = self.full_dispatch_request()
  File "C:\Users\ibrahimm\AppData\Local\Continuum\anaconda3\lib\site-packages\flask\app.py", line 1951, in full_dispatch_request
    rv = self.handle_user_exception(e)
  File "C:\Users\ibrahimm\AppData\Local\Continuum\anaconda3\lib\site-packages\flask\app.py", line 1820, in handle_user_exception
    reraise(exc_type, exc_value, tb)
  File "C:\Users\ibrahimm\AppData\Local\Continuum\anaconda3\lib\site-packages\flask\_compat.py", line 39, in reraise
    raise value
  File "C:\Users\ibrahimm\AppData\Local\Continuum\anaconda3\lib\site-packages\flask\app.py", line 1949, in full_dispatch_request
    rv = self.dispatch_request()
  File "C:\Users\ibrahimm\AppData\Local\Continuum\anaconda3\lib\site-packages\flask\app.py", line 1935, in dispatch_request
    return self.view_functions[rule.endpoint](**req.view_args)
  File "C:\Users\ibrahimm\Desktop\doc2vec-compare-doc-demo\app.py", line 56, in api_compare_2
    vec1 = d2v_model.docvecs.most_similar(data['doc1'])
  File "C:\Users\ibrahimm\AppData\Local\Continuum\anaconda3\lib\site-packages\gensim\models\keyedvectors.py", line 1715, in most_similar
    elif doc in self.doctags or doc < self.count:
TypeError: '<' not supported between instances of 'str' and 'int'
app.py
@app.route('/api/compare_2', methods=['POST'])
def api_compare_2():
    data = request.get_json()
    if not 'doc1' in data or not 'doc2' in data:
        return 'ERROR'
    vec1 = d2v_model.docvecs.most_similar(data['doc1'])
    vec2 = d2v_model.docvecs.most_similar(data['doc2'])
    vec1 = gensim.matutils.full2sparse(vec1)
    vec2 = gensim.matutils.full2sparse(vec2)
    print(data)
    print(vec2)
    print(vec1)
    return jsonify(sim=gensim.matutils.cossim(vec1, vec2))

@app.route('/api/compare_all', methods=['POST'])
def api_compare_all():
    data = request.get_json()
    if not 'doc' in data:
        return 'ERROR'
    vec = d2v_model.docvecs.most_similar(data['doc'])
    res = d2v_model.docvecs.most_similar([vec], topn=5)
    return jsonify(list=res)
model.py
def load_model():
    try:
        return gensim.models.doc2vec.Doc2Vec.load("doc2vec.model2")
    except:
        print('Model not found!')
        return None

def train_model():
    #path to the input corpus files
    data = "data"

    #tagging the text files
    class DocIterator(object):
        def __init__(self, doc_list, labels_list):
            self.labels_list = labels_list
            self.doc_list = doc_list

        def __iter__(self):
            for idx, doc in enumerate(self.doc_list):
                yield TaggedDocument(words=doc.split(), tags=[self.labels_list[idx]])

    docLabels = [f for f in listdir(data) if f.endswith('.txt')]
    print(docLabels)
    data = []
    for doc in docLabels:
        data.append(open(r'C:\Users\ibrahimm\Desktop\doc2vec-compare-doc-demo\data\\' + doc,
                         encoding='cp437').read())
    tokenizer = RegexpTokenizer(r'\w+')
    stopword_set = set(stopwords.words('english'))

    #This function does all cleaning of data using two objects above
    def nlp_clean(data):
        new_data = []
        for d in data:
            new_str = d.lower()
            dlist = tokenizer.tokenize(new_str)
            dlist = list(set(dlist).difference(stopword_set))
            new_data.append(dlist)
        return new_data

    data = nlp_clean(data)
    it = DocIterator(data, docLabels)

    #train doc2vec model
    model = gensim.models.Doc2Vec(size=300, window=15, min_count=4, workers=10, alpha=0.025, min_alpha=0.025, iter=20)  # use fixed learning rate
    model.build_vocab(it)
    model.train(it, epochs=model.iter, total_examples=model.corpus_count)
    model.save("doc2vec.model2")
If you try to look up a string doc-tag that's not in the model, you unfortunately get this confusing error instead of a clearer one. (See gensim's open issue: https://github.com/RaRe-Technologies/gensim/issues/1737#issuecomment-346995119 )
Whatever is in data['doc1'] isn't a tag in the model.
You may be able to pre-check, before attempting a most_similar() operation, by looking at whether data['doc1'] in model.docvecs is True.
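A minimal sketch of that pre-check, assuming d2v_model is the loaded model used in app.py above and that returning an error string is acceptable:

tag = data['doc1']
if tag in d2v_model.docvecs:   # membership test over the model's known doc-tags
    vec1 = d2v_model.docvecs.most_similar(tag)
else:
    return 'ERROR: unknown doc-tag {}'.format(tag)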
TypeError: '<' not supported between instances of 'str' and 'int'
[35182] Failed to execute script docker-compose
This error was the result of copy-and-pasted code that contained the wrong (curly) quotation marks. Change them to straight quotes ''.
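For illustration only (the answer above concerns a docker-compose script, not this code): curly quotes pasted from a rich-text document are not valid quote characters in Python source, while straight quotes are.

# name = “web”   # curly quotes: SyntaxError (invalid character)
name = 'web'      # straight quotes parse fine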

How to write custom PySpark serializer

I want to write a custom pyspark serializer. There is very little documentation online apart from details here.
My logic is as follows:
If I receive my special object, then I use custom logic to serialize/deserialize.
Otherwise, use cPickle for the same.
The custom serializer looks like the following:
from pyspark.serializers import FramedSerializer

class CustomSerializer(FramedSerializer):

    import cPickle as pickle
    import CustomClass

    def dumps(self, obj):
        if isinstance(obj, CustomClass):
            bytes_str = obj.serialize()
            bytes_str = '\1' + bytes_str
        elif isinstance(obj, CustomClass.Location):
            bytes_str = obj.serialize()
            bytes_str = '\2' + bytes_str
        else:
            bytes_str = pickle.dumps(obj)
            bytes_str = '\0' + bytes_str
        return bytes_str

    def loads(self, bytes_str):
        c = bytes_str[0]
        if c == '\1':
            obj = CustomClass()
            obj.parse_from_string(bytes_str[1:])
        elif c == '\2':
            obj = CustomClass.Location()
            obj.parse_from_string(bytes_str[1:])
        else:
            obj = pickle.loads(bytes_str[1:])
        return obj
While initializing the SparkContext, I make sure I specify the custom serializer:
serializer = CustomSerializer()
sc = SparkContext(appName='MyApp', serializer=serializer)
However, I still get an error:
Caused by: org.apache.spark.api.python.PythonException: Traceback (most recent call last):
  File "/mnt/yarn/usercache/user/appcache/application_1507044435666_0035/container_1507044435666_0035_01_000002/pyspark.zip/pyspark/worker.py", line 174, in main
    process()
  File "/mnt/yarn/usercache/user/appcache/application_1507044435666_0035/container_1507044435666_0035_01_000002/pyspark.zip/pyspark/worker.py", line 169, in process
    serializer.dump_stream(func(split_index, iterator), outfile)
  File "/mnt/yarn/usercache/user/appcache/application_1507044435666_0035/container_1507044435666_0035_01_000002/pyspark.zip/pyspark/serializers.py", line 272, in dump_stream
    bytes = self.serializer.dumps(vs)
  File "<ipython-input-2-536808351108>", line 14, in dumps
PicklingError: Can't pickle <class 'CustomClass.Location'>: attribute lookup Location failed
What am I missing?
Thanks.
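A hedged observation from the traceback, not a verified fix: line 272 of pyspark's serializers.py shows dumps() being handed vs, a whole batch of records collected by the batching serializer, so the isinstance() checks above compare a list against CustomClass and the entire batch falls through to cPickle, which then fails on the custom Location objects. A sketch of a batch-aware variant, assuming the CustomSerializer defined in the question and a module-level cPickle import:

import cPickle as pickle

class BatchAwareSerializer(CustomSerializer):
    # hypothetical subclass: handle the batches (lists) that dump_stream passes in
    def dumps(self, obj):
        if isinstance(obj, (list, tuple)):
            # tag the frame as a batch and apply the per-object logic to each element
            return '\4' + pickle.dumps([self.dumps(o) for o in obj])
        return CustomSerializer.dumps(self, obj)

    def loads(self, bytes_str):
        if bytes_str[0] == '\4':
            return [self.loads(b) for b in pickle.loads(bytes_str[1:])]
        return CustomSerializer.loads(self, bytes_str)

Even with batching handled, the CustomClass module still has to be importable on the executors (for example, shipped via --py-files) for either branch to work.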