I'm having issues persisting documents with pymongo when using insert_many.
I'm handing a list of dicts to insert_many, and it works fine from inside the same script that does the inserting, but not once the script has finished.
import numpy as np
from pymongo import MongoClient


def row_to_doc(row):
    rowdict = row.to_dict()
    for key in rowdict:
        val = rowdict[key]
        if type(val) == float or type(val) == np.float64:
            if np.isnan(val):
                # If we want a SQL style document collection
                rowdict[key] = None
                # If we want a NoSQL style document collection
                # del rowdict[key]
    return rowdict


def dataframe_to_collection(df):
    n = len(df)
    doc_list = []
    for k in range(n):
        doc_list.append(row_to_doc(df.iloc[k]))
    return doc_list


def get_mongodb_client(host="localhost", port=27017):
    return MongoClient(host, port)


def create_collection(client):
    db = client["material"]
    return db["master-data"]


def add_docs_to_mongo(collection, doc_list):
    collection.insert_many(doc_list)


def main():
    client = get_mongodb_client()
    csv_fname = "some_csv_fname.csv"
    df = get_clean_csv(csv_fname)  # my own CSV-loading/cleaning helper (not shown)
    doc_list = dataframe_to_collection(df)
    collection = create_collection(client)
    add_docs_to_mongo(collection, doc_list)
    test_doc = collection.find_one({"MATERIAL": "000000000000000001"})


if __name__ == "__main__":
    main()
When I open up another Python REPL and start looking through the client.material.master_data collection with collection.find_one({"MATERIAL": "000000000000000001"}) or collection.count_documents({}), I get None for the find_one and 0 for the count_documents.
Is there a step where I need to call some method to persist the data to disk? db.collection.save() in the mongo client API sounds like what I need, but from what I have read it's just another way of inserting documents. Any help would be greatly appreciated.
The problem was that I was getting my collection via client.db_name.collection_name, which isn't the same collection my code created: attribute access can only express names that are valid Python identifiers, so client.material.master_data points at a collection literally named "master_data", not the "master-data" (with a hyphen) that insert_many wrote to. Using client.db_name["collection-name"] solved my issue.
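In pymongo, attribute access only works for collection names that are valid Python attribute names, so anything containing a hyphen has to go through bracket lookup. A minimal sketch using the database and collection names from the question:

from pymongo import MongoClient

client = MongoClient("localhost", 27017)

# These are two *different* collections:
hyphenated = client["material"]["master-data"]   # the one the script inserted into
underscored = client.material.master_data        # a separate collection named "master_data"

print(hyphenated.count_documents({}))   # the inserted documents show up here
print(underscored.count_documents({}))  # 0 - nothing was ever written here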
I am trying to extract the content of the [Documentation] section as a string for comparison with another part of a Python script.
I was told to use the Robot Framework API (https://robot-framework.readthedocs.io/en/stable/)
to do the extraction, but I have no idea how.
However, I am required to work with version 3.1.2.
Example:
*** Test Cases ***
ATC Verify that Sensor Battery can enable and disable manufacturing mode
    [Documentation]    E1: This is the description of the test 1
    ...                E2: This is the description of the test 2
    [Tags]    E1    TRACE{Trace_of_E1}
    ...       E2    TRACE{Trace_of_E2}
Extract the string as
E1: This is the description of the test 1
E2: This is the description of the test 2
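Since you are pinned to Robot Framework 3.1.2, one option that already exists in that version is robot.api.TestSuiteBuilder, which builds a suite model you can read each test's documentation from. A minimal sketch (the file name is only a placeholder):

from robot.api import TestSuiteBuilder

# Build a suite model from the .robot file (path is a placeholder, not from the question)
suite = TestSuiteBuilder().build("sensor_battery.robot")

for test in suite.tests:
    # test.doc holds the full [Documentation] text, continuation lines included
    print(test.name)
    print(test.doc)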
Have a look at these examples. I did something similar to generate test plan descriptions. I tried to adapt my code to your requirements, and this could maybe work for you.
import os
import re

from robot.api.parsing import (
    get_model, get_tokens, Documentation, EmptyLine, KeywordCall,
    ModelVisitor, Token
)


class RobotParser(ModelVisitor):
    def __init__(self):
        # Attribute that collects the formatted documentation text
        self.text = ''

    def get_text(self):
        return self.text

    def visit_TestCase(self, node):
        # The matched `TestCase` node is a block with `header` and
        # `body` attributes. `header` is a statement with familiar
        # `get_token` and `get_value` methods for getting certain
        # tokens or their value.
        for keyword in node.body:
            # skip statements that are not [Documentation]
            if keyword.get_value(Token.DOCUMENTATION) is None:
                continue
            self.text += keyword.get_value(Token.ARGUMENT)

    def visit_Documentation(self, node):
        # The matched `Documentation` node with its value
        self.text += node.value + '\n'

    def visit_File(self, node):
        # Call `generic_visit` to visit also child nodes.
        return self.generic_visit(node)


if __name__ == "__main__":
    path = "../tests"
    for filename in os.listdir(path):
        if re.match(r".*\.robot", filename):
            model = get_model(os.path.join(path, filename))
            robot_parser = RobotParser()
            robot_parser.visit(model)
            text = robot_parser.get_text()
The code marked as the best answer didn't quite work for me and has a lot of redundancy, but it inspired me enough to get into the parsing and write it in a much more readable and efficient way that actually works as is. You just have to have your own way of iterating through the filesystem and call the get_robot_metadata(filepath) function on each file.
from robot.api.parsing import (get_model, ModelVisitor, Token)


class RobotParser(ModelVisitor):
    def __init__(self):
        self.testcases = {}

    def visit_TestCase(self, node):
        testcasename = node.header.name
        self.testcases[testcasename] = {}
        for section in node.body:
            if section.get_value(Token.DOCUMENTATION) is not None:
                documentation = section.value
                self.testcases[testcasename]['Documentation'] = documentation
            elif section.get_value(Token.TAGS) is not None:
                tags = section.values
                self.testcases[testcasename]['Tags'] = tags

    def get_testcases(self):
        return self.testcases


def get_robot_metadata(filepath):
    if filepath.endswith('.robot'):
        robot_parser = RobotParser()
        model = get_model(filepath)
        robot_parser.visit(model)
        metadata = robot_parser.get_testcases()
        return metadata
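For example, the function above can be driven like this (the "./tests" directory is only a placeholder):

import os

# Walk a folder of .robot files and collect their metadata
for root, _, files in os.walk("./tests"):
    for filename in files:
        metadata = get_robot_metadata(os.path.join(root, filename))
        if metadata:
            for testcase, fields in metadata.items():
                print(testcase, fields.get('Documentation'), fields.get('Tags'))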
This function will be able to extract the [Documentation] section from the testcase:
def documentation_extractor(testcase):
    documentation = []
    for setting in testcase.settings:
        if len(setting) > 2 and setting[1].lower() == "[documentation]":
            for doc in setting[2:]:
                if doc.startswith("#"):
                    # the start of a comment, so skip the rest of the line
                    break
                documentation.append(doc)
            break
    return "\n".join(documentation)
I made a web app that takes in a text file, reads each line, takes the 11th character, and saves it to a SQLite3 db. How do I lock the database, or have two or more separate tables, while multiple requests are running?
I have added 'ATOMIC_REQUESTS': True to settings.py in Django, and I tried creating temporary tables for each request, but I can't figure it out. I am pretty new to Django 2.2.
My views.py:
def home(request):
    if request.method == 'GET':
        return render(request, 'home.html')
    if request.method == 'POST':
        form = DocumentForm(data=request.POST, files=request.FILES)
        print(form.errors)
        if form.is_valid():
            try:
                f = request.FILES['fileToUpload']
            except:
                print('\033[0;93m' + "No File uploaded, Redirecting" + '\033[0m')
                return HttpResponseRedirect('/tryagain')
            print('\033[32m' + "Success" + '\033[0m')
            print('Working...')
            line = f.readline()
            while line:
                #print(line)
                mst = message_typer.messages.create_message(str(line)[11])
                line = f.readline()
        else:
            print('\033[0;91m' + "Failed to validate Form" + '\033[0m')
        return HttpResponseRedirect('/output')
    return HttpResponse('Failure')
def output(request):
    s = message_typer.messages.filter(message='s').count()
    A = message_typer.messages.filter(message='A').count()
    d = message_typer.messages.filter(message='d').count()
    E = message_typer.messages.filter(message='E').count()
    X = message_typer.messages.filter(message='X').count()
    P = message_typer.messages.filter(message='P').count()
    r = message_typer.messages.filter(message='r').count()
    B = message_typer.messages.filter(message='B').count()
    H = message_typer.messages.filter(message='H').count()
    I = message_typer.messages.filter(message='I').count()
    J = message_typer.messages.filter(message='J').count()
    R = message_typer.messages.filter(message='R').count()
    message_types = {'s': s, 'A': A, 'd': d, 'E': E, 'X': X, 'P': P,
                     'r': r, 'B': B, 'H': H, 'I': I, 'J': J, 'R': R}
    output = {'output': message_types}
    #return HttpResponse('Received')
    message_typer.messages.all().delete()
    return render(request, 'output.html', output)
When the web page loads, it should display a simple breakdown of each character in the 11th position of the uploaded text file.
However, if two requests are running concurrently, the first page that makes the request gets an OperationalError: database is locked.
The traceback points here:
message_typer.messages.all().delete()
The second page will then show the combined totals of the two files that were uploaded.
I do want to wipe the table after so that the next user will have an empty table to populate and perform a count on.
Is there a better way?
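One way to sidestep the shared table (and the delete-after-render race) entirely is to aggregate the counts in memory while reading the upload and render the result directly; a rough sketch reusing the form, template, and character extraction from the views above:

from collections import Counter

def home(request):
    if request.method == 'POST':
        form = DocumentForm(data=request.POST, files=request.FILES)
        if form.is_valid():
            f = request.FILES['fileToUpload']
            # Same character extraction as above, but aggregated per request in memory,
            # so concurrent uploads never share (or lock) a database table.
            counts = Counter(str(line)[11] for line in f if len(str(line)) > 11)
            return render(request, 'output.html', {'output': dict(counts)})
    return render(request, 'home.html')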
Is it possible to have a shared dict of Locks in python3? I need multiple locks because I want to protect a dict of shared resources. Each resource gets a lock:
import multiprocessing

manager = multiprocessing.Manager()
locks = manager.dict({key: manager.Lock() for key in range(100)})
shared_resource = manager.dict({key: SomeClass() for key in range(100)})

# later in a multi-processed function
def foo(key):
    # ...
    locks[key].acquire()
    shared_resource[key] = ...
    locks[key].release()
    # ...
This toy example would fail with:
multiprocessing.managers.RemoteError:
---------------------------------------------------------------------------
Unserializable message: ('#RETURN', <unlocked _thread.lock object at 0x7f9a4c9dc468>)
Any idea how to get around this problem? Could I use condition variables? Or how would you protect a list of resources?
OK, it seems like it was a bug with Python 3.5.
With Python 3.6 it works like a charm.
I'm sure it's possible. I don't get an error when I run this code...I just substituted SomeClass with 'x'. So maybe there's an issue there.
Also, using a context manager to acquire and release the lock is a nice little abstraction...
import multiprocessing
from multiprocessing import Process

manager = multiprocessing.Manager()
locks = {key: manager.Lock() for key in range(100)}
shared_resource = {key: 'x' for key in range(100)}

# later in a multi-processed function
def foo(key):
    # ...
    with locks[key]:
        shared_resource[key] = 'xoyo'

if __name__ == '__main__':
    p = Process(target=foo, args=(1,))
    p.start()
    p = Process(target=foo, args=(1,))
    p.start()
    p = Process(target=foo, args=(1,))
    p.start()
    p.join()
I have a question about using apollo-upload-client and graphene-django. Here I've discovered that apollo-upload-client adds the operations to formData, but here graphene-django only tries to read the query parameter. The question is: where and how should this be fixed?
If you're referring to the data that has a header like (when viewing the HTTP from Chrome tools):
Content-Disposition: form-data; name="operations"
and data like
{"operationName":"MyMutation","variables":{"myData"....}, "query":"mutation MyMutation"...},
the graphene-python library interprets this and assembles it into a query for you, inserting the variables and removing the file data from the query. If you are using Django, you can find all of the uploaded files in info.context.FILES when writing a mutation.
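For illustration, a minimal graphene-django mutation (the names are made up for this example) can pick the uploads straight off the Django request:

import graphene

class UploadFile(graphene.Mutation):
    class Arguments:
        description = graphene.String()

    ok = graphene.Boolean()

    def mutate(self, info, description=None):
        # info.context is the Django HttpRequest, so the multipart uploads
        # are available exactly as in a normal Django view.
        for field_name, uploaded in info.context.FILES.items():
            print(field_name, uploaded.name, uploaded.size)
        return UploadFile(ok=True)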
Here's my solution to support the latest apollo-upload-client (8.1). I recently had to revisit my Django code when I upgraded from apollo-upload-client 5.x to 8.x. Hope this helps.
Sorry I'm using an older graphene-django but hopefully you can update the mutation syntax to the latest.
Upload scalar type (passthrough, basically):
from graphene import Scalar


class Upload(Scalar):
    '''A file upload'''

    @staticmethod
    def serialize(value):
        raise Exception('File upload cannot be serialized')

    @staticmethod
    def parse_literal(node):
        raise Exception('No such thing as a file upload literal')

    @staticmethod
    def parse_value(value):
        return value
My upload mutation:
import graphene
from graphene import relay
from tempfile import NamedTemporaryFile


class UploadImage(relay.ClientIDMutation):
    class Input:
        image = graphene.Field(Upload, required=True)

    success = graphene.Field(graphene.Boolean)

    @classmethod
    def mutate_and_get_payload(cls, input, context, info):
        with NamedTemporaryFile(delete=False) as tmp:
            for chunk in input['image'].chunks():
                tmp.write(chunk)
            image_file = tmp.name
        # do something with image_file
        return UploadImage(success=True)
The heavy lifting happens in a custom GraphQL view. Basically it injects the file object into the appropriate places in the variables map.
import json
import logging

from django.http import HttpResponseBadRequest
from graphene_django.views import GraphQLView, HttpError

logger = logging.getLogger(__name__)


def maybe_int(s):
    try:
        return int(s)
    except ValueError:
        return s


class CustomGraphqlView(GraphQLView):
    def parse_request_json(self, json_string):
        try:
            request_json = json.loads(json_string)
            if self.batch:
                assert isinstance(request_json, list), (
                    'Batch requests should receive a list, but received {}.'
                ).format(repr(request_json))
                assert len(request_json) > 0, 'Received an empty list in the batch request.'
            else:
                assert isinstance(request_json, dict), 'The received data is not a valid JSON query.'
            return request_json
        except AssertionError as e:
            raise HttpError(HttpResponseBadRequest(str(e)))
        except BaseException:
            logger.exception('Invalid JSON')
            raise HttpError(HttpResponseBadRequest('POST body sent invalid JSON.'))

    def parse_body(self, request):
        content_type = self.get_content_type(request)
        if content_type == 'application/graphql':
            return {'query': request.body.decode()}
        elif content_type == 'application/json':
            return self.parse_request_json(request.body.decode('utf-8'))
        elif content_type in ['application/x-www-form-urlencoded', 'multipart/form-data']:
            operations_json = request.POST.get('operations')
            map_json = request.POST.get('map')
            if operations_json and map_json:
                operations = self.parse_request_json(operations_json)
                map = self.parse_request_json(map_json)
                for file_id, f in request.FILES.items():
                    # Each map entry lists dotted paths into the operations
                    # object where the uploaded file should be injected.
                    for name in map[file_id]:
                        segments = [maybe_int(s) for s in name.split('.')]
                        cur = operations
                        while len(segments) > 1:
                            cur = cur[segments.pop(0)]
                        cur[segments.pop(0)] = f
                logger.info('parse_body %s', operations)
                return operations
            else:
                return request.POST
        return {}
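For reference, the multipart body that apollo-upload-client sends (per the GraphQL multipart request spec) decodes into operations and map form fields roughly like the following; the field names and values are illustrative only, not taken from the question:

# What request.POST['operations'] and request.POST['map'] decode to:
operations = {
    "operationName": "UploadImage",
    "query": "mutation UploadImage($input: UploadImageInput!) { ... }",
    "variables": {"input": {"image": None}},   # the file slot starts out as null
}
map = {
    "0": ["variables.input.image"],            # form field "0" carries the actual file
}
# parse_body() above replaces the null at variables.input.image with request.FILES['0'].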
I have developed numerous iOS apps over the years, so I know Objective-C reasonably well.
I'd like to build my first web service to offload some of the most processor-intensive functions.
I'm leaning towards using my Mac as the server, which comes with Apache. I have configured this and it appears to be working as it should (I can type the Mac's IP address and receive a confirmation).
Now I'm trying to decide how to build the server-side web service, which is totally new to me. I'd like to leverage my Objective-C knowledge if possible. I think I'm looking for an Objective-C-compatible web service engine and some examples of how to connect it to browsers and mobile interfaces. I was leaning towards using Amazon's SimpleDB as the database.
BTW: I see Apple have Lion Server, but I cannot work out if this is an option.
Any thoughts/recommendations are appreciated.
There are examples of simple web servers out there written in Objective-C, such as this and this.
That said, there are probably "better" ways of doing this if you don't mind using other technologies. This is a matter of preference, but I've used Python, MySQL, and the excellent web.py framework for these sorts of backends.
For example, here's an example web service (some redundancies omitted...) using the combination of technologies described. I just run this on my server, and it takes care of URL redirection and serves JSON from the db.
import web
import json
import MySQLdb

urls = (
    "/equip/gruppo", "gruppo",   # GET = get all gruppos, POST = save gruppo
    "/equip/frame", "frame"
)


class StatusCode:
    (Success, SuccessNoRows, FailConnect, FailQuery, FailMissingParam, FailOther) = range(6)


# top-level class that handles db interaction
class APIObject:
    def __init__(self):
        self.object_dict = {}  # top-level dictionary to be turned into JSON
        self.rows = []
        self.cursor = ""
        self.conn = ""

    def dbConnect(self):
        try:
            self.conn = MySQLdb.connect(host='localhost', user='my_api_user', passwd='api_user_pw', db='my_db')
            self.cursor = self.conn.cursor(MySQLdb.cursors.DictCursor)
        except:
            self.object_dict['api_status'] = StatusCode.FailConnect
            return False
        else:
            return True

    def queryExecute(self, query):
        try:
            self.cursor.execute(query)
            self.rows = self.cursor.fetchall()
        except:
            self.object_dict['api_status'] = StatusCode.FailQuery
            return False
        else:
            return True


class gruppo(APIObject):
    def GET(self):
        web.header('Content-Type', 'application/json')
        if self.dbConnect() == False:
            return json.dumps(self.object_dict, sort_keys=True, indent=4)
        else:
            if self.queryExecute("SELECT * FROM gruppos") == False:
                return json.dumps(self.object_dict, sort_keys=True, indent=4)
            else:
                self.object_dict['api_status'] = StatusCode.SuccessNoRows if len(self.rows) == 0 else StatusCode.Success
                data_list = []
                for row in self.rows:
                    # create a dictionary with the required elements
                    d = {}
                    d['id'] = row['id']
                    d['maker'] = row['maker_name']
                    d['type'] = row['type_name']
                    # append to the object list
                    data_list.append(d)
                self.object_dict['data'] = data_list
                # return to the client
                return json.dumps(self.object_dict, sort_keys=True, indent=4)
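With one row in the gruppos table, the GET handler above returns JSON shaped roughly like this (the row values are made up for illustration; api_status 0 is StatusCode.Success):

{
    "api_status": 0,
    "data": [
        {
            "id": 1,
            "maker": "Campagnolo",
            "type": "groupset"
        }
    ]
}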