PyMongo freezes when I call insert - pymongo

I'm trying to learn how to use PyMongo, so I borrowed some code from a tutorial. Here's the entire program:
from pymongo import MongoClient
# Sample documents to insert: one {'name', 'price'} dict per car.
cars = [
    {'name': car_name, 'price': car_price}
    for car_name, car_price in (
        ('Audi', 52642),
        ('Mercedes', 57127),
        ('Skoda', 9000),
        ('Volvo', 29000),
        ('Bentley', 350000),
        ('Citroen', 21000),
        ('Hummer', 41400),
        ('Volkswagen', 21600),
    )
]

client = MongoClient('mongodb://localhost:27017/')
print("Created client")

with client:
    db = client.testdb
    print("Created db")
    # This is the first call that has to reach a MongoDB server at the URI
    # above; the server is installed separately from the PyMongo package.
    db.cars.insert_many(cars)
    print("Inserted")
When I run it, it prints "Created client" and "Created db", but never prints "Inserted", and the program never terminates.
I'm using Python 3.8.5, the Eclipse IDE, and I just did "pip install PyMongo" today, so I should have the latest version. Thanks for any help.

SOLVED:
Whoops, I didn't realize that you have to install MongoDB separately from PyMongo!

Related

Data ingestion with dataflow write to bq file error

I'm trying to ingest a CSV file into BigQuery using Apache Beam and Dataflow. Here's my code:
import logging
import apache_beam as beam
from apache_beam.options.pipeline_options import PipelineOptions
# GCS prefix of the input files; run() appends a file pattern to it.
gcs_bucket_name = "gs://bck-fr-fichiers-manuel-dev/de_par_categorie_et_code_rome/"
# Destination BigQuery table (the string looks like 'project:dataset.table').
target_table_annonce = 'fr-parisraw-dev-8ef8:pole_emploi.de_par_categorie_et_code_rome'
# Schema handed to the BigQuery write: six NULLABLE STRING columns whose names
# match the keys of the row dicts produced by PrepareBqRowDoFn.
table_schema_annonce = {'fields': [
{'name': 'cd_metier_rome', 'type': 'STRING', 'mode': 'NULLABLE'},
{'name': 'lb_metier_rome', 'type': 'STRING', 'mode': 'NULLABLE'},
{'name': 'cd_departement', 'type': 'STRING', 'mode': 'NULLABLE'},
{'name': 'lb_departement', 'type': 'STRING', 'mode': 'NULLABLE'},
{'name': 'nb_demandeur', 'type': 'STRING', 'mode': 'NULLABLE'},
{'name': 'mois', 'type': 'STRING', 'mode': 'NULLABLE'}
]}
# DoFn
class PrepareBqRowDoFn(beam.DoFn):
    """Turn one ';'-delimited, double-quoted CSV line into a BigQuery row dict."""

    def process(self, element, *args, **kwargs):
        """Yield one row dict per CSV record found in ``element``.

        Args:
            element: A line of text as read from the input file.

        Yields:
            dict: The first six columns keyed as ``cd_metier_rome``,
            ``lb_metier_rome``, ``cd_departement``, ``lb_departement``,
            ``nb_demandeur`` and ``mois``.

        Records with fewer than six columns are not yielded; the whole
        element is logged at INFO level instead.
        """
        # Imported here, as in the original, so the DoFn carries its own
        # dependency. The unused datetime/timedelta/re imports were dropped.
        import csv

        # basicConfig does nothing once the root logger already has handlers,
        # so repeating it per element is harmless; it is kept so that the
        # INFO message below stays visible exactly as before.
        logging.basicConfig(level=logging.INFO)
        dofn_logger = logging.getLogger("PREPAREBQROWDOFN_LOGGER")

        # Disabled clean-up step kept for reference (needs `import re`):
        # element = re.sub(r'(?=[^"]+)¤(?=[^"]+)', '', element)
        reader = csv.reader(element.splitlines(), quotechar='"',
                            delimiter=';', quoting=csv.QUOTE_ALL,
                            skipinitialspace=True)
        for row in reader:
            try:
                # Only the column lookups can raise IndexError, so only they
                # are guarded.
                bq_row = {"cd_metier_rome": row[0],
                          "lb_metier_rome": row[1],
                          "cd_departement": row[2],
                          "lb_departement": row[3],
                          "nb_demandeur": row[4],
                          "mois": row[5]
                          }
            except IndexError:
                dofn_logger.info("Error Row : %s", element)
            else:
                yield bq_row
def run():
    """Build and run the pipeline: read CSV lines, map to rows, write to BigQuery.

    For every file pattern the same three labelled steps are attached to the
    pipeline; the pattern is part of each label.
    """
    p = beam.Pipeline(options=PipelineOptions())

    for pattern in ['de_par_*.csv']:
        # `>>` binds tighter than `|`, so each label is attached to its
        # transform before the transform is applied.
        (
            p
            | 'Read File From GCS {}'.format(pattern) >> beam.io.ReadFromText(
                gcs_bucket_name + pattern)
            | 'Create Row {}'.format(pattern) >> beam.ParDo(PrepareBqRowDoFn())
            | 'Write to BQ {}'.format(pattern) >> beam.io.Write(
                beam.io.WriteToBigQuery(
                    target_table_annonce,
                    schema=table_schema_annonce,
                    create_disposition=beam.io.BigQueryDisposition.CREATE_IF_NEEDED,
                    write_disposition=beam.io.BigQueryDisposition.WRITE_APPEND))
        )

    p.run()


if __name__ == '__main__':
    logging.getLogger().setLevel(logging.INFO)
    run()
The pipeline generated looks like this :
At each step I can see that the rows are being processed by Dataflow:
Step 1 (Read File From GCS de_par_*.csv) :
Step 2 (Create Row de_par_*.csv) :
But the final step 3 (Write to BQ de_par_*.csv) :
I get 0 lines

Python ML LSTM Stock Prediction with Dash No Output Code Just Keeps Running

I'm trying to run the following code in Jupyter notebook - but it just keeps running endlessly with no output. I'm following the tutorial from: https://data-flair.training/blogs/stock-price-prediction-machine-learning-project-in-python/
The code is from the stock_app.py which doesn't seem to be working:
import dash
import dash_core_components as dcc
import dash_html_components as html
import pandas as pd
import plotly.graph_objs as go
from dash.dependencies import Input, Output
from keras.models import load_model
from sklearn.preprocessing import MinMaxScaler
import numpy as np
app = dash.Dash()
server = app.server

# Number of leading (oldest) rows treated as the training split.
TRAIN_ROWS = 987
# Each model input is the previous WINDOW scaled closing values.
WINDOW = 60

# Load the price history, index it by date and sort oldest-first.
df_nse = pd.read_csv("./NSE-TATA.csv")
df_nse["Date"] = pd.to_datetime(df_nse.Date, format="%Y-%m-%d")
df_nse.index = df_nse['Date']
data = df_nse.sort_index(ascending=True, axis=0)

# Keep only the closing price, still indexed by Date.
# The original filled an empty frame cell by cell with
# `new_data["Date"][i] = ...`; that chained assignment does not update the
# frame when pandas Copy-on-Write is active and depends on positional
# `Series[i]` lookups, so the column is selected directly instead.
new_data = data[["Close"]].copy()

dataset = new_data.values
train = dataset[0:TRAIN_ROWS, :]
valid = dataset[TRAIN_ROWS:, :]

# Scale closing prices to [0, 1]; the scaler is fitted on the whole dataset.
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit_transform(dataset)

# Training windows: WINDOW consecutive values -> the value that follows them.
x_train = np.array([scaled_data[i - WINDOW:i, 0] for i in range(WINDOW, len(train))])
y_train = np.array([scaled_data[i, 0] for i in range(WINDOW, len(train))])
x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))

model = load_model("saved_ltsm_model.h5")

# Inputs for prediction: the validation rows plus the WINDOW rows before them,
# so that the first validation row also has a full window.
inputs = new_data[len(new_data) - len(valid) - WINDOW:].values
inputs = inputs.reshape(-1, 1)
inputs = scaler.transform(inputs)

X_test = np.array([inputs[i - WINDOW:i, 0] for i in range(WINDOW, inputs.shape[0])])
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))

# Predict, then map the scaled predictions back to price units.
closing_price = model.predict(X_test)
closing_price = scaler.inverse_transform(closing_price)

train = new_data[:TRAIN_ROWS]
# Explicit copy so that adding a column never writes into a slice of new_data.
valid = new_data[TRAIN_ROWS:].copy()
valid['Predictions'] = closing_price

df = pd.read_csv("./stock_data.csv")
def _closing_price_graph(graph_id, prices):
    """Return a dcc.Graph that plots ``prices`` as markers against its own index."""
    return dcc.Graph(
        id=graph_id,
        figure={
            "data": [
                go.Scatter(
                    x=prices.index,
                    y=prices,
                    mode='markers'
                )
            ],
            "layout": go.Layout(
                title='scatter plot',
                xaxis={'title': 'Date'},
                yaxis={'title': 'Closing Rate'}
            )
        }
    )


def _stock_dropdown(dropdown_id):
    """Return the multi-select stock picker used on the second tab ('FB' preselected)."""
    return dcc.Dropdown(
        id=dropdown_id,
        options=[{'label': 'Tesla', 'value': 'TSLA'},
                 {'label': 'Apple', 'value': 'AAPL'},
                 {'label': 'Facebook', 'value': 'FB'},
                 {'label': 'Microsoft', 'value': 'MSFT'}],
        multi=True, value=['FB'],
        style={"display": "block", "margin-left": "auto",
               "margin-right": "auto", "width": "60%"})


# Page layout: a title and two tabs.
#   Tab 1 - actual vs. predicted closing prices for the validation rows.
#   Tab 2 - high/low and volume graphs, each driven by its own dropdown.
app.layout = html.Div([
    html.H1("Stock Price Analysis Dashboard", style={"textAlign": "center"}),
    dcc.Tabs(id="tabs", children=[
        dcc.Tab(label='NSE-TATAGLOBAL Stock Data', children=[
            html.Div([
                html.H2("Actual closing price", style={"textAlign": "center"}),
                # Previously plotted against train.index, which paired the
                # validation prices with dates from the training rows.
                _closing_price_graph("Actual Data", valid["Close"]),
                html.H2("LSTM Predicted closing price", style={"textAlign": "center"}),
                _closing_price_graph("Predicted Data", valid["Predictions"]),
            ])
        ]),
        dcc.Tab(label='Facebook Stock Data', children=[
            html.Div([
                html.H1("Facebook Stocks High vs Lows",
                        style={'textAlign': 'center'}),
                _stock_dropdown('my-dropdown'),
                dcc.Graph(id='highlow'),
                html.H1("Facebook Market Volume", style={'textAlign': 'center'}),
                _stock_dropdown('my-dropdown2'),
                dcc.Graph(id='volume')
            ], className="container"),
        ])
    ])
])
# The leading '@' had been turned into '#', which commented out the decorator
# and left the next line unbalanced.
@app.callback(Output('highlow', 'figure'),
              [Input('my-dropdown', 'value')])
def update_graph(selected_dropdown):
    """Build the high/low price figure for the selected stocks.

    Args:
        selected_dropdown: Ticker symbols chosen in 'my-dropdown'
            (e.g. ['FB', 'AAPL']). ``None`` is treated as no selection.

    Returns:
        dict: A figure with all 'High' line traces followed by all 'Low'
        line traces, plus a layout with a range selector and range slider
        on the date axis.
    """
    dropdown = {"TSLA": "Tesla", "AAPL": "Apple", "FB": "Facebook", "MSFT": "Microsoft"}
    # Guard against a cleared selection arriving as None.
    selected_dropdown = selected_dropdown or []

    trace1 = []
    trace2 = []
    for stock in selected_dropdown:
        # Filter once per stock instead of once per plotted column.
        stock_rows = df[df["Stock"] == stock]
        trace1.append(
            go.Scatter(x=stock_rows["Date"],
                       y=stock_rows["High"],
                       mode='lines', opacity=0.7,
                       name=f'High {dropdown[stock]}', textposition='bottom center'))
        trace2.append(
            go.Scatter(x=stock_rows["Date"],
                       y=stock_rows["Low"],
                       mode='lines', opacity=0.6,
                       name=f'Low {dropdown[stock]}', textposition='bottom center'))

    # All highs first, then all lows (same order as before).
    data = trace1 + trace2
    figure = {'data': data,
              'layout': go.Layout(
                  colorway=["#5E0DAC", '#FF4F00', '#375CB1',
                            '#FF7400', '#FFF400', '#FF0056'],
                  height=600,
                  title=f"High and Low Prices for {', '.join(str(dropdown[i]) for i in selected_dropdown)} Over Time",
                  xaxis={"title": "Date",
                         'rangeselector': {'buttons': [
                             {'count': 1, 'label': '1M',
                              'step': 'month',
                              'stepmode': 'backward'},
                             {'count': 6, 'label': '6M',
                              'step': 'month',
                              'stepmode': 'backward'},
                             {'step': 'all'}]},
                         'rangeslider': {'visible': True}, 'type': 'date'},
                  yaxis={"title": "Price (USD)"})}
    return figure
# The leading '@' had been turned into '#', which commented out the decorator
# and left the next line unbalanced.
# NOTE(review): this function reuses the name `update_graph`; that appears to
# be fine because the decorator registers each callback when it is defined,
# but confirm against the Dash documentation.
@app.callback(Output('volume', 'figure'),
              [Input('my-dropdown2', 'value')])
def update_graph(selected_dropdown_value):
    """Build the traded-volume figure for the selected stocks.

    Args:
        selected_dropdown_value: Ticker symbols chosen in 'my-dropdown2'
            (e.g. ['FB', 'AAPL']). ``None`` is treated as no selection.

    Returns:
        dict: A figure with one 'Volume' line trace per selected stock, plus
        a layout with a range selector and range slider on the date axis.
    """
    dropdown = {"TSLA": "Tesla", "AAPL": "Apple", "FB": "Facebook", "MSFT": "Microsoft"}
    # Guard against a cleared selection arriving as None.
    selected_dropdown_value = selected_dropdown_value or []

    trace1 = []
    for stock in selected_dropdown_value:
        # Filter once per stock instead of once per plotted column.
        stock_rows = df[df["Stock"] == stock]
        trace1.append(
            go.Scatter(x=stock_rows["Date"],
                       y=stock_rows["Volume"],
                       mode='lines', opacity=0.7,
                       name=f'Volume {dropdown[stock]}', textposition='bottom center'))

    data = list(trace1)
    figure = {'data': data,
              'layout': go.Layout(
                  colorway=["#5E0DAC", '#FF4F00', '#375CB1',
                            '#FF7400', '#FFF400', '#FF0056'],
                  height=600,
                  title=f"Market Volume for {', '.join(str(dropdown[i]) for i in selected_dropdown_value)} Over Time",
                  xaxis={"title": "Date",
                         'rangeselector': {'buttons': [
                             {'count': 1, 'label': '1M',
                              'step': 'month',
                              'stepmode': 'backward'},
                             {'count': 6, 'label': '6M',
                              'step': 'month',
                              'stepmode': 'backward'},
                             {'step': 'all'}]},
                         'rangeslider': {'visible': True}, 'type': 'date'},
                  yaxis={"title": "Transactions Volume"})}
    return figure


if __name__ == '__main__':
    # Starts the Dash development server; the call keeps running while the
    # app is being served.
    app.run_server(debug=True)

How to parse a nested column in a df column?

Is there a smart pythonic way to parse a nested column in a pandas dataframe like this one to 3 different columns? So for example the column could look like this:
col1
[{'name': 'amount', 'value': 1}, {'name': 'frequency', 'value': 2}, {'name': 'freq_unit', 'value': 'month'}]
[{'name': 'amount', 'value': 3}, {'name': 'frequency', 'value': 1}, {'name': 'freq_unit', 'value': 'month'}]
And the expected result should be these 3 columns:
amount frequency freq_unit
1 2 month
3 1 month
That's just level 1. Now for level 2: what if the elements in the list still have the same names (amount, frequency and freq_unit), but their order can change? Could the code in the answer deal with this?
col1
[{'name': 'amount', 'value': 1}, {'name': 'frequency', 'value': 2}, {'name': 'freq_unit', 'value': 'month'}]
[{'name': 'amount', 'value': 3}, {'name': 'freq_unit', 'value': 'month'}, {'name': 'frequency', 'value': 1}]
Code to reproduce the data. I really look forward to seeing how the community would solve this. Thank you.
# Every cell of col1 is a list of {'name': ..., 'value': ...} records, one
# record per field, in the field order given below.
_FIELD_NAMES = ('amount', 'frequency', 'freq_unit')
_ROW_VALUES = ((1, 2, 'month'), (3, 1, 'month'))

data = {
    'col1': [
        [{'name': field, 'value': value}
         for field, value in zip(_FIELD_NAMES, row_values)]
        for row_values in _ROW_VALUES
    ]
}
df = pd.DataFrame(data)
A combination of list comprehension, itertools.chain, and collections.defaultdict could help out here:
from itertools import chain
from collections import defaultdict
# Group every value under its record name, walking the rows in order.
# Grouping is by name, so the position of a record inside a row does not
# matter; each row must still contain every name for the columns to line up.
data = defaultdict(list)
phase1 = chain.from_iterable(
    [(record["name"], record["value"]) for record in entry]
    for entry in df.col1
)
for key, value in phase1:
    data[key].append(value)
pd.DataFrame(data)
amount frequency freq_unit
0 1 2 month
1 3 1 month
The above is verbose; @piRSquared's comment is much simpler, using a list comprehension:
# One {name: value} dict per row; pandas turns the list of dicts into columns.
rows = [{item["name"]: item["value"] for item in records} for records in df.col1]
pd.DataFrame(rows)
Another idea, but very unnecessary, is to use a list comprehension, combined with Pandas' string methods:
def _column_at(position):
    """Return the 'value' entries found at ``position`` in every row's list.

    The Series is named after the 'name' entry at that position in the first
    row, so this relies on every row listing its records in the same order.
    """
    records = df.col1.str[position]
    column_name = records.str["name"][0]
    return records.str["value"].rename(column_name)


# One column per position, with the count taken from the first row's list.
outcome = [_column_at(num) for num in range(df.col1.str.len()[0])]
pd.concat(outcome, axis='columns')
@piRSquared's solution is the simplest, in my opinion.
You can write a function that will parse each cell in your Series and return a properly formatted Series and use apply to tuck the iteration away:
def custom_parser(record):
    """Convert one cell into a Series indexed by the records' names.

    Args:
        record: A list of dicts, each with a "name" and a "value" key.

    Returns:
        pandas.Series: One entry per dict, labelled by its "name" and holding
        its "value". Labels follow the order of the dicts in ``record``.
    """
    # Keyed by name, so the order of the dicts inside a cell does not affect
    # which label a value ends up under.
    clean_record = {rec["name"]: rec["value"] for rec in record}
    return pd.Series(clean_record)
>>> df["col1"].apply(custom_parser)
amount frequency freq_unit
0 1 2 month
1 3 1 month

How to generate an invoice from a custom module in Odoo13?

I am developing a custom module.
I tried to add it through an object button with the following code, but it doesn't seem to work:
def create_invoice(self):
    """Create a customer invoice for this record's instructor and return it.

    Odoo 13 replaced the ``account.invoice`` model with ``account.move``, and
    the invoice date field is ``invoice_date`` (formerly ``date_invoice``).

    Returns:
        The newly created ``account.move`` record.
    """
    # Reads self.instructor and self.create_date, so exactly one record is
    # expected.
    self.ensure_one()

    # The original passed the literal string 'create_date' as the date.
    # NOTE(review): presumably the record's own creation date was intended;
    # confirm. A Date field takes a date, hence .date() on the datetime.
    invoice_date = self.create_date.date() if self.create_date else False

    rslt = self.env['account.move'].create({
        'partner_id': self.instructor.id,
        # NOTE(review): 'name' on account.move appears to be the entry
        # number, so the free-text label is stored in 'ref' instead; confirm
        # that this is the desired field.
        'ref': 'customer invoice',
        'type': 'out_invoice',
        'invoice_date': invoice_date,
    })
    return rslt
How can I add a button that generates an invoice?
desu
Since Odoo 13, the invoice object has changed: it is now account.move instead of account.invoice. You can use this demo example as a reference.
# Demo: create a customer invoice ('out_invoice') with a single invoice line
# on the account.move model.
# `journal`, `product_id` and `date_invoice` are not defined in this snippet;
# they have to be provided by the surrounding code.
invoice = self.env['account.move'].create({
'type': 'out_invoice',
'journal_id': journal.id,
# NOTE(review): partner_id is given product_id.id here; presumably the id of
# a partner record is intended. Confirm before reuse.
'partner_id': product_id.id,
'invoice_date': date_invoice,
'date': date_invoice,
# NOTE(review): (0, 0, {...}) is presumably the ORM command that creates a
# new line from the given values; confirm against the ORM documentation.
'invoice_line_ids': [(0, 0, {
'product_id': product_id.id,
'quantity': 40.0,
'name': 'product test 1',
'discount': 10.00,
'price_unit': 2.27,
})]
})

populating nested dictionaries with rows from Pandas data frame

I'm trying to populate a dictionary of dictionaries with entries from a Pandas data frame in Python by iterating through the nested dictionary and populating the values of each sub-dictionary with entries from a row of a Pandas data frame.
Although there are as many sub-dictionaries as there are rows in the data frame, all dictionaries get populated with the data from the last row of the data frame, instead of using every row for every dictionary.
Here is a toy reproducible example.
import pandas as pd
# initialize an empty df
data = pd.DataFrame()
# populate data frame with entries
data['name'] = ['Joe Smith', 'Mary James', 'Charles Williams']
data['school'] = ["Jollywood Secondary", "Northgate Sixth From", "Brompton High"]
data['subjects'] = [['Maths', 'Art', 'Biology'], ['English', 'French', 'History'], ['Chemistry', 'Biology', 'English']]

# sub-dictionary template
keys = ['name', 'school', 'subjects']
record = {key: None for key in keys}

# main dictionary
# Each candidate gets its OWN copy of the template. Writing
# `{key: record for key in keys2}` stores the same dict object under all three
# keys, so every write shows up under every candidate and only the last row
# survives.
keys2 = ['cand1', 'cand2', 'cand3']
candidates = {key: dict(record) for key in keys2}

# fill the i-th sub-dictionary from the i-th row of the data frame
# Looping over `keys` also avoids hand-typed key names (the original wrote
# 'subjcts', which added a stray key and left 'subjects' as None).
for i, sub_dict in enumerate(candidates.values()):
    for key in keys:
        sub_dict[key] = data[key].iloc[i]

# result: one row per candidate
# {'cand1': {'name': 'Joe Smith',
#            'school': 'Jollywood Secondary',
#            'subjects': ['Maths', 'Art', 'Biology']},
#  'cand2': {'name': 'Mary James',
#            'school': 'Northgate Sixth From',
#            'subjects': ['English', 'French', 'History']},
#  'cand3': {'name': 'Charles Williams',
#            'school': 'Brompton High',
#            'subjects': ['Chemistry', 'Biology', 'English']}}
How do I need to modify my code to get each dictionary populated with a different row from my data frame?
I did not work through your code to look for the bug, because the solution is a one-liner with the method to_dict.
Here is a minimal working example with your sample data.
import pandas as pd
# Build the sample frame in one step: one column per key, one row per candidate.
data = pd.DataFrame({
    'name': ['Joe Smith', 'Mary James', 'Charles Williams'],
    'school': ["Jollywood Secondary", "Northgate Sixth From", "Brompton High"],
    'subjects': [['Maths', 'Art', 'Biology'],
                 ['English', 'French', 'History'],
                 ['Chemistry', 'Biology', 'English']],
})

# Label the rows cand1..candN so the labels become the outer dictionary keys.
row_labels = []
for position in range(1, len(data) + 1):
    row_labels.append('cand{}'.format(position))
data.index = row_labels

# orient='index' gives {row label: {column name: value}}.
data_dict = data.to_dict(orient='index')
print(data_dict)
This will look something like this
{'cand1': {
'name': 'Joe Smith',
'school': 'Jollywood Secondary',
'subjects': ['Maths', 'Art', 'Biology']},
'cand2': {
'name': 'Mary James',
'school': 'Northgate Sixth From',
'subjects': ['English', 'French', 'History']},
'cand3': {
'name': 'Charles Williams',
'school': 'Brompton High',
'subjects': ['Chemistry', 'Biology', 'English']}}
Consider avoiding the roundabout way of building the dictionary, as Pandas maintains various methods to render nested structures, such as to_dict and to_json. Specifically, consider adding a new column, cand, and setting it as the index for the to_dict output:
# Number the rows from 1 using the existing integer index, as strings.
cand_numbers = (data.index.astype('int') + 1).astype('str')
# Prefix each number with 'cand' and store the labels as a new column.
data['cand'] = 'cand' + pd.Series(cand_numbers)

# With 'cand' as the index, orient='index' gives {cand label: {column: value}}.
indexed_by_cand = data.set_index('cand')
mydict = indexed_by_cand.to_dict(orient='index')
print(mydict)
{'cand1': {'name': 'Joe Smith', 'school': 'Jollywood Secondary',
'subjects': ['Maths', 'Art', 'Biology']},
'cand2': {'name': 'Mary James', 'school': 'Northgate Sixth From',
'subjects': ['English', 'French', 'History']},
'cand3': {'name': 'Charles Williams', 'school': 'Brompton High',
'subjects': ['Chemistry', 'Biology', 'English']}}