TypeError: super(type, obj): obj must be an instance or subtype of type (google-bigquery)

from google.cloud import bigquery
import os

# point the client at the service account key file
credentials_path = '....keyBigQuery.json'
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = credentials_path

client = bigquery.Client()
table_id = '...VENTAS'

row_to_insert = [
    {'ID_VENTA': 'E001-20',
     'TIPO_DOCUMENTO': 'FACTURA',
     'CLIENTE': 'LEOPOLDO',
     'FECHA_EMISION': '12/10/2023',
     'FORMA_COBRO': 'CONTADO',
     'TOTAL_PRODUCTOS': 1,
     'TOTAL_CUOTAS': 1,
     'MONTO_TOTAL': 15520,
     'ESTADO': 'CANCELADO'},
    {'ID_VENTA': 'E001-21',
     'TIPO_DOCUMENTO': 'FACTURA',
     'CLIENTE': 'LEOPOLDO',
     'FECHA_EMISION': '12/10/2023',
     'FORMA_COBRO': 'CONTADO',
     'TOTAL_PRODUCTOS': 1,
     'TOTAL_CUOTAS': 1,
     'MONTO_TOTAL': 15520,
     'ESTADO': 'CANCELADO'}
]

client.insert_rows_json(table_id, row_to_insert)
Good evening,
I'm doing tests and I get this error:
TypeError: super(type, obj): obj must be an instance or subtype of type
I would appreciate a possible solution.

Related

Getting error in a python script when using QuickSight API calls to retrieve the value of user parameter selection

I am working on a Python script which will use the QuickSight APIs to retrieve the user parameter selections, but I keep getting the error below:
parameters = response['Dashboard']['Parameters']
KeyError: 'Parameters'
If I try different code to retrieve the datasets in my QS account, it works, but the Parameters code doesn't. I think I am missing some configuration.
# Code to retrieve the parameters from a QS dashboard (which fails):
import boto3

quicksight = boto3.client('quicksight')
response = quicksight.describe_dashboard(
    AwsAccountId='99999999999',
    DashboardId='zzz-zzzz-zzzz'
)
parameters = response['Dashboard']['Parameters']
for parameter in parameters:
    print(parameter['Name'], ':', parameter['Value'])
# Code to display the datasets in the QS account (which works):
import boto3
import json

account_id = '99999999999'
session = boto3.Session(profile_name='default')
qs_client = session.client('quicksight')

response = qs_client.list_data_sets(AwsAccountId=account_id, MaxResults=100)
results = response['DataSetSummaries']
while "NextToken" in response.keys():
    response = qs_client.list_data_sets(AwsAccountId=account_id, MaxResults=100, NextToken=response["NextToken"])
    results.extend(response["DataSetSummaries"])

for i in results:
    x = i['DataSetId']
    try:
        response = qs_client.describe_data_set(AwsAccountId=account_id, DataSetId=x)
        print("succeeded loading: {} for data set {} ".format(x, response['DataSet']['Name']))
    except:
        print("failed loading: {} ".format(x))

Vertex AI Model Batch prediction, issue with referencing existing model and input file on Cloud Storage

I'm struggling to correctly set up a Vertex AI pipeline which does the following:
1. read data from an API, store it in GCS, and use it as the input for batch prediction;
2. get an existing model (video classification on Vertex AI);
3. create a batch prediction job with the input from point 1.
As you will see, I don't have much experience with Vertex Pipelines/Kubeflow, so I'm asking for help/advice; I hope it's just some beginner mistake.
This is the gist of the code I'm using as the pipeline:
from google_cloud_pipeline_components import aiplatform as gcc_aip
from kfp.v2 import dsl
from kfp.v2.dsl import component
from kfp.v2.dsl import (
    Output,
    Artifact,
    Model,
)

PROJECT_ID = 'my-gcp-project'
BUCKET_NAME = "mybucket"
PIPELINE_ROOT = "{}/pipeline_root".format(BUCKET_NAME)


@component
def get_input_data() -> str:
    # getting data from API, save to Cloud Storage
    # return GCS URI
    gcs_batch_input_path = 'gs://somebucket/file'
    return gcs_batch_input_path


@component(
    base_image="python:3.9",
    packages_to_install=['google-cloud-aiplatform==1.8.0']
)
def load_ml_model(project_id: str, model: Output[Artifact]):
    """Load existing Vertex model"""
    import google.cloud.aiplatform as aip

    model_id = '1234'
    model = aip.Model(model_name=model_id, project=project_id, location='us-central1')


@dsl.pipeline(
    name="batch-pipeline", pipeline_root=PIPELINE_ROOT,
)
def pipeline(gcp_project: str):
    input_data = get_input_data()
    ml_model = load_ml_model(gcp_project)

    gcc_aip.ModelBatchPredictOp(
        project=PROJECT_ID,
        job_display_name=f'test-prediction',
        model=ml_model.output,
        gcs_source_uris=[input_data.output],  # this doesn't work
        # gcs_source_uris=['gs://mybucket/output/'],  # hardcoded gs uri works
        gcs_destination_output_uri_prefix=f'gs://{PIPELINE_ROOT}/prediction_output/'
    )


if __name__ == '__main__':
    from kfp.v2 import compiler
    import google.cloud.aiplatform as aip

    pipeline_export_filepath = 'test-pipeline.json'
    compiler.Compiler().compile(pipeline_func=pipeline,
                                package_path=pipeline_export_filepath)

    # pipeline_params = {
    #     'gcp_project': PROJECT_ID,
    # }
    # job = aip.PipelineJob(
    #     display_name='test-pipeline',
    #     template_path=pipeline_export_filepath,
    #     pipeline_root=f'gs://{PIPELINE_ROOT}',
    #     project=PROJECT_ID,
    #     parameter_values=pipeline_params,
    # )
    # job.run()
When running the pipeline, it throws this exception at the batch prediction step:
details = "List of found errors: 1.Field: batch_prediction_job.model; Message: Invalid Model resource name.
so I'm not sure what could be wrong. I tried to load the model in a notebook (outside of a component) and it returns correctly.
The second issue I'm having is referencing a GCS URI output from a component as the batch job input:
input_data = get_input_data2()
gcc_aip.ModelBatchPredictOp(
    project=PROJECT_ID,
    job_display_name=f'test-prediction',
    model=ml_model.output,
    gcs_source_uris=[input_data.output],  # this doesn't work
    # gcs_source_uris=['gs://mybucket/output/'],  # hardcoded gs uri works
    gcs_destination_output_uri_prefix=f'gs://{PIPELINE_ROOT}/prediction_output/'
)
During compilation I get the following exception: TypeError: Object of type PipelineParam is not JSON serializable, though I think this could be an issue with the ModelBatchPredictOp component.
Again, any help/advice is appreciated; I've been dealing with this since yesterday, so maybe I missed something obvious.
Libraries I'm using:
google-cloud-aiplatform==1.8.0
google-cloud-pipeline-components==0.2.0
kfp==1.8.10
kfp-pipeline-spec==0.1.13
kfp-server-api==1.7.1
UPDATE
After the comments, some research, and tuning, this works for referencing the model:
@component
def load_ml_model(project_id: str, model: Output[Artifact]):
    region = 'us-central1'
    model_id = '1234'
    model_uid = f'projects/{project_id}/locations/{region}/models/{model_id}'
    model.uri = model_uid
    model.metadata['resourceName'] = model_uid
And then I can use it as intended:
batch_predict_op = gcc_aip.ModelBatchPredictOp(
    project=gcp_project,
    job_display_name=f'batch-prediction-test',
    model=ml_model.outputs['model'],
    gcs_source_uris=[input_batch_gcs_path],
    gcs_destination_output_uri_prefix=f'gs://{BUCKET_NAME}/prediction_output/test'
)
UPDATE 2
Regarding the GCS path, a workaround is to define the path outside of the component and pass it in as an input parameter, for example (abbreviated):
@dsl.pipeline(
    name="my-pipeline",
    pipeline_root=PIPELINE_ROOT,
)
def pipeline(
    gcp_project: str,
    region: str,
    bucket: str
):
    ts = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    gcs_prediction_input_path = f'gs://{BUCKET_NAME}/prediction_input/video_batch_prediction_input_{ts}.jsonl'

    batch_input_data_op = get_input_data(gcs_prediction_input_path)  # this loads the input data to the GCS path

    batch_predict_op = gcc_aip.ModelBatchPredictOp(
        project=gcp_project,
        model=training_job_run_op.outputs["model"],
        job_display_name='batch-prediction',
        # gcs_source_uris=[batch_input_data_op.output],
        gcs_source_uris=[gcs_prediction_input_path],
        gcs_destination_output_uri_prefix=f'gs://{BUCKET_NAME}/prediction_output/',
    ).after(batch_input_data_op)  # 'after' is needed so this runs once the input data is prepared, since get_input_data doesn't return anything
I'm still not sure why it doesn't work/compile when I return the GCS path from the get_input_data component.
I'm glad you solved most of your main issues and found a workaround for the model declaration.
Regarding your input.output observation on gcs_source_uris, the reason behind it is the way the function/class returns the value. If you dig into the classes/methods of google_cloud_pipeline_components, you will find that it implements a structure which lets you use .outputs on the value returned by the called function.
If you look at the implementation of one of the pipeline components, you will find that it returns an output array from the convert_method_to_component function. So, in order to get the same behaviour in your custom class/function, it should return a value that can be accessed as an attribute. Below is a basic implementation of it.
class CustomClass():
    def __init__(self):
        self.return_val = {'path': 'custompath', 'desc': 'a desc'}

    @property
    def output(self):
        return self.return_val


hello = CustomClass()
print(hello.output['path'])
If you want to dig into it more, you can go to the following pages:
convert_method_to_component, the implementation of the convert_method_to_component function
Properties, the basics of property in Python.
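To tie this back to KFP itself, here is a minimal sketch (consume_uri below is a hypothetical downstream component added only for illustration, not part of the original pipeline): a @component function that returns a plain str exposes that value to later steps as task.output, which is a placeholder at compile time and is only resolved to the real string at runtime.
from kfp.v2 import dsl
from kfp.v2.dsl import component


@component
def get_input_data() -> str:
    # a component with a single return value exposes it downstream as `.output`
    return 'gs://somebucket/file'


@component
def consume_uri(uri: str):
    # hypothetical consumer, only here to show how the output is wired
    print(uri)


@dsl.pipeline(name='output-demo', pipeline_root='gs://mybucket/pipeline_root')
def demo_pipeline():
    input_task = get_input_data()
    # `.output` is a PipelineParam-like placeholder here, not the string itself
    consume_uri(uri=input_task.output)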

TypeError: find_one_and_update() missing 1 required positional argument: 'update'

I need help. I'm not sure what I'm doing wrong; I keep getting this error and I'm not sure why. Can anyone give me any advice?
TypeError: find_one_and_update() missing 1 required positional argument: 'update'
Here is my test code:
import pymongo
from pymongo import ReturnDocument
import datetime
from animalsCRUD import AnimalShelter

# username = "aacuser"
# password = "password"
insertRec = AnimalShelter("aacuser", "password")
locateRec = AnimalShelter("aacuser", "password")
updateRec = AnimalShelter("aacuser", "password")
deleteRec = AnimalShelter("aacuser", "password")

animal = {"age_upon_outcome": "5 years", "animal_id": "A333333", "animal_type": "Dog",
          "breed": "Derp", "color": "White",
          "date_of_birth": "07/19/19", "datetime": datetime.datetime.now(), "name": "",
          "outcome_subtype": "Foster",
          "outcome_type": "Adoption", "sex_upon_outcome": "Intact Female", "location_lat": 30.60784677,
          "location_long": -97.35087807, "age_upon_outcome_in_weeks": 64.24642857}
critter = {"animal_id": "A333333"}
changeCritter = ({"animal_id": "A333333"}, {'$set': {'animal_type': 'Cat'}})

print(insertRec.create(animal))
locateRec.locate(critter)
updateRec.update(changeCritter)
locateRec.locate(critter)
deleteRec.delete(critter)
The update method takes two parameters, so you can unpack the tuple:
updateRec.update(changeCritter[0], changeCritter[1])
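The error itself comes from pymongo: Collection.find_one_and_update(filter, update, ...) expects the filter and the update document as two separate positional arguments, so passing the whole tuple only fills the filter slot. Below is a minimal sketch of the call the CRUD method presumably needs to make; the connection string and the database/collection names are assumptions for illustration, not taken from the original code.
import pymongo
from pymongo import ReturnDocument

client = pymongo.MongoClient("mongodb://localhost:27017")  # assumed connection string
collection = client["AAC"]["animals"]                      # assumed database/collection names

result = collection.find_one_and_update(
    {"animal_id": "A333333"},              # filter: which document to match
    {"$set": {"animal_type": "Cat"}},      # update: how to modify it
    return_document=ReturnDocument.AFTER,  # return the document after the update
)
print(result)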

How to use `from_orm` if the pydantic model defines aliases?

Though pydantic's ORM mode is documented here, unfortunately there is no documentation for usage with aliases.
How do you use from_orm if the pydantic model defines aliases?
It seems that the from_orm factory forgets about all non-aliased names if aliases exist; see the error message and the corresponding code below. Is that a bug or a feature?
The code snippet below fails unexpectedly with a validation error:
pydantic.error_wrappers.ValidationError: 1 validation error for SimpleModel
threeWordsId
  field required (type=value_error.missing)
from sqlalchemy import Column, String
from sqlalchemy.ext.declarative import declarative_base
from pydantic import BaseModel, Field

Base = declarative_base()


class SimpleOrm(Base):
    __tablename__ = 'simples'
    three_words_id = Column(String, primary_key=True)


class SimpleModel(BaseModel):
    three_words_id: str = Field(..., alias="threeWordsId")

    class Config:
        orm_mode = True


simple_orm = SimpleOrm(three_words_id='abc')
simple_oops = SimpleModel.from_orm(simple_orm)
Use allow_population_by_field_name = True in the model's Config, like this:
from sqlalchemy import Column, String
from sqlalchemy.ext.declarative import declarative_base
from pydantic import BaseModel, Field

Base = declarative_base()


class SimpleOrm(Base):
    __tablename__ = 'simples'
    three_words_id = Column(String, primary_key=True)


class SimpleModel(BaseModel):
    three_words_id: str = Field(..., alias="threeWordsId")

    class Config:
        orm_mode = True
        allow_population_by_field_name = True
        # allow_population_by_alias = True  # in case pydantic.version.VERSION < 1.0


simple_orm = SimpleOrm(three_words_id='abc')
simple_oops = SimpleModel.from_orm(simple_orm)
print(simple_oops.json())               # {"three_words_id": "abc"}
print(simple_oops.json(by_alias=True))  # {"threeWordsId": "abc"}

from fastapi import FastAPI

app = FastAPI()


@app.get("/model", response_model=SimpleModel)
def get_model():
    # results in {"threeWordsId":"abc"}
    return SimpleOrm(three_words_id='abc')
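As a side note (an addition for illustration, not part of the original answer): FastAPI serializes the response_model using aliases by default; if you want the endpoint to return the snake_case field names instead, the route decorator accepts response_model_by_alias=False. The route path and function name below are made up for the example.
@app.get("/model-by-field", response_model=SimpleModel, response_model_by_alias=False)
def get_model_by_field():
    # hypothetical second route; results in {"three_words_id": "abc"}
    return SimpleOrm(three_words_id='abc')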

Case Class serialization in Spark

In a Spark app (Spark 2.1), I'm trying to pass a case class as an input parameter to a function that is meant to run on executors:
object TestJob extends App {

  val appName = "TestJob"
  val out = "out"
  val p = Params("my-driver-string")

  val spark = SparkSession.builder()
    .appName(appName)
    .getOrCreate()

  import spark.implicits._

  (1 to 100).toDF.as[Int].flatMap(i => Dummy.process(i, p))
    .write
    .option("header", "true")
    .csv(out)
}

object Dummy {

  def process(i: Int, v: Params): Vector[String] = {
    Vector { if (i % 2 == 1) v + "_odd" else v + "_even" }
  }
}

case class Params(v: String)
When I run it with master local[*] everything goes well, but when running on a cluster, the Params class state is not serialized and the output is:
null_even
null_odd
...
Could you please help me understand what I'm doing wrong?
Googling around, I found this post that gave me the solution: Spark broadcasted variable returns NullPointerException when run in Amazon EMR cluster.
In the end, the problem is due to extending App.
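For completeness, here is a sketch of the restructured driver (Dummy and Params stay exactly as above); this is an illustrative rewrite, not code from the original post. scala.App relies on DelayedInit, so vals declared in the object body can still be uninitialized (null) when the object is shipped to executors; moving the body into an explicit main method avoids that.
import org.apache.spark.sql.SparkSession

object TestJob {

  def main(args: Array[String]): Unit = {
    val appName = "TestJob"
    val out = "out"
    // a local val is captured by value in the flatMap closure,
    // so it is serialized correctly to the executors
    val p = Params("my-driver-string")

    val spark = SparkSession.builder()
      .appName(appName)
      .getOrCreate()

    import spark.implicits._

    (1 to 100).toDF.as[Int]
      .flatMap(i => Dummy.process(i, p))
      .write
      .option("header", "true")
      .csv(out)
  }
}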