How to perform a data migration with Alembic and two versions of my Table? - sql

I'm trying to refactor a database model by moving one column out of a table into a new table. I'd like to do this using my existing SQLAlchemy Core models and Alembic. I'd also like to use a server-side INSERT ... FROM SELECT ...-style query to migrate the data (docs). By avoiding copying the gazillions of rows into Python, I hope to get maximum scalability, maximum performance and minimum downtime.
My problem is the programmatic use of SQLAlchemy with two versions of the same table name in a single MetaData context. Should I resort to textual SQL instead? 😕
schema.py before:
class User(Base):
__tablename__ = "users"
id = Column(BigInteger, primary_key=True, autoincrement=False, nullable=False)
[...]
profile_picture_url = Column(String, nullable=True)
schema.py after:
class User(Base):
__tablename__ = "users"
id = Column(BigInteger, primary_key=True, autoincrement=False, nullable=False)
[...]
class UserProfileExtras(Base):
__tablename__ = "user_profile_extras"
user_id = Column(BigInteger, ForeignKey("users.id"), index=True, nullable=False)
profile_picture_url = Column(String, nullable=False)
So here's my attempt to create an Alembic upgrade script:
# Import the new/current-in-code models.
from ... import User, UserProfileExtras
# Define the previous User model in order to operate on the current/old schema.
class UserBeforeUpgrade(Base):
__tablename__ = "users"
id = Column(BigInteger, primary_key=True, autoincrement=False, nullable=False)
[...]
profile_picture_url = Column(String, nullable=True)
table_before_upgrade: Table = UserBeforeUpgrade.__table__
new_target_table = UserProfileExtras.__table__
[...]
def upgrade() -> None:
    """Create ``user_profile_extras`` and back-fill it from ``users``.

    The copy is issued as a single server-side ``INSERT ... FROM SELECT``
    statement, so no rows are loaded into Python.
    """
    op.create_table(
        "user_profile_extras",
        sa.Column("user_id", sa.BigInteger(), autoincrement=False, nullable=False),
        sa.Column("profile_picture_url", sa.VARCHAR(), nullable=False),
        [...]
    )

    # Only copy users that actually have a picture: the target column is
    # declared NOT NULL, so NULL source values must be filtered out.
    users_with_picture = select(
        [table_before_upgrade.c.id, table_before_upgrade.c.profile_picture_url]
    ).where(table_before_upgrade.c.profile_picture_url.isnot(None))

    copy_pictures = new_target_table.insert().from_select(
        [new_target_table.c.user_id, new_target_table.c.profile_picture_url],
        users_with_picture,
    )
    op.execute(copy_pictures)
[...]
[...]
Error:
sqlalchemy.exc.InvalidRequestError: Table 'users' is already defined for this MetaData instance.
Specify 'extend_existing=True' to redefine options and columns on an existing Table object.

Related

How to remove SQLAlchemy Many-To-Many Orphans from database?

Context
I have a simple MySQL database written with SQLAlchemy. The following are my two models, Subreddit and Keyword, that have a many-to-many relationship, along with their association table:
subreddits_keywords = db.Table('subreddits_keywords', db.Model.metadata,
db.Column('subreddit_id', db.Integer, db.ForeignKey('subreddits.id', ondelete='CASCADE')),
db.Column('keyword_id', db.Integer, db.ForeignKey('keywords.id', ondelete='CASCADE')),
)
class Subreddit(db.Model, JsonSerializer):
__tablename__ = 'subreddits'
id = db.Column(db.Integer, primary_key=True)
subreddit_name = db.Column(db.String(128), index=True)
# Establish a parent-children relationship (subreddit -> keywords).
keywords = db.relationship('Keyword', secondary=subreddits_keywords, backref='subreddits', cascade='all, delete', passive_deletes=True, lazy='dynamic')
// ...
class Keyword(db.Model, JsonSerializer):
__tablename__ = 'keywords'
id = db.Column(db.Integer, primary_key=True)
keyword = db.Column(db.String(128), index=True)
// ...
As test data, I've created the following data set:
Subreddit:
test_subreddit
Keywords:
test_keyword1
test_keyword2
test_keyword3
In other words, test_subreddit.keywords should return [test_keyword1, test_keyword2, test_keyword3].
Problem
When I remove test_subreddit, test_keyword1, test_keyword2, test_keyword3 still persist in the database.
I understand that with many-to-many relationships there is technically no parent, so cascades will not work, according to this post:
https://stackoverflow.com/a/803584/10426919.
What I've Tried
I followed this link: https://github.com/sqlalchemy/sqlalchemy/wiki/ManyToManyOrphan.
This link provides a library function that should fix my exact problem.
However, the function does not work when integrated into my Model file in the following ways:
Method #1:
from app.extensions import db
from werkzeug.security import generate_password_hash, check_password_hash
from sqlalchemy.inspection import inspect
from sqlalchemy_utils import auto_delete_orphans <------ # library
subreddits_keywords = db.Table('subreddits_keywords', db.Model.metadata,
db.Column('subreddit_id', db.Integer, db.ForeignKey('subreddits.id', ondelete='CASCADE')),
db.Column('keyword_id', db.Integer, db.ForeignKey('keywords.id', ondelete='CASCADE')),
)
class Subreddit(db.Model, JsonSerializer):
__tablename__ = 'subreddits'
id = db.Column(db.Integer, primary_key=True)
subreddit_name = db.Column(db.String(128), index=True)
# Establish a parent-children relationship (subreddit -> keywords).
keywords = db.relationship('Keyword', secondary=subreddits_keywords, backref='subreddits', cascade='all, delete', passive_deletes=True, lazy='dynamic')
// ...
class Keyword(db.Model, JsonSerializer):
__tablename__ = 'keywords'
id = db.Column(db.Integer, primary_key=True)
keyword = db.Column(db.String(128), index=True)
// ...
auto_delete_orphans(Subreddit.keywords) <------ # Library function
However, this function does not seem to do anything. There is no error that is output to help guide me towards the right direction. When I check my database in MySQL workbench, the Subreddit, test_subreddit, is deleted, but the keywords [test_keyword1, test_keyword2, test_keyword3] are still in the database under the Keywords table.
Method #2:
I tried integrating the actual function, that the library function is based on, into my code as well:
from app.extensions import db
from werkzeug.security import generate_password_hash, check_password_hash
from sqlalchemy.inspection import inspect
from sqlalchemy_utils import auto_delete_orphans
# for deleting many-to-many "orphans".
from sqlalchemy import event, create_engine
from sqlalchemy.orm import attributes, sessionmaker
subreddits_keywords = db.Table('subreddits_keywords', db.Model.metadata,
db.Column('subreddit_id', db.Integer, db.ForeignKey('subreddits.id', ondelete='CASCADE')),
db.Column('keyword_id', db.Integer, db.ForeignKey('keywords.id', ondelete='CASCADE')),
)
class Subreddit(db.Model, JsonSerializer):
__tablename__ = 'subreddits'
id = db.Column(db.Integer, primary_key=True)
subreddit_name = db.Column(db.String(128), index=True)
# Establish a parent-children relationship (subreddit -> keywords).
keywords = db.relationship('Keyword', secondary=subreddits_keywords, backref='subreddits', cascade='all, delete', passive_deletes=True, lazy='dynamic')
// ...
class Keyword(db.Model, JsonSerializer):
__tablename__ = 'keywords'
id = db.Column(db.Integer, primary_key=True)
keyword = db.Column(db.String(128), index=True)
// ...
engine = create_engine("mysql://", echo=True)
Session = sessionmaker(bind=engine)
@event.listens_for(Session, 'after_flush')
def delete_tag_orphans(session, ctx):
    """Delete every Keyword that is no longer attached to any Subreddit.

    Registered as an ``after_flush`` listener. The bulk DELETE is emitted
    only when the flush removed a Subreddit or detached keywords from one.

    NOTE(review): the hook fires only for sessions produced by the
    ``Session`` factory it is registered on -- confirm the application
    really uses that factory for its database work.
    """
    # A still-existing Subreddit had keywords removed from its collection.
    keywords_detached = any(
        isinstance(obj, Subreddit)
        and attributes.get_history(obj, 'keywords').deleted
        for obj in session.dirty
    )
    # A Subreddit itself was deleted in this flush.
    subreddit_removed = any(
        isinstance(obj, Subreddit) for obj in session.deleted
    )

    if keywords_detached or subreddit_removed:
        session.query(Keyword).filter(
            ~Keyword.subreddits.any()
        ).delete(synchronize_session=False)
Again, the keywords persisted despite being attached to no parent.
What I'm trying to accomplish
When children in the database no longer have a parent, I would like them to be removed from the database. What am I doing wrong?
Rather than using auto_delete_orphans, I created a method that I can call when I want to delete children. This method checks the child in question, and sees if it has any parents. If it does have a parent, we leave it be, but if it does not have a parent, we then delete the children.
Here is how I implemented this method, given that a Subreddit is a parent and a Keyword is a child of Subreddit.
def check_for_keyword_orphans(keyword):
    """Stage ``keyword`` for deletion when no subreddit references it.

    The delete is only added to ``db.session``; nothing is committed here.

    Returns:
        True if the keyword had no subreddits and was marked for deletion,
        False if it still has at least one associated subreddit.
    """
    if keyword.subreddits:
        return False  # keyword has an associated subreddit
    db.session.delete(keyword)
    return True  # keyword deleted
And here is how I used the method in my API route:
# Stage every orphaned keyword of this subreddit for deletion, then
# persist all the deletes in a single commit.
keywords = subreddit.keywords
for keyword in keywords:
    check_for_keyword_orphans(keyword)
db.session.commit()

Flask-SqlAlchemy composite key one to many relationship Sql Server

I'm currently getting an error, I'm using sql server and trying to model a simple Parent with an array of Children:
sqlalchemy.exc.NoForeignKeysError: Could not determine join condition
between parent/child tables on relationship Parent.children- there are no
foreign keys linking these tables. Ensure that referencing columns
are associated with a ForeignKey or ForeignKeyConstraint, or specify a
'primaryjoin' expression.
my classes are set up simply as follows:
class Parent(db.Model):
__tablename__ = "parent"
parentId = db.Column(db.Integer, primary_key=True)
parentVersion = db.Column(db.Integer, primary_key=True)
children = db.relationship('Child', backref="parent",lazy=True)
class Child(db.Model):
__tablename__ = "child"
id = db.Column(db.Integer, primary_key=True)
name = db.Column(db.String(512), nullable=False)
parentId = db.Column(db.Integer, nullable=False)
parentVersion = db.Column(db.Integer, nullable=False)
ForeignKeyConstraint(['parentId', 'parentVersion'], ['parent.parentId', 'parent.parentVersion']
I've tried fiddling with declaring the relationship and foreign key in several ways but i always get an error, what is the correct way to do this?
You forgot to add a foreign key:
# NOTE(review): parent's primary key is composite (parentId, parentVersion);
# a single-column foreign key may be rejected by the database -- a
# ForeignKeyConstraint covering both columns is the safer choice.
parentId = db.Column(db.Integer, db.ForeignKey("parent.parentId"))
There is a lot of documentation material regarding this topic, if there is still anything unclear to you.
You forgot to add the ForeignKeyConstraint to __table_args__, and you are using camel case rather than snake case for the column names. You also don't need __tablename__ with Flask-SQLAlchemy.
Try:
class Parent(db.Model):
    """Parent row identified by the composite primary key (id, version)."""

    id = db.Column(db.Integer, primary_key=True)
    version = db.Column(db.Integer, primary_key=True)
    # One-to-many: the join condition is derived from the composite
    # foreign key declared on Child.
    children = db.relationship('Child', backref="parent", lazy=True)
class Child(db.Model):
    """Child row that points at its Parent through a two-column foreign key."""

    id = db.Column(db.Integer, primary_key=True)
    name = db.Column(db.String(512), nullable=False)
    parent_id = db.Column(db.Integer, nullable=False)
    parent_version = db.Column(db.Integer, nullable=False)

    # A composite foreign key has to be declared at table level so that
    # both columns are matched together against parent's primary key.
    __table_args__ = (
        db.ForeignKeyConstraint(
            ['parent_id', 'parent_version'],
            ['parent.id', 'parent.version'],
        ),
    )

Alembic sqlalchemy.exc.NoReferencedColumnError: (Using Flask-sqlalchemy and Flask-Migrate)

Alembic keeps giving me this error when I try to migrate my schema even though the initial migration went without a hitch.
sqlalchemy.exc.NoReferencedColumnError: Could not initialize target column for ForeignKey 'dataset.datasetid' on table 'analysis': table 'dataset' has no column named 'datasetid'
Here is a part of my models.py class
class Dataset(db.Model):
DatasetID = db.Column(db.Integer, primary_key = True)
SampleID = db.Column(db.String(50), db.ForeignKey('sample.SampleID', onupdate="cascade",ondelete="restrict"), nullable=False)
UploadDate = db.Column(db.Date, nullable=False)
UploadID = db.Column(db.Integer,db.ForeignKey('uploaders.UploadID', onupdate="cascade",ondelete="restrict"), nullable=False)
UploadStatus = db.Column(db.String(45), nullable=False)
HPFPath = db.Column(db.String(500))
DatasetType = db.Column(db.String(45), nullable=False)
SolvedStatus = db.Column(db.String(30), nullable=False)
InputFile = db.Column(db.Text)
RunID = db.Column(db.String(45))
Notes = db.Column(db.Text)
analyses = db.relationship('Analysis',backref='dataset',lazy='dynamic')
data2Cohorts = db.relationship('Dataset2Cohort',backref='dataset',lazy='dynamic')
class Dataset2Cohort(db.Model):
__tablename__='dataset2Cohort'
DatasetID = db.Column(db.Integer, db.ForeignKey('dataset.DatasetID', onupdate="cascade",ondelete="cascade"), nullable=False, primary_key = True)
CohortID = db.Column(db.Integer, db.ForeignKey('cohort.CohortID', onupdate="cascade", ondelete="restrict"), nullable=False, primary_key = True)
class Analysis(db.Model):
AnalysisID = db.Column(db.String(100), primary_key = True)
DatasetID = db.Column(db.Integer, db.ForeignKey('dataset.DatasetID', onupdate="cascade",ondelete="cascade"), nullable=False)
PipelineVersion = db.Column(db.String(30))
ResultsDirectory = db.Column(db.Text)
ResultsBAM = db.Column(db.Text)
AssignedTo = db.Column(db.String(100), nullable=True)
analysisStatuses = db.relationship('AnalysisStatus', backref='analysis', lazy='dynamic')
Does anyone know why I keep getting that error even though I have the DatasetID column in the Dataset table?
Thank you,
Teja.
Found a solution.
This seems to be an issue with how MySQL 8.x reports column names in foreign key declarations: MySQL 8.x always uses lowercase when a column is referenced in a foreign key statement, which causes an incompatibility with SQLAlchemy. The issue is discussed here:
https://github.com/sqlalchemy/sqlalchemy/issues/4344
The solution is simply to upgrade SQLAlchemy to the latest version (>= 1.2.x).
Teja.

one to many relationships in different columns

This is a newbie SQL question that I have struggled to find a clear answer to, so please help. I have two tables in a database with the fields listed below:
ConstructionProjects
id (unique, nonempty)
developer (one, nonempty)
main_contractor (one, nonempty)
architect (one, nonempty)
(other fields)
Companies
id (unique, nonempty)
projects_developed (many or none)
projects_as_main_contractor (many or none)
projects_as_architect (many or none)
(other fields)
So every project has only one developer, one architect and one contractor, however, it may be the same company. Any company may be involved in as many projects in any roles.
Is there a way to avoid creating 3 additional association tables to establish many to many relationships? and make 3 one to many relationships instead?
If so, which practice is better?
*In other words, I don't understand whether relationships (one-to-many and many-to-many) relate (1) a row to a row or (2) a row to a "specific cell".
(1) if row to row then I have many to many relationships
(2) if row to specific cell then it is multiple one to many relationships...*
I'm learning Flask_alchemy and PostgreSQL.
I ran into problem, writing a code like this (there's no reference to specific columns between tables). So this is not ok?
class Company(db.Model):
id = db.Column(db.Integer, primary_key = True)
constr_projects_developed = db.relationship('ConstrProject', backref='developer')
constr_projects_main_contracts = db.relationship('ConstrProject', backref='main_contractor')
constr_projects_architect = db.relationship('ConstrProject', backref='architect')
class ConstrProject(db.Model):
id = db.Column(db.Integer, primary_key = True)
developer_id = db.Column(db.Integer, db.ForeignKey('company.id'))
main_contractor_id = db.Column(db.Integer, db.ForeignKey('company.id'))
architect_id = db.Column(db.Integer, db.ForeignKey('company.id'))
Then my question is, the correct way to do it is like this (1):
class Company(db.Model):
    """A company that can act as developer, main contractor or architect."""

    id = db.Column(db.Integer, primary_key=True)

    # ConstrProject carries three foreign keys to company.id, so each
    # relationship must name the column it joins on; otherwise the join
    # condition is ambiguous.
    constr_projects_developed = db.relationship(
        'ConstrProject',
        back_populates='developer',
        foreign_keys='ConstrProject.developer_id',
    )
    constr_projects_main_contracts = db.relationship(
        'ConstrProject',
        back_populates='main_contractor',
        foreign_keys='ConstrProject.main_contractor_id',
    )
    constr_projects_architect = db.relationship(
        'ConstrProject',
        back_populates='architect',
        foreign_keys='ConstrProject.architect_id',
    )


class ConstrProject(db.Model):
    """A construction project with exactly one company per role."""

    id = db.Column(db.Integer, primary_key=True)

    developer_id = db.Column(db.Integer, db.ForeignKey('company.id'))
    developer = db.relationship(
        'Company',
        back_populates='constr_projects_developed',
        foreign_keys=[developer_id],
    )

    main_contractor_id = db.Column(db.Integer, db.ForeignKey('company.id'))
    main_contractor = db.relationship(
        'Company',
        back_populates='constr_projects_main_contracts',
        foreign_keys=[main_contractor_id],
    )

    architect_id = db.Column(db.Integer, db.ForeignKey('company.id'))
    architect = db.relationship(
        'Company',
        back_populates='constr_projects_architect',
        foreign_keys=[architect_id],
    )
Or like this(2)?:
# The association tables are defined first because the relationships below
# reference them by variable. Flask-SQLAlchemy derives the default table
# name from the class name in snake case, hence 'constr_project'.
cp_developer_company = db.Table(
    'cp_developer_company',
    db.Column('company_id', db.Integer, db.ForeignKey('company.id')),
    db.Column('constr_project_id', db.Integer, db.ForeignKey('constr_project.id')),
)
cp_main_contractor_company = db.Table(
    'cp_main_contractor_company',
    db.Column('company_id', db.Integer, db.ForeignKey('company.id')),
    db.Column('constr_project_id', db.Integer, db.ForeignKey('constr_project.id')),
)
cp_architect_company = db.Table(
    'cp_architect_company',
    db.Column('company_id', db.Integer, db.ForeignKey('company.id')),
    db.Column('constr_project_id', db.Integer, db.ForeignKey('constr_project.id')),
)


class Company(db.Model):
    """A company linked to projects through one association table per role."""

    id = db.Column(db.Integer, primary_key=True)
    # Each relationship targets ConstrProject (not Company) and pairs with
    # the attribute of the same role on the other side.
    cp_developed = db.relationship(
        'ConstrProject',
        secondary=cp_developer_company,
        back_populates='developer',
    )
    cp_main_contracts = db.relationship(
        'ConstrProject',
        secondary=cp_main_contractor_company,
        back_populates='main_contractor',
    )
    cp_architects = db.relationship(
        'ConstrProject',
        secondary=cp_architect_company,
        back_populates='architect',
    )


class ConstrProject(db.Model):
    """A construction project linked to companies through association tables."""

    id = db.Column(db.Integer, primary_key=True)
    developer = db.relationship(
        'Company',
        secondary=cp_developer_company,
        back_populates='cp_developed',
    )
    main_contractor = db.relationship(
        'Company',
        secondary=cp_main_contractor_company,
        back_populates='cp_main_contracts',
    )
    architect = db.relationship(
        'Company',
        secondary=cp_architect_company,
        back_populates='cp_architects',
    )

nested sqlalchemy filter with parent and son

With the following scheme:
class User(Base):
id = Column(Integer, primary_key=True)
name = Column(String)
class Photo(Base):
id = Column(Integer, primary_key=True)
user_id = Column(Integer, ForeignKey(User.id), nullable=False)
user = relationship(User)
class Tag(Base):
id = Column(Integer, primary_key=True)
tag_name = Column(String)
tag_version = Column(Integer)
photo_id = Column(Integer, ForeignKey(Photo.id), nullable=False)
photo = relationship(Photo)
How do I create an SQLAlchemy query to get all the photos of a specific user, that don't have a specific tag and version.
As in "all the photos of the user with id "1234" that don't have a "cat" of version "2" tagged in them".
Also interesting would be "all the users who have at least one photo without a specific tag"
I'm using postgreSQL btw.
Here is a complete example that sets up relationships, creates some sample data, then performs your two queries.
Setup:
from datetime import datetime
from sqlalchemy import create_engine, Column, Integer, String, ForeignKey, not_
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker, relationship
engine = create_engine('sqlite:///', echo=True)
Session = sessionmaker(bind=engine)
session = Session()
Base = declarative_base(bind=engine)
class User(Base):
__tablename__ = 'user'
id = Column(Integer, primary_key=True)
name = Column(String, nullable=False)
class Photo(Base):
__tablename__ = 'photo'
id = Column(Integer, primary_key=True)
name = Column(String, nullable=False)
user_id = Column(Integer, ForeignKey(User.id), nullable=False)
user = relationship(User, backref='photos')
class Tag(Base):
__tablename__ = 'tag'
id = Column(Integer, primary_key=True)
name = Column(String, nullable=False)
photo_id = Column(Integer, ForeignKey(Photo.id), nullable=False)
photo = relationship(Photo, backref='tags')
Base.metadata.create_all()
session.add(User(name='davidism', photos=[
Photo(name='sun', tags=[Tag(name='bright'), Tag(name='day')]),
Photo(name='moon', tags=[Tag(name='bright'), Tag(name='night')])
]))
session.add(User(name='eran', photos=[
Photo(name='party', tags=[Tag(name='people'), Tag(name='night')]),
Photo(name='cat')
]))
session.commit()
Query all photos with no tags at all:
# The NOT EXISTS produced by ~any() already expresses "has no tags",
# so no join against the tag table is needed.
no_tags = session.query(Photo).filter(not_(Photo.tags.any())).all()
print('no tags: ', len(no_tags))
Query all photos without the tag 'night':
# NOT EXISTS (tag named 'night') -- this also keeps photos with no tags at all.
not_night = session.query(Photo).filter(not_(Photo.tags.any(Tag.name == 'night'))).all()
print('not night: ', len(not_night))
Assuming the existence of the backrefs Tag.photo = relationship(Photo, backref='tags') and
Photo.user = relationship(User, backref="photos"), both can be done using the any() construct. This might not generate the most optimal SQL SELECT statement, but it is very clean SQLAlchemy.
Part-1: "all the photos of the user with id "1234" that don't have a "cat" of version "2" tagged in them"
def get_user_photos_without_tag(user_id, tag_name, tag_version):
    """Return the photos of ``user_id`` that lack the given tag.

    A photo is excluded only when one of its tags matches BOTH
    ``tag_name`` and ``tag_version``; photos without any tags are kept.
    """
    # Local import: and_ is not among the names imported by the snippets
    # this function is shown with.
    from sqlalchemy import and_

    unwanted_tag = and_(
        Tag.tag_name == tag_name,
        Tag.tag_version == tag_version,
    )
    return (
        session.query(Photo)
        .filter(~Photo.tags.any(unwanted_tag))
        .filter(Photo.user_id == user_id)
        .all()
    )
photos = get_user_photos_without_tag(1234, 'cat', 2)
Part-2: "all the users who have at least one photo without a specific tag"
def get_user_with_photos_without_tag(tag_name, tag_version):
    """Return the users owning at least one photo that lacks the given tag.

    A photo counts as "without the tag" when none of its tags matches BOTH
    ``tag_name`` and ``tag_version``.
    """
    # Local import: and_ is not among the names imported by the snippets
    # this function is shown with.
    from sqlalchemy import and_

    unwanted_tag = and_(
        Tag.tag_name == tag_name,
        Tag.tag_version == tag_version,
    )
    photo_lacks_tag = ~Photo.tags.any(unwanted_tag)
    return session.query(User).filter(User.photos.any(photo_lacks_tag)).all()
res = get_user_with_photos_without_tag('cat', 2)