"""Bookstore ORM models and search-index population script.

Defines the SQLAlchemy declarative models for the bookstore schema and, when
the module is executed or imported, fills the four inverted-index tables
(``search_tags``, ``search_author``, ``search_title``, ``search_book_intro``)
from the rows of the ``book`` table.
"""
import os
import re
import time

import jieba.analyse
import sqlalchemy
from jieba import cut_for_search
from sqlalchemy import create_engine, MetaData
from sqlalchemy import Integer, String, ForeignKey, Column, TEXT, LargeBinary
from sqlalchemy import PrimaryKeyConstraint, UniqueConstraint
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import relationship
from sqlalchemy.orm import sessionmaker

Base = declarative_base()

# Name of the environment variable that may override the database URL.
DB_URL_ENV_VAR = 'BOOKSTORE_DB_URL'

# SECURITY NOTE(review): credentials are embedded in source. The value is kept
# only as a backward-compatible fallback; prefer setting BOOKSTORE_DB_URL and
# rotating this password.
_DEFAULT_DB_URL = 'postgresql://stu10205501415:Stu10205501415@dase-cdms-2022-pub.pg.rds.aliyuncs.com:5432/stu10205501415'

# Bracketed annotation (opening bracket, anything up to a closing bracket,
# optional trailing whitespace) that is stripped from titles before
# segmentation. The closing class now contains ``}``; it previously listed
# ``{`` by mistake, so ``{...}`` annotations were never removed.
_BRACKETED_RE = re.compile(r'[\(\[\{(【][^))】]*[\)\]\}\】\)]\s?')

# Any character that is neither a word character nor whitespace.
_PUNCTUATION_RE = re.compile(r'[^\w\s]')


class con:
    """Namespace for the database connection helper."""

    @staticmethod
    def connect():
        '''Returns a connection and a metadata object'''
        # The PostgreSQL URL comes from the environment when available and
        # falls back to the historical hard-coded value otherwise.
        url = os.environ.get(DB_URL_ENV_VAR, _DEFAULT_DB_URL)

        # The return value of create_engine() is our connection object
        engine = create_engine(url, client_encoding='utf8')

        # We then bind the connection to MetaData()
        # NOTE(review): MetaData(bind=...) belongs to the SQLAlchemy 1.x API.
        meta = MetaData(bind=engine)

        return engine, meta


class User(Base):
    """Row of the ``user`` table."""

    __tablename__ = 'user'
    user_id = Column(TEXT, primary_key=True, comment="主键")
    password = Column(TEXT, nullable=False, comment="密码")
    balance = Column(Integer, nullable=False, comment="")
    token = Column(TEXT, comment="缓存的令牌")
    terminal = Column(TEXT, comment="终端代码")


# A standalone ``store`` model (store_id, stock_level) is currently disabled
# in this file; Store_Book below carries a stock_level column per
# (store_id, book_id).


class Store_Book(Base):
    """Row of the ``store_book`` table, keyed by (store_id, book_id)."""

    __tablename__ = 'store_book'
    store_id = Column(TEXT, comment="主键")
    book_id = Column(TEXT, comment="主键")
    book_info = Column(TEXT, comment="书籍信息")
    stock_level = Column(Integer, comment = "货存")

    __table_args__ = (
        PrimaryKeyConstraint('store_id', 'book_id'),
    )


class User_Store(Base):
    """Row of the ``user_store`` association table."""

    __tablename__ = 'user_store'
    # The ForeignKey declarations to user.user_id and store.store_id are
    # currently disabled; both columns are plain TEXT.
    user_id = Column(
        TEXT,
        nullable=False,
        comment="user外键"
    )
    store_id = Column(
        TEXT,
        nullable=False,
        comment="store外键"
    )
    # The association table of a many-to-many relationship must use a
    # composite unique constraint to prevent duplicate rows.
    __table_args__ = (
        PrimaryKeyConstraint('store_id', 'user_id'),
    )


class New_Order(Base):
    """Row of the ``new_order`` table."""

    __tablename__ = 'new_order'
    order_id = Column(TEXT, primary_key = True, comment = '订单id')

    user_id = Column(
        TEXT,
        ForeignKey(
            "user.user_id",
            ondelete="CASCADE",
            onupdate="CASCADE",
        ),
        nullable=False,
        comment="user外键"
    )
    # The ForeignKey to store.store_id is currently disabled.
    store_id = Column(
        TEXT,
        nullable=False,
        comment="store外键"
    )
    creat_time = Column(TEXT, nullable=False,comment="订单创建时间")
    status = Column(Integer,nullable=False,comment="订单状态")


class New_Order_Detail(Base):
    """Row of the ``new_order_detail`` table, keyed by (order_id, book_id)."""

    __tablename__ = 'new_order_detail'
    order_id = Column(TEXT, comment='订单id')
    book_id = Column(TEXT, comment='订单书籍')
    count = Column(Integer, comment='购买书籍数')
    price = Column(Integer, comment='单价')

    __table_args__ = (
        PrimaryKeyConstraint('order_id','book_id'),
    )


class Book(Base):
    """Row of the ``book`` table."""

    __tablename__ = 'book'
    book_id = Column(TEXT, primary_key=True)
    title = Column(TEXT, nullable=False)
    author = Column(TEXT)
    publisher = Column(TEXT)
    original_title = Column(TEXT)
    translator = Column(TEXT)
    pub_year = Column(TEXT)
    pages = Column(Integer)
    original_price = Column(Integer)  # original (list) price
    currency_unit = Column(TEXT)
    binding = Column(TEXT)
    isbn = Column(TEXT)
    author_intro = Column(TEXT)
    book_intro = Column(TEXT)
    content = Column(TEXT)
    tags = Column(TEXT)
    picture = Column(LargeBinary)


# Title search table
class Search_title(Base):
    """Index row mapping a title term to a book, keyed by (search_id, title)."""

    __tablename__ = 'search_title'
    search_id = Column(Integer, nullable=False)
    title = Column(TEXT, nullable=False)
    book_id = Column(TEXT, ForeignKey('book.book_id'), nullable=False)

    __table_args__ = (
        PrimaryKeyConstraint('search_id', 'title'),
        {},
    )


# Tag search table
class Search_tags(Base):
    """Index row mapping a tag term to a book, keyed by (search_id, tags)."""

    __tablename__ = 'search_tags'
    search_id = Column(Integer, nullable=False)
    tags = Column(TEXT, nullable=False)
    book_id = Column(TEXT, ForeignKey('book.book_id'), nullable=False)

    __table_args__ = (
        PrimaryKeyConstraint('search_id', 'tags'),
        {},
    )


# Author search table
class Search_author(Base):
    """Index row mapping an author to a book, keyed by (search_id, author)."""

    __tablename__ = 'search_author'
    search_id = Column(Integer, nullable=False)
    author = Column(TEXT, nullable=False)
    book_id = Column(TEXT, ForeignKey('book.book_id'), nullable=False)

    __table_args__ = (
        PrimaryKeyConstraint('search_id', 'author'),
        {},
    )


# Book introduction search table
class Search_book_intro(Base):
    """Index row mapping an intro keyword to a book, keyed by (search_id, book_intro)."""

    __tablename__ = 'search_book_intro'
    search_id = Column(Integer, nullable=False)
    book_intro = Column(TEXT, nullable=False)
    book_id = Column(TEXT, ForeignKey('book.book_id'), nullable=False)

    __table_args__ = (
        PrimaryKeyConstraint('search_id', 'book_intro'),
        {},
    )


def _next_search_id(session, model, term_column, term):
    """Return the next free ``search_id`` for ``term`` in ``model``'s table.

    Args:
        session: SQLAlchemy session used for the lookup.
        model: One of the ``Search_*`` model classes.
        term_column: The column of ``model`` that stores the indexed term.
        term: The term about to be inserted.

    Returns:
        0 when no row exists for ``term`` yet, otherwise the highest existing
        ``search_id`` for that term plus one.
    """
    latest = (
        session.query(model)
        .filter(term_column == term)
        # Always order by the queried model's own search_id column.
        .order_by(model.search_id.desc())
        .first()
    )
    if latest is None:
        return 0
    return latest.search_id + 1


def insert_tags(session):
    """Populate ``search_tags`` from every book's ``tags`` value.

    Args:
        session: SQLAlchemy session; it is committed once at the end.
    """
    for book in session.query(Book).all():
        tags = book.tags
        # Book.tags is nullable; skip books without tags instead of failing
        # on iteration over None.
        if not tags:
            continue
        # NOTE(review): Book.tags is a TEXT column, so this loop indexes one
        # character at a time. Confirm the stored format and whether it
        # should be split on a delimiter instead.
        for tag in tags:
            next_id = _next_search_id(session, Search_tags, Search_tags.tags, tag)
            session.add(Search_tags(search_id=next_id, tags=tag, book_id=book.book_id))
    session.commit()


def insert_author(session):
    """Populate ``search_author`` with one row per book.

    Books without an author are indexed under the placeholder '作者不详'.

    Args:
        session: SQLAlchemy session; it is committed once at the end.
    """
    for book in session.query(Book).all():
        author = '作者不详' if book.author is None else book.author
        next_id = _next_search_id(session, Search_author, Search_author.author, author)
        session.add(Search_author(search_id=next_id, author=author, book_id=book.book_id))
    session.commit()


def insert_title(session):
    """Populate ``search_title`` with the segmented terms of every book title.

    Each title is stripped of bracketed annotations and punctuation, split
    with jieba's search-engine mode, and indexed by every segment plus the
    cleaned title itself.

    Args:
        session: SQLAlchemy session; it is committed once at the end.
    """
    for book in session.query(Book).all():
        cleaned = _BRACKETED_RE.sub('', book.title)
        cleaned = _PUNCTUATION_RE.sub('', cleaned)
        # Skip titles that are empty after cleaning.
        if not cleaned:
            continue

        # Search-engine mode: on top of precise mode, long words are split
        # again to improve recall, which suits search-engine tokenisation.
        terms = list(cut_for_search(cleaned))
        # Make sure the whole cleaned title is searchable as a term too.
        if cleaned not in terms:
            terms.append(cleaned)

        for term in terms:
            if term == "" or term == " ":
                continue
            next_id = _next_search_id(session, Search_title, Search_title.title, term)
            session.add(Search_title(search_id=next_id, title=term, book_id=book.book_id))
    session.commit()


def insert_book_intro(session):
    """Populate ``search_book_intro`` with TextRank keywords of each book intro.

    Books whose ``book_intro`` is NULL are skipped.

    Args:
        session: SQLAlchemy session; it is committed once at the end.
    """
    for book in session.query(Book).all():
        intro = book.book_intro
        if intro is None:
            continue
        # Extract keywords with TextRank.
        for keyword in jieba.analyse.textrank(intro):
            # The lookup is ordered by Search_book_intro.search_id; it used to
            # be ordered by Search_title.search_id, a column of another table.
            next_id = _next_search_id(
                session, Search_book_intro, Search_book_intro.book_intro, keyword
            )
            session.add(
                Search_book_intro(search_id=next_id, book_intro=keyword, book_id=book.book_id)
            )
    session.commit()


# Module-level setup: these statements run whenever the module is executed or
# imported, and expose `engine`, `meta`, `DBSession` and `session`.
engine, meta = con.connect()
Base.metadata.bind = engine
DBSession = sessionmaker(bind=engine)
session = DBSession()

insert_tags(session=session)
insert_author(session=session)
insert_title(session=session)
insert_book_intro(session=session)