|
|
@ -1,12 +1,14 @@ |
|
|
|
from sqlalchemy import create_engine,MetaData |
|
|
|
from sqlalchemy import Integer,String,ForeignKey,Column,TEXT |
|
|
|
from sqlalchemy import Integer,String,ForeignKey,Column,TEXT,LargeBinary |
|
|
|
from sqlalchemy.orm import relationship |
|
|
|
from sqlalchemy.ext.declarative import declarative_base |
|
|
|
from sqlalchemy.orm import sessionmaker |
|
|
|
from sqlalchemy import PrimaryKeyConstraint,UniqueConstraint |
|
|
|
import sqlalchemy |
|
|
|
|
|
|
|
import re |
|
|
|
import time |
|
|
|
import jieba.analyse |
|
|
|
from jieba import cut_for_search |
|
|
|
|
|
|
|
Base=declarative_base() |
|
|
|
class con: |
|
|
@ -117,6 +119,177 @@ class New_Order_Detail(Base): |
|
|
|
PrimaryKeyConstraint('order_id','book_id'), |
|
|
|
) |
|
|
|
|
|
|
|
class Book(Base):
    """ORM mapping for the ``book`` table, one row per book.

    ``book_id`` is the primary key and ``title`` is the only other column
    declared NOT NULL; every remaining column is nullable.
    """

    __tablename__ = "book"

    # Identity.
    book_id = Column(TEXT, primary_key=True)
    title = Column(TEXT, nullable=False)

    # Bibliographic details.
    author = Column(TEXT)
    publisher = Column(TEXT)
    original_title = Column(TEXT)
    translator = Column(TEXT)
    pub_year = Column(TEXT)
    pages = Column(Integer)
    original_price = Column(Integer)  # original (list) price
    currency_unit = Column(TEXT)
    binding = Column(TEXT)
    isbn = Column(TEXT)

    # Long free-text fields.
    author_intro = Column(TEXT)
    book_intro = Column(TEXT)
    content = Column(TEXT)
    tags = Column(TEXT)

    # Binary picture data.
    picture = Column(LargeBinary)
|
|
|
|
|
|
|
|
|
|
|
# Search index table for book titles
|
|
|
class Search_title(Base):
    """ORM mapping for the ``search_title`` table.

    Each row ties one title search term (``title``) to one book
    (``book_id``, a foreign key to ``book.book_id``).  The primary key is
    the pair ``(search_id, title)``.
    """

    __tablename__ = "search_title"

    search_id = Column(Integer, nullable=False)
    title = Column(TEXT, nullable=False)
    book_id = Column(TEXT, ForeignKey("book.book_id"), nullable=False)

    # Composite primary key; the trailing dict carries no extra table options.
    __table_args__ = (
        PrimaryKeyConstraint("search_id", "title"),
        {},
    )
|
|
|
|
|
|
|
|
|
|
|
# Search index table for book tags
|
|
|
class Search_tags(Base):
    """ORM mapping for the ``search_tags`` table.

    Each row ties one tag search term (``tags``) to one book (``book_id``,
    a foreign key to ``book.book_id``).  The primary key is the pair
    ``(search_id, tags)``.
    """

    __tablename__ = "search_tags"

    search_id = Column(Integer, nullable=False)
    tags = Column(TEXT, nullable=False)
    book_id = Column(TEXT, ForeignKey("book.book_id"), nullable=False)

    # Composite primary key; the trailing dict carries no extra table options.
    __table_args__ = (
        PrimaryKeyConstraint("search_id", "tags"),
        {},
    )
|
|
|
|
|
|
|
|
|
|
|
# Search index table for book authors
|
|
|
class Search_author(Base):
    """ORM mapping for the ``search_author`` table.

    Each row ties one author search term (``author``) to one book
    (``book_id``, a foreign key to ``book.book_id``).  The primary key is
    the pair ``(search_id, author)``.
    """

    __tablename__ = "search_author"

    search_id = Column(Integer, nullable=False)
    author = Column(TEXT, nullable=False)
    book_id = Column(TEXT, ForeignKey("book.book_id"), nullable=False)

    # Composite primary key; the trailing dict carries no extra table options.
    __table_args__ = (
        PrimaryKeyConstraint("search_id", "author"),
        {},
    )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Search index table for book introductions
|
|
|
class Search_book_intro(Base):
    """ORM mapping for the ``search_book_intro`` table.

    Each row ties one introduction search term (``book_intro``) to one
    book (``book_id``, a foreign key to ``book.book_id``).  The primary key
    is the pair ``(search_id, book_intro)``.
    """

    __tablename__ = "search_book_intro"

    search_id = Column(Integer, nullable=False)
    book_intro = Column(TEXT, nullable=False)
    book_id = Column(TEXT, ForeignKey("book.book_id"), nullable=False)

    # Composite primary key; the trailing dict carries no extra table options.
    __table_args__ = (
        PrimaryKeyConstraint("search_id", "book_intro"),
        {},
    )
|
|
|
|
|
|
|
|
|
|
|
def insert_tags(session):
    """Populate ``search_tags`` from the ``tags`` column of every book.

    For each element produced by iterating a book's ``tags`` value, one
    ``Search_tags`` row is added.  Its ``search_id`` is one more than the
    highest id already stored for that element, or 0 when the element has
    not been seen before.  Books whose ``tags`` is NULL are skipped.  All
    rows are committed together at the end.

    :param session: SQLAlchemy session used for the look-ups and inserts.
    """
    for book in session.query(Book).all():
        tags = book.tags
        # ``Book.tags`` is nullable; iterating ``None`` would raise TypeError.
        if tags is None:
            continue

        # NOTE(review): ``tags`` is a TEXT column, so this loop walks the
        # string one character at a time.  If the column stores a delimited
        # list it presumably needs to be split first -- confirm the format.
        for tag in tags:
            # With an autoflushing session (the sessionmaker default) this
            # query also sees the rows added earlier in this call.
            latest = (
                session.query(Search_tags)
                .filter(Search_tags.tags == tag)
                .order_by(Search_tags.search_id.desc())
                .first()
            )
            next_id = 0 if latest is None else latest.search_id + 1
            session.add(
                Search_tags(search_id=next_id, tags=tag, book_id=book.book_id)
            )

    session.commit()
|
|
|
|
|
|
|
|
|
|
|
def insert_author(session):
    """Populate ``search_author`` with one row per book.

    The search term is the book's ``author`` value; books whose author is
    NULL are indexed under the placeholder ``'作者不详'`` ("author
    unknown").  ``search_id`` is one more than the highest id already
    stored for that author, or 0 for an author not seen before.  All rows
    are committed together at the end.

    :param session: SQLAlchemy session used for the look-ups and inserts.
    """
    for book in session.query(Book).all():
        # Identity test rather than ``== None`` (PEP 8).
        author = '作者不详' if book.author is None else book.author

        # With an autoflushing session (the sessionmaker default) this
        # query also sees the rows added earlier in this call.
        latest = (
            session.query(Search_author)
            .filter(Search_author.author == author)
            .order_by(Search_author.search_id.desc())
            .first()
        )
        next_id = 0 if latest is None else latest.search_id + 1
        session.add(
            Search_author(search_id=next_id, author=author, book_id=book.book_id)
        )

    session.commit()
|
|
|
|
|
|
|
def insert_title(session):
    """Populate ``search_title`` with the search terms of every book title.

    Each title is cleaned (bracketed annotations removed, then every
    character that is neither a word character nor whitespace removed,
    then surrounding whitespace trimmed) and segmented with jieba's
    ``cut_for_search``.  The cleaned title itself is indexed as well when
    segmentation does not already yield it.  Titles that are empty after
    cleaning and whitespace-only segments are skipped.

    ``search_id`` is one more than the highest id already stored for the
    term, or 0 for a term not seen before.  All rows are committed
    together at the end.

    :param session: SQLAlchemy session used for the look-ups and inserts.
    """
    # A bracketed annotation -- (...), [...], {...}, full-width parentheses
    # or lenticular brackets -- plus one optional trailing whitespace
    # character.  The closer set contains ``}`` (the earlier pattern listed
    # ``{`` there by mistake), and the middle part excludes every closer so
    # a match cannot run across two separate bracket groups.
    # NOTE(review): the full-width parentheses are included on the
    # assumption that they are meant to be handled like the other bracket
    # pairs -- confirm.
    bracketed = re.compile(r'[(\[{（【][^)\]}）】]*[)\]}）】]\s?')
    # Anything that is neither a word character nor whitespace.
    punctuation = re.compile(r'[^\w\s]')

    for book in session.query(Book).all():
        title = punctuation.sub('', bracketed.sub('', book.title)).strip()
        if not title:
            # Nothing searchable is left of this title.
            continue

        # Search-engine mode: on top of the precise segmentation, long
        # words are cut again, which raises recall.
        terms = list(cut_for_search(title))
        if title not in terms:
            terms.append(title)

        for term in terms:
            if not term.strip():
                # Segmentation emits the whitespace between words as tokens.
                continue

            # With an autoflushing session (the sessionmaker default) this
            # query also sees the rows added earlier in this call.
            latest = (
                session.query(Search_title)
                .filter(Search_title.title == term)
                .order_by(Search_title.search_id.desc())
                .first()
            )
            next_id = 0 if latest is None else latest.search_id + 1
            session.add(
                Search_title(search_id=next_id, title=term, book_id=book.book_id)
            )

    session.commit()
|
|
|
|
|
|
|
def insert_book_intro(session):
    """Populate ``search_book_intro`` with keywords of every book introduction.

    Keywords are extracted from each non-NULL ``book_intro`` with
    ``jieba.analyse.textrank`` called with its default arguments.  One row
    is added per keyword; its ``search_id`` is one more than the highest
    id already stored for that keyword, or 0 for a keyword not seen
    before.  All rows are committed together at the end.

    :param session: SQLAlchemy session used for the look-ups and inserts.
    """
    for book in session.query(Book).all():
        intro = book.book_intro
        if intro is None:
            continue

        for keyword in jieba.analyse.textrank(intro):
            # Order by this table's own ``search_id``.  Ordering by
            # ``Search_title.search_id`` would pull ``search_title`` into
            # the FROM clause, so the first row would no longer be the
            # highest id stored for the keyword.
            # With an autoflushing session (the sessionmaker default) this
            # query also sees the rows added earlier in this call.
            latest = (
                session.query(Search_book_intro)
                .filter(Search_book_intro.book_intro == keyword)
                .order_by(Search_book_intro.search_id.desc())
                .first()
            )
            next_id = 0 if latest is None else latest.search_id + 1
            session.add(
                Search_book_intro(
                    search_id=next_id,
                    book_intro=keyword,
                    book_id=book.book_id,
                )
            )

    session.commit()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Unpack the engine and the accompanying ``meta`` object returned by the
# connection helper, then bind the declarative base's metadata to the engine.
engine, meta = con.connect()
Base.metadata.bind = engine
|
|
@ -124,3 +297,8 @@ Base.metadata.bind = engine |
|
|
|
# Session factory bound to the engine, and the module-level session
# created from it.
DBSession = sessionmaker(bind=engine)
session = DBSession()

# Build the four search index tables.
# NOTE(review): these calls run whenever the module is imported, so every
# import adds the index rows again -- confirm that this is intended.
insert_tags(session=session)
insert_author(session=session)
insert_title(session=session)
insert_book_intro(session=session)