|
|
- import click
-
-
- from model.utils.data_generator import DataGenerator
- from model.utils.text import build_vocab, write_vocab
- from model.utils.image import build_images
- from model.utils.general import Config
-
-
@click.command()
@click.option('--data', default="configs/data.json",
              help='Path to data json config')
@click.option('--vocab', default="configs/vocab.json",
              help='Path to vocab json config')
def main(data, vocab):
    """Build the train/val datasets and write the training vocabulary.

    Reads the data config given by --data, builds the train and validation
    sets, then writes a vocabulary built from the train set using the
    settings in the config given by --vocab.
    """
    # `data` and `vocab` are paths to JSON config files (see the options).
    data_config = Config(data)

    # datasets
    train_set = DataGenerator(
        path_formulas=data_config.path_formulas_train,
        dir_images=data_config.dir_images_train,
        path_matching=data_config.path_matching_train)
    val_set = DataGenerator(
        path_formulas=data_config.path_formulas_val,
        dir_images=data_config.dir_images_val,
        path_matching=data_config.path_matching_val)
    # NOTE: the test split is not built by this script. The original code
    # had it disabled (wrapped in a string literal / commented out); to
    # re-enable it, construct a DataGenerator from the `*_test` config
    # entries and call `.build(buckets=data_config.buckets)` on it.

    # produce images and matching files
    train_set.build(buckets=data_config.buckets)
    val_set.build(buckets=data_config.buckets)

    # vocab: only the train set contributes tokens; the result is kept under
    # its own name so the `vocab` path parameter is not shadowed.
    vocab_config = Config(vocab)
    built_vocab = build_vocab([train_set],
                              min_count=vocab_config.min_count_tok)
    write_vocab(built_vocab, vocab_config.path_vocab)


# Run the command only when executed as a script, not on import.
if __name__ == "__main__":
    main()
|