NoteOnMe博客平台搭建
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

46 lines
1.4 KiB

  1. import click
  2. from model.utils.data_generator import DataGenerator
  3. from model.utils.text import build_vocab, write_vocab
  4. from model.utils.image import build_images
  5. from model.utils.general import Config
  6. @click.command()
  7. @click.option('--data', default="configs/data.json",
  8. help='Path to data json config')
  9. @click.option('--vocab', default="configs/vocab.json",
  10. help='Path to vocab json config')
  11. def main(data, vocab):
  12. data_config = Config(data)
  13. # datasets
  14. train_set = DataGenerator(
  15. path_formulas=data_config.path_formulas_train,
  16. dir_images=data_config.dir_images_train,
  17. path_matching=data_config.path_matching_train)
  18. """
  19. test_set = DataGenerator(
  20. path_formulas=data_config.path_formulas_test,
  21. dir_images=data_config.dir_images_test,
  22. path_matching=data_config.path_matching_test)
  23. """
  24. val_set = DataGenerator(
  25. path_formulas=data_config.path_formulas_val,
  26. dir_images=data_config.dir_images_val,
  27. path_matching=data_config.path_matching_val)
  28. # produce images and matching files
  29. train_set.build(buckets=data_config.buckets)
  30. #test_set.build(buckets=data_config.buckets)
  31. val_set.build(buckets=data_config.buckets)
  32. # vocab
  33. vocab_config = Config(vocab)
  34. vocab = build_vocab([train_set], min_count=vocab_config.min_count_tok)
  35. write_vocab(vocab, vocab_config.path_vocab)
  36. if __name__ == "__main__":
  37. main()