您最多选择25个主题 主题必须以字母或数字开头,可以包含连字符 (-),并且长度不得超过35个字符

48 行
1.5 KiB

3 年前
  1. #!/usr/bin/env PYTHONUNBUFFERED=1 python
  2. #
  3. # Created by Bjarni R. Einarsson, placed in the public domain. Go wild!
  4. #
  5. import json
  6. import os
  7. import sys
  8. try:
  9. dirtydb_input = sys.argv[1]
  10. dirtydb_output = '%s.new' % dirtydb_input
  11. assert(os.path.exists(dirtydb_input))
  12. assert(not os.path.exists(dirtydb_output))
  13. except:
  14. print()
  15. print('Usage: %s /path/to/dirty.db' % sys.argv[0])
  16. print()
  17. print('Note: Will create a file named dirty.db.new in the same folder,')
  18. print(' please make sure permissions are OK and a file by that')
  19. print(' name does not exist already. This script works by omitting')
  20. print(' duplicate lines from the dirty.db file, keeping only the')
  21. print(' last (latest) instance. No revision data should be lost,')
  22. print(' but be careful, make backups. If it breaks you get to keep')
  23. print(' both pieces!')
  24. print()
  25. sys.exit(1)
  26. dirtydb = {}
  27. lines = 0
  28. with open(dirtydb_input, 'r') as fd:
  29. print('Reading %s' % dirtydb_input)
  30. for line in fd:
  31. lines += 1
  32. try:
  33. data = json.loads(line)
  34. dirtydb[data['key']] = line
  35. except:
  36. print("Skipping invalid JSON!")
  37. if lines % 10000 == 0:
  38. sys.stderr.write('.')
  39. print()
  40. print('OK, found %d unique keys in %d lines' % (len(dirtydb), lines))
  41. with open(dirtydb_output, 'w') as fd:
  42. for data in list(dirtydb.values()):
  43. fd.write(data)
  44. print('Wrote data to %s. All done!' % dirtydb_output)