You may test your 'mydict.txt' with the following program:
# Mount the user's Google Drive inside the Colab runtime at /content/gdrive.
# NOTE(review): the relative 'gdrive/MyDrive/...' paths used further down
# presumably rely on the working directory being /content -- confirm.
from google.colab import drive
drive.mount('/content/gdrive')
def readPoemBody(fn):
    """Return the body text of the poem file *fn*.

    The file is expected to begin with three header lines: the title, the
    author, and an empty separator line.  Everything after those three
    lines is returned unchanged, including any trailing newline.

    A status message is printed as a side effect: a warning when the third
    line is not empty, otherwise the title and author being read.

    Args:
        fn: Path of the poem text file.  It is decoded as UTF-8.

    Returns:
        The rest of the file after the three header lines, as a ``str``
        (empty when the file has no more content).
    """
    # The context manager closes the file even if reading or decoding fails.
    with open(fn, "r", encoding="utf-8") as infile:
        # rstrip('\n') removes only the line terminator.  Slicing with [:-1]
        # would eat a real character when a line has no trailing newline
        # (e.g. a file that ends right after its header).
        title = infile.readline().rstrip('\n')
        author = infile.readline().rstrip('\n')
        separator = infile.readline().rstrip('\n')
        body = infile.read()

    if separator:
        print("[Warning] 3rd line of {} not empty\n {}".format(
            fn, separator))
    else:
        print("Reading {}({})".format(title, author))
    return body
# Word segmentation: tokenize every line of each poem with jieba and print
# the original line next to its '/'-joined tokens.
import jieba

# Load the user dictionary before segmenting so that jieba.lcut() below
# takes its entries into account.
jieba.load_userdict('gdrive/MyDrive/KGHS/mydict.txt')

import glob

# glob returns matches in arbitrary order; sort them so the output is
# reproducible from run to run.
filenames = sorted(glob.glob('gdrive/MyDrive/KGHS/war*.txt'))
for fn in filenames:
    body = readPoemBody(fn)
    # Drop a single trailing newline so split() does not yield an extra
    # empty last line.  endswith() is safe on an empty body, whereas
    # indexing body[-1] raised IndexError for a poem file with no body.
    if body.endswith('\n'):
        body = body[:-1]
    for line in body.split('\n'):
        tokens = jieba.lcut(line)
        print(line, '->', '/'.join(tokens))