10from pathlib
import Path
18 BLUE_LIGHT =
'\033[1;36m'
21 GREEN_LIGHT =
'\033[1;32m'
24 YELLOW_LIGHT =
'\033[1;33m'
27 RED_LIGHT =
'\033[1;31m'
35 print(
'\n' * nr, end=
'')
39 if sys.stdout.isatty():
44 if sys.stdout.isatty():
46 prefix, suffix = color, Colors.END
49 prefix, suffix =
'',
''
51 print(prefix + string + suffix, **kwargs)
75 _print_internal(Colors.YELLOW_LIGHT,
" ".join(map(str, args)), **kwargs)
87 filename = os.path.basename(str(path))
91 file, extension = os.path.splitext(filename)
100 if message
is not None:
110 if not os.path.isabs(path):
111 return os.path.abspath(path)
121 with open(path)
as fh:
124 except ValueError
as e:
125 print_red(
"Can't parse file = `{}`. Error = `{}`".format(path, e))
128 print_red(
"Can't open file = `{}`. IOError = `{}`".format(path, e))
134 with open(path,
'w')
as fh:
136 json.dump(dictionary, fh, indent=4)
137 except ValueError
as e:
138 print_red(
"Can't save file = `{}`. Error = `{}`".format(path, e))
141 print_red(
"Can't open file = `{}`. IOError = `{}`".format(path, e))
162 return "InnerEdge(text = {}, edge_text = {})".format(self.
text, self.
edge_text)
175 return "Node(node_index = {}, text = {}, edges =\n{})".format(self.
node_index, self.
text,
"\n".join(str(n)
for n
in self.
edges))
189 return "Node(node_index = {}, inner_edges = {}, edges =\n{})".format(self.
node_index,
"\n".join(str(n)
for n
in self.
inner_edges),
"\n".join(str(n)
for n
in self.
edges))
196 if not os.path.isfile(file_path):
199 path = Path(file_path)
200 if "".join(path.suffixes) !=
".dlg_human.json":
# Keys looked up in the parsed `.dlg_human.json` dialogue files.

# Top-level keys; each is iterated as a list of node objects.
JSON_KEY_SPEECH_NODES = "SpeechNodes"
JSON_KEY_SPEECH_SEQUENCE_NODES = "SpeechSequenceNodes"

# Keys checked on a node object. JSON_KEY_TEXT is also required on every
# edge entry and every sequence (inner edge) entry.
JSON_KEY_NODE_INDEX = "NodeIndex"
JSON_KEY_TEXT = "Text"
JSON_KEY_NODE_EDGES = "Edges"
JSON_KEY_NODE_SEQUENCE = "Sequence"

# Keys checked on entries of a node's "Sequence" and "Edges" lists respectively.
JSON_KEY_INNER_EDGE_TEXT = "EdgeText"
JSON_KEY_EDGE_TARGET_NODE_INDEX = "TargetNodeIndex"
222 if JSON_KEY_NODE_EDGES
in node_json:
223 for node_edge
in node_json[JSON_KEY_NODE_EDGES]:
224 if JSON_KEY_EDGE_TARGET_NODE_INDEX
in node_edge
and JSON_KEY_TEXT
in node_edge:
225 edge =
DlgEdge(node_edge[JSON_KEY_EDGE_TARGET_NODE_INDEX], node_edge[JSON_KEY_TEXT])
232 if JSON_KEY_NODE_SEQUENCE
in node_json:
233 for node_inner_edge
in node_json[JSON_KEY_NODE_SEQUENCE]:
234 if JSON_KEY_TEXT
in node_inner_edge
and JSON_KEY_INNER_EDGE_TEXT
in node_inner_edge:
235 inner_edge =
DlgInnerEdge(node_inner_edge[JSON_KEY_TEXT], node_inner_edge[JSON_KEY_INNER_EDGE_TEXT])
236 inner_edges.append(inner_edge)
241 if not words_list_str:
248 process = subprocess.run(
"echo \"{}\" | aspell list -l en_us".format(words_list_str), check=
True, cwd=
None, universal_newlines=
True, \
249 stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=
True)
250 except ValueError
as e:
255 return process.stdout.strip()
258 print_blue(
"Reading file = `{}`".format(file_path))
266 speech_sequence_node = []
267 if JSON_KEY_SPEECH_NODES
in file_json:
268 for node_json
in file_json[JSON_KEY_SPEECH_NODES]:
271 if JSON_KEY_NODE_INDEX
in node_json
and JSON_KEY_TEXT
in node_json:
272 node =
DlgNode(node_json[JSON_KEY_NODE_INDEX], node_json[JSON_KEY_TEXT])
274 speech_nodes.append(node)
278 if JSON_KEY_SPEECH_SEQUENCE_NODES
in file_json:
279 for node_json
in file_json[JSON_KEY_SPEECH_SEQUENCE_NODES]:
281 if JSON_KEY_NODE_INDEX
in node_json
and JSON_KEY_NODE_SEQUENCE
in node_json:
285 speech_sequence_node.append(node)
288 for node
in speech_nodes:
291 print_yellow(
"Node Index = {}\n\tOriginal text = `{}`\n\tMistakes = `{}`\n".format(node.node_index, node.text, node_mistakes))
294 for edge
in node.edges:
297 warning_edges.append(
"TargetNodeIndex = {}\n\t\tOriginal text = `{}`\n\t\tMistakes = `{}`\n".format(edge.target_node_index, edge.text, edge_mistakes))
299 if warning_edges
and not node_mistakes:
302 for warning
in warning_edges:
305 for node_sequence
in speech_sequence_node:
306 warnings_inner_edges = []
307 for index, inner_edge
in enumerate(node_sequence.inner_edges):
308 warning =
"InnerEdge Index = {}\n".format(index)
309 found_mistake =
False
315 warning =
"\tOriginal Text = `{}`\n\tMistakes = `{}`\n".format(inner_edge.text, text_mistakes)
321 warning =
"\tOriginal EdgeText = `{}`\n\tMistakes = `{}`\n".format(inner_edge.edge_text, edge_mistakes)
324 warnings_inner_edges.append(warning)
327 for edge
in node_sequence.edges:
330 warning_edges.append(
"TargetNodeIndex = {}\n\t\tOriginal text = `{}`\n\t\tMistakes = `{}`\n".format(edge.target_node_index, edge.text, edge_mistakes))
332 if warning_edges
or warnings_inner_edges:
333 print_yellow(
"Speech Sequence Node index = {}".format(node.node_index))
335 for warning
in warnings_inner_edges:
337 for warning
in warning_edges:
341 print(
"\n" +
"-" * 20)
344 print_blue(
"Finding json human text files inside directory = {}\n".format(directory))
347 for path, subdirs, files
in os.walk(directory):
349 full_filename = os.path.join(path, name)
353 print_yellow(
"Path = `{}` is not a file or not a valid json human text".format(full_filename))
356if __name__ ==
"__main__":
357 parser = argparse.ArgumentParser()
358 parser.add_argument(
'directory', nargs=
"?", type=str, help=
'Directory containing all the json human text files', default=
"DialoguesJsonHumanText/")
359 args = parser.parse_args()
361 if not os.path.isdir(args.directory):
__init__(self, target_node_index, text)
__init__(self, text, edge_text)
__init__(self, node_index, text)
__init__(self, node_index)
print_green(*args, **kwargs)
is_path_json_human_text(file_path)
print_blue_light(*args, **kwargs)
json_save_dictionary(path, dictionary)
print_blue(*args, **kwargs)
_print_internal(color, string, **kwargs)
print_yellow(*args, **kwargs)
get_edges_from_node_json(node_json)
get_filename_from_path(path, include_extension=True)
print_yellow_light(*args, **kwargs)
get_inner_edges_from_node_json(node_json)
spellcheck_json_human_text(file_path)
print_green_light(*args, **kwargs)
print_red(*args, **kwargs)
run_aspell_on_words(words_list_str)
exit_program_error(message=None)
print_red_light(*args, **kwargs)
convert_path_to_absolute_if_not_already(path)