A Demo Project for the UnrealEngineSDK
Loading...
Searching...
No Matches
DlgSpellcheckJsonHumanText.py
Go to the documentation of this file.
1#!/usr/bin/env python3
2# Copyright 2017-2018 Csaba Molnar, Daniel Butum
3
4import os
5import json
6import sys
7import argparse
8import subprocess
9import shlex
10from pathlib import Path
11
12# NOTE: This script is standalone and does not include any libraries
13
class Colors:
    """Terminal escape sequences used by the colored print helpers of this script."""

    # Resets the terminal back to no color
    END = '\033[0m'

    HEADER = '\033[95m'

    # Every color comes as a normal/light pair
    RED = '\033[0;31m'
    RED_LIGHT = '\033[1;31m'

    GREEN = '\033[0;32m'
    GREEN_LIGHT = '\033[1;32m'

    YELLOW = '\033[0;33m'
    YELLOW_LIGHT = '\033[1;33m'

    BLUE = '\033[0;36m'
    BLUE_LIGHT = '\033[1;36m'
31
32
def print_newlines(nr = 1):
    """Print `nr` newline characters; print nothing unless `nr` is greater than zero."""
    if not nr > 0:
        return

    blank_lines = '\n' * nr
    print(blank_lines, end='')
36
37
39 if sys.stdout.isatty():
40 print(Colors.END)
41
42
def _print_internal(color, string, **kwargs):
    """
    Print `string`, wrapped in `color` and the reset sequence when stdout is a real terminal.

    When stdout is piped or redirected the text is printed without any escape sequences.
    All keyword arguments are forwarded to print().
    """
    is_terminal = sys.stdout.isatty()
    prefix = color if is_terminal else ''
    suffix = Colors.END if is_terminal else ''

    print(prefix + string + suffix, **kwargs)
52
53
def print_red(*args, **kwargs):
    """Print all positional arguments, separated by spaces, using Colors.RED."""
    text = " ".join(str(arg) for arg in args)
    _print_internal(Colors.RED, text, **kwargs)
56
57
def print_red_light(*args, **kwargs):
    """Print all positional arguments, separated by spaces, using Colors.RED_LIGHT."""
    text = " ".join(str(arg) for arg in args)
    _print_internal(Colors.RED_LIGHT, text, **kwargs)
60
61
def print_blue(*args, **kwargs):
    """Print all positional arguments, separated by spaces, using Colors.BLUE."""
    text = " ".join(str(arg) for arg in args)
    _print_internal(Colors.BLUE, text, **kwargs)
64
65
def print_blue_light(*args, **kwargs):
    """Print all positional arguments, separated by spaces, using Colors.BLUE_LIGHT."""
    text = " ".join(str(arg) for arg in args)
    _print_internal(Colors.BLUE_LIGHT, text, **kwargs)
68
69
def print_yellow(*args, **kwargs):
    """Print all positional arguments, separated by spaces, using Colors.YELLOW."""
    text = " ".join(str(arg) for arg in args)
    _print_internal(Colors.YELLOW, text, **kwargs)
72
73
def print_yellow_light(*args, **kwargs):
    """Print all positional arguments, separated by spaces, using Colors.YELLOW_LIGHT."""
    text = " ".join(str(arg) for arg in args)
    _print_internal(Colors.YELLOW_LIGHT, text, **kwargs)
76
77
def print_green(*args, **kwargs):
    """Print all positional arguments, separated by spaces, using Colors.GREEN."""
    text = " ".join(str(arg) for arg in args)
    _print_internal(Colors.GREEN, text, **kwargs)
80
81
def print_green_light(*args, **kwargs):
    """Print all positional arguments, separated by spaces, using Colors.GREEN_LIGHT."""
    text = " ".join(str(arg) for arg in args)
    _print_internal(Colors.GREEN_LIGHT, text, **kwargs)
84
85
def get_filename_from_path(path, include_extension=True):
    """
    Return the last component of `path`.

    When `include_extension` is False the last extension (as split by os.path.splitext) is removed.
    """
    basename = os.path.basename(str(path))
    if not include_extension:
        basename = os.path.splitext(basename)[0]

    return basename
93
94
def exit_program(status):
    """Terminate the interpreter with the exit status `status`."""
    # Same effect as sys.exit(status), which raises SystemExit(status)
    raise SystemExit(status)
97
98
def exit_program_error(message=None):
    """Print `message` in red (when one is given) and exit the program with status 1."""
    has_message = message is not None
    if has_message:
        print_red(message)

    exit_program(1)
103
104
106 exit_program(0)
107
108
110 if not os.path.isabs(path):
111 return os.path.abspath(path)
112
113 return path
114
115
def json_parse_file(path):
    """
    Parse the JSON file located at `path`.

    Returns the decoded JSON value (a dictionary for the files this script reads), or None
    after printing an error when the file can't be opened or does not contain valid JSON.
    """
    try:
        # JSON text is UTF-8, don't depend on the default encoding of the platform
        with open(path, encoding="utf-8") as fh:
            return json.load(fh)
    except ValueError as e:
        # Covers both invalid JSON and bytes that can't be decoded
        print_red("Can't parse file = `{}`. Error = `{}`".format(path, e))
        return None
    except IOError as e:
        print_red("Can't open file = `{}`. IOError = `{}`".format(path, e))
        return None
130
131
def json_save_dictionary(path, dictionary):
    """
    Save `dictionary` as JSON (indented with 4 spaces) into the file at `path`.

    Errors are printed in red instead of being raised. Always returns None.
    """
    # Serialize before opening the file, so that a value that can't be serialized
    # does not leave a truncated file behind.
    try:
        serialized = json.dumps(dictionary, indent=4)
    except (TypeError, ValueError) as e:
        # TypeError = unsupported type, ValueError = circular reference
        print_red("Can't save file = `{}`. Error = `{}`".format(path, e))
        return None

    try:
        with open(path, 'w', encoding="utf-8") as fh:
            fh.write(serialized)
    except IOError as e:
        print_red("Can't open file = `{}`. IOError = `{}`".format(path, e))

    return None
142
143
class DlgEdge:
    """An edge of a dialogue node: the index of the node it points to and its text."""

    def __init__(self, target_node_index, text):
        self.text = text
        self.target_node_index = target_node_index

    def __str__(self):
        template = "DlgEdge(target_node_index = {}, text = {})"
        return template.format(self.target_node_index, self.text)

    def __repr__(self):
        # The debug representation is the same as the string one
        return str(self)
154
155
class DlgInnerEdge:
    """An entry of a speech sequence: its text and the text of its edge."""

    def __init__(self, text, edge_text):
        self.edge_text = edge_text
        self.text = text

    def __str__(self):
        template = "InnerEdge(text = {}, edge_text = {})"
        return template.format(self.text, self.edge_text)

    def __repr__(self):
        # The debug representation is the same as the string one
        return str(self)
166
167
class DlgNode:
    """A speech node: its index, its text and the list of its edges (empty after construction)."""

    def __init__(self, node_index, text):
        self.edges = []
        self.text = text
        self.node_index = node_index

    def __str__(self):
        # One edge per line
        edges_str = "\n".join(str(edge) for edge in self.edges)
        return "Node(node_index = {}, text = {}, edges =\n{})".format(self.node_index, self.text, edges_str)

    def __repr__(self):
        # The debug representation is the same as the string one
        return str(self)
179
180
class DlgSpeechSequenceNode:
    """A speech sequence node: its index, its inner edges and its edges (both empty after construction)."""

    def __init__(self, node_index):
        self.edges = []
        self.inner_edges = []
        self.node_index = node_index

    def __str__(self):
        # One inner edge/edge per line
        inner_edges_str = "\n".join(str(inner_edge) for inner_edge in self.inner_edges)
        edges_str = "\n".join(str(edge) for edge in self.edges)
        return "Node(node_index = {}, inner_edges = {}, edges =\n{})".format(self.node_index, inner_edges_str, edges_str)

    def __repr__(self):
        # The debug representation is the same as the string one
        return str(self)
193
194
def is_path_json_human_text(file_path):
    """
    Tell whether `file_path` is an existing file whose name ends with `.dlg_human.json`.

    Only the end of the file name is checked, so a name containing additional dots
    (for example `Name.v2.dlg_human.json`) is accepted too.
    """
    if not os.path.isfile(file_path):
        return False

    extension = ".dlg_human.json"
    name = Path(file_path).name

    # Require something in front of the extension, a file named exactly like the extension is rejected
    return len(name) > len(extension) and name.endswith(extension)
208
209
# Keys of the root object of the json human text file
JSON_KEY_SPEECH_NODES = "SpeechNodes"
JSON_KEY_SPEECH_SEQUENCE_NODES = "SpeechSequenceNodes"

# Keys read from a node
JSON_KEY_NODE_INDEX = "NodeIndex"
JSON_KEY_TEXT = "Text"
JSON_KEY_NODE_EDGES = "Edges"
JSON_KEY_NODE_SEQUENCE = "Sequence"

# Keys read from an entry of a sequence/an edge
JSON_KEY_INNER_EDGE_TEXT = "EdgeText"
JSON_KEY_EDGE_TARGET_NODE_INDEX = "TargetNodeIndex"
218
219
def get_edges_from_node_json(node_json):
    """
    Build the list of DlgEdge found under the edges key of `node_json`.

    Edges that don't have both a target node index and a text are ignored.
    Returns an empty list when the node has no edges key.
    """
    if JSON_KEY_NODE_EDGES not in node_json:
        return []

    return [
        DlgEdge(edge_json[JSON_KEY_EDGE_TARGET_NODE_INDEX], edge_json[JSON_KEY_TEXT])
        for edge_json in node_json[JSON_KEY_NODE_EDGES]
        if JSON_KEY_EDGE_TARGET_NODE_INDEX in edge_json and JSON_KEY_TEXT in edge_json
    ]
229
def get_inner_edges_from_node_json(node_json):
    """
    Build the list of DlgInnerEdge found under the sequence key of `node_json`.

    Entries that don't have both a text and an edge text are ignored.
    Returns an empty list when the node has no sequence key.
    """
    if JSON_KEY_NODE_SEQUENCE not in node_json:
        return []

    return [
        DlgInnerEdge(entry_json[JSON_KEY_TEXT], entry_json[JSON_KEY_INNER_EDGE_TEXT])
        for entry_json in node_json[JSON_KEY_NODE_SEQUENCE]
        if JSON_KEY_TEXT in entry_json and JSON_KEY_INNER_EDGE_TEXT in entry_json
    ]
239
def run_aspell_on_words(words_list_str):
    """
    Run `aspell list -l en_us` on `words_list_str` and return the words it reports.

    The text is sent to aspell on its standard input, it never goes through a shell, so
    quotes, backticks or `$` inside the dialogue text can't break or alter the command.

    Returns the stripped standard output of aspell (empty when nothing is reported),
    or "" when `words_list_str` is empty. Exits the program with an error if aspell can't
    be run or if it fails.
    """
    if not words_list_str:
        return ""

    try:
        # list -l en_us
        # pipe -l en_us --suggest --dont-byte-offsets --dont-guess
        process = subprocess.run(["aspell", "list", "-l", "en_us"], input=str(words_list_str), check=True, cwd=None,
                                 universal_newlines=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    except subprocess.CalledProcessError as e:
        # Raised because of check=True when aspell returns a non zero exit code
        exit_program_error("aspell failed with exit code = {}. stderr = `{}`".format(e.returncode, e.stderr))
    except ValueError as e:
        exit_program_error("ValueError = {}".format(e))
    except OSError as e:
        # For example the aspell executable is not installed
        exit_program_error("OSError = {}".format(e))
    else:
        return process.stdout.strip()
256
def _collect_edge_warnings(edges):
    """Return one formatted warning for every edge in `edges` whose text has spelling mistakes."""
    warnings = []
    for edge in edges:
        edge_mistakes = run_aspell_on_words(edge.text)
        if edge_mistakes:
            warnings.append("TargetNodeIndex = {}\n\t\tOriginal text = `{}`\n\t\tMistakes = `{}`\n".format(edge.target_node_index, edge.text, edge_mistakes))

    return warnings


def spellcheck_json_human_text(file_path):
    """
    Spell check all the texts of the json human text file at `file_path` and print the mistakes.

    Reads the speech nodes (text + edges) and the speech sequence nodes (sequence entries + edges),
    runs aspell on each text and prints a yellow warning for each text that has mistakes.
    Returns early, without checking anything, if the file can't be parsed or is empty.
    """
    print_blue("Reading file = `{}`".format(file_path))

    file_json = json_parse_file(file_path)
    if not file_json:
        return

    # Speech nodes
    speech_nodes = []
    if JSON_KEY_SPEECH_NODES in file_json:
        for node_json in file_json[JSON_KEY_SPEECH_NODES]:
            # Read node, the ones without an index or a text are ignored
            if JSON_KEY_NODE_INDEX in node_json and JSON_KEY_TEXT in node_json:
                node = DlgNode(node_json[JSON_KEY_NODE_INDEX], node_json[JSON_KEY_TEXT])
                node.edges = get_edges_from_node_json(node_json)
                speech_nodes.append(node)

    # Speech sequence nodes
    speech_sequence_nodes = []
    if JSON_KEY_SPEECH_SEQUENCE_NODES in file_json:
        for node_json in file_json[JSON_KEY_SPEECH_SEQUENCE_NODES]:
            # Read node, the ones without an index or a sequence are ignored
            if JSON_KEY_NODE_INDEX in node_json and JSON_KEY_NODE_SEQUENCE in node_json:
                node = DlgSpeechSequenceNode(node_json[JSON_KEY_NODE_INDEX])
                node.edges = get_edges_from_node_json(node_json)
                node.inner_edges = get_inner_edges_from_node_json(node_json)
                speech_sequence_nodes.append(node)

    # Run spell checker on all words
    for node in speech_nodes:
        node_mistakes = run_aspell_on_words(node.text)
        if node_mistakes:
            print_yellow("Node Index = {}\n\tOriginal text = `{}`\n\tMistakes = `{}`\n".format(node.node_index, node.text, node_mistakes))

        warning_edges = _collect_edge_warnings(node.edges)

        # The node index was already printed above if the node text has mistakes
        if warning_edges and not node_mistakes:
            print_yellow("Node index = {}".format(node.node_index))

        for warning in warning_edges:
            print_yellow("\t" + warning)

    for node_sequence in speech_sequence_nodes:
        warnings_inner_edges = []
        for index, inner_edge in enumerate(node_sequence.inner_edges):
            warning = "InnerEdge Index = {}\n".format(index)
            found_mistake = False

            # Check text
            text_mistakes = run_aspell_on_words(inner_edge.text)
            if text_mistakes:
                found_mistake = True
                # Append, so that the index header is kept
                warning += "\tOriginal Text = `{}`\n\tMistakes = `{}`\n".format(inner_edge.text, text_mistakes)

            # Check EdgeText
            edge_mistakes = run_aspell_on_words(inner_edge.edge_text)
            if edge_mistakes:
                found_mistake = True
                # Append, so that the mistakes of the text are kept
                warning += "\tOriginal EdgeText = `{}`\n\tMistakes = `{}`\n".format(inner_edge.edge_text, edge_mistakes)

            if found_mistake:
                warnings_inner_edges.append(warning)

        warning_edges = _collect_edge_warnings(node_sequence.edges)

        if warning_edges or warnings_inner_edges:
            # Use the index of the sequence node being reported, not the one left over from a previous loop
            print_yellow("Speech Sequence Node index = {}".format(node_sequence.node_index))

        for warning in warnings_inner_edges:
            print_yellow(warning)
        for warning in warning_edges:
            print_yellow("\t" + warning)

    print("\n" + "-" * 20)
342
def main(directory):
    """
    Spell check every json human text file found (recursively) inside `directory`.

    A warning is printed for every other file that is encountered.
    """
    print_blue("Finding json human text files inside directory = {}\n".format(directory))

    # Walk over all files in directory
    for root, _subdirs, filenames in os.walk(directory):
        for filename in filenames:
            candidate = os.path.join(root, filename)
            if not is_path_json_human_text(candidate):
                print_yellow("Path = `{}` is not a file or not a valid json human text".format(candidate))
                continue

            spellcheck_json_human_text(candidate)
354
355
if __name__ == "__main__":
    # The directory is optional, a default one is used when it is not given
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        'directory',
        nargs="?",
        type=str,
        help='Directory containing all the json human text files',
        default="DialoguesJsonHumanText/"
    )
    target_directory = arg_parser.parse_args().directory

    # Nothing to do without a valid directory
    if not os.path.isdir(target_directory):
        exit_program_error("`{}` is not directory".format(target_directory))

    main(target_directory)
_print_internal(color, string, **kwargs)
get_filename_from_path(path, include_extension=True)