import os
import codecs
# Directory that contains this file, then its parent with Windows
# backslashes normalised to forward slashes.
_module_dir = os.path.dirname(os.path.abspath(__file__))
root_dir = os.path.dirname(_module_dir).replace('\\', '/')
# Directory under the root where calc_tree.txt is written.
data_path = "/".join((root_dir, "python", "data"))
def dump_ele(tree_f, max_len_dic, ele, nest, processed):
    """Write the subtree rooted at ``ele`` and count the elements visited.

    Args:
        tree_f: Object with a ``write(str)`` method, or ``None`` to walk the
            tree without producing any output (used for counting only).
        max_len_dic: Mapping whose keys are the elements eligible for output.
            An element that is not a key is skipped together with all of its
            descendants.
        ele: Element to dump. It must expose ``verbose_label``, ``id`` and
            ``child_elements``.
        nest: Nesting depth of ``ele``; one space of indentation is written
            per level.
        processed: Set of elements that have already been expanded. It is
            updated in place.

    Returns:
        The size of ``processed`` after the traversal.
    """
    if ele not in max_len_dic:
        return len(processed)

    indent = " " * nest

    if ele in processed:
        # Already expanded elsewhere: print it once more, marked with "*",
        # but do not descend into its children again.
        if tree_f is not None:
            tree_f.write("%s%s %s *\n" % (indent, ele.verbose_label, ele.id))
        return len(processed)

    processed.add(ele)
    if tree_f is not None:
        tree_f.write("%s%s %s\n" % (indent, ele.verbose_label, ele.id))

    for child in ele.child_elements:
        dump_ele(tree_f, max_len_dic, child, nest + 1, processed)

    return len(processed)
def set_max_len_parent2(top_items, max_len_dic, ele):
    """Compute the length of the longest parent chain above ``ele``.

    This is the uncached worker behind ``set_max_len_parent``. As a side
    effect, when ``ele`` has several parents, every parent whose own chain is
    strictly shorter than the longest one is unlinked from ``ele`` (removed
    from ``ele.parents`` and ``ele`` removed from that parent's
    ``child_elements``). Parents that tie for the longest chain are all kept.

    Args:
        top_items: Set that collects elements without parents; updated in
            place.
        max_len_dic: Memo mapping element -> chain length, passed through to
            ``set_max_len_parent``.
        ele: Element exposing ``parents``; its parents must expose
            ``child_elements``.

    Returns:
        0 for an element without parents, otherwise the longest parent chain
        length plus one.
    """
    if not ele.parents:
        # A root of the tree.
        top_items.add(ele)
        return 0

    if len(ele.parents) == 1:
        return set_max_len_parent(top_items, max_len_dic, ele.parents[0]) + 1

    parent_lens = [set_max_len_parent(top_items, max_len_dic, parent) for parent in ele.parents]
    max_len = max(parent_lens)

    # Iterate over a snapshot because ele.parents is modified inside the loop.
    for parent, parent_len in zip(list(ele.parents), parent_lens):
        if parent_len < max_len:
            assert parent in ele.parents
            ele.parents.remove(parent)

            assert ele in parent.child_elements
            parent.child_elements.remove(ele)

    return max_len + 1
def set_max_len_parent(top_items, max_len_dic, ele):
    """Return the longest parent chain length of ``ele``, memoised.

    Args:
        top_items: Set that collects elements without parents; forwarded to
            ``set_max_len_parent2``.
        max_len_dic: Memo mapping element -> chain length. A missing entry is
            computed with ``set_max_len_parent2`` and stored here.
        ele: Element to measure.

    Returns:
        The chain length stored in ``max_len_dic`` for ``ele``.
    """
    if ele in max_len_dic:
        return max_len_dic[ele]

    max_len = set_max_len_parent2(top_items, max_len_dic, ele)
    max_len_dic[ele] = max_len

    return max_len
def write_calc_tree(context_names, ns_xsd_dic, annual_account_stats, quarterly_account_stats, rank = 200):
    """Write the calculation trees of the most frequent accounts to a file.

    The output goes to ``calc_tree.txt`` under ``data_path`` (UTF-8), with one
    section for instant contexts and one for duration contexts.

    Side effects: for every element reached through ``ns_xsd_dic`` that has a
    non-empty ``calcTo``, ``calcTo`` is re-sorted by ``order``,
    ``child_elements`` is rebuilt from it, and the element is appended to each
    child's ``parents``. ``set_max_len_parent`` may afterwards prune some of
    these parent/child links.

    Args:
        context_names: Sequence of context name strings. Only names starting
            with ``'CurrentYear'`` are used; such a name must contain either
            ``'Instant'`` or ``'Duration'``.
        ns_xsd_dic: Mapping namespace -> (mapping key -> element). Elements
            must expose ``id``, ``type``, ``verbose_label``, ``calcTo``,
            ``child_elements`` and ``parents``; ``calcTo`` items must expose
            ``order`` and ``to``.
        annual_account_stats: Statistics for annual securities reports, as a
            mapping whose values are indexable by the position of a context in
            ``context_names`` and yield a mapping ``"ns:tag" -> count``.
        quarterly_account_stats: Same structure, for quarterly reports.
        rank: Number of highest-count accounts taken from each statistics
            mapping.

    Raises:
        AssertionError: If a used context name contains neither ``'Instant'``
            nor ``'Duration'``, if an account's namespace or tag is unknown,
            or if an element is already registered as a parent of its child.
    """
    instant_account_dic = set()
    duration_account_dic = set()

    # For each kind of report (annual securities report, quarterly report).
    for account_stats in (annual_account_stats, quarterly_account_stats):

        # For each accounting standard.
        for stats in account_stats.values():

            # For each kind of context.
            for idx, context_name in enumerate(context_names):
                if not context_name.startswith('CurrentYear'):
                    continue

                if 'Instant' in context_name:
                    # Point-in-time context.
                    dic = instant_account_dic
                else:
                    # Period context.
                    assert 'Duration' in context_name
                    dic = duration_account_dic

                counts = sorted(stats[idx].items(), key=lambda x: x[1], reverse=True)

                # Use only the most frequent items.
                dic.update(item[0] for item in counts[:rank])

    instant_account_ids = list(instant_account_dic)
    duration_account_ids = list(duration_account_dic)

    # Per namespace, keep only the entries stored under the element's own id
    # and build the parent/child links from the calculation arcs.
    ns_xsd_dic2 = {}
    for ns, dic in ns_xsd_dic.items():
        dic2 = {}
        ns_xsd_dic2[ns] = dic2

        for key, ele in dic.items():
            if key != ele.id:
                continue

            dic2[key] = ele

            if ele.calcTo:
                ele.calcTo = sorted(ele.calcTo, key=lambda x: x.order)
                ele.child_elements = [x.to for x in ele.calcTo]

                for child in ele.child_elements:
                    assert ele not in child.parents
                    child.parents.append(ele)

    # The with-block guarantees the file is closed even if an assertion or
    # lookup below fails.
    with codecs.open("%s/calc_tree.txt" % data_path, 'w', 'utf-8') as tree_f:
        # Section labels: "会計終了時点" = end of the accounting period (instant),
        # "会計期間" = accounting period (duration).
        for ids, context_name in zip((instant_account_ids, duration_account_ids), ("会計終了時点", "会計期間")):
            # NOTE(review): "コンテスト" looks like a typo for "コンテキスト"
            # (context); left unchanged because it is part of the emitted output.
            tree_f.write("\n%s\nコンテスト : %s\n%s\n" % ('-'*80, context_name, '-'*80))

            all_items = []
            seen_items = set()
            for account_id in ids:
                ns, tag_name = account_id.split(':')

                # Get the schema dictionary for the namespace.
                assert ns in ns_xsd_dic2
                xsd_dic = ns_xsd_dic2[ns]

                # Get the element for the tag name, falling back to the
                # "<ns>_<tag>" form of the key.
                if tag_name not in xsd_dic:
                    tag_name = ns + "_" + tag_name
                assert tag_name in xsd_dic
                ele = xsd_dic[tag_name]

                # Skip string, text-block and date items.
                if ele.type in ['stringItemType', 'textBlockItemType', 'dateItemType']:
                    continue

                # Keep first-seen order while dropping duplicates.
                if ele not in seen_items:
                    seen_items.add(ele)
                    all_items.append(ele)

            top_items = set()
            max_len_dic = {}
            for ele in all_items:
                set_max_len_parent(top_items, max_len_dic, ele)

            # Dry run (no output) to count the elements under each root, then
            # write the roots starting with the largest tree.
            top_cnts = [(ele, dump_ele(None, max_len_dic, ele, 0, set())) for ele in top_items]
            top_cnts.sort(key=lambda x: x[1], reverse=True)

            for ele, _ in top_cnts:
                dump_ele(tree_f, max_len_dic, ele, 0, set())