Ayúdenme a comprender este código.
Publicado por Alejandra (1 intervención) el 01/12/2017 14:57:29
Hola, quisiera que alguien me ayude a comprender cuál es el input de este código y qué hace.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
''' Module to generate max, total and docs json files. '''
import json
import argparse
from os import listdir
from os.path import isfile, join
import math
import sys
# Keys used inside every per-document entry written to docs.json.
TITLE_FIELD = 'title'
WORDS_FIELD = 'words'


def gen_jsons(files):
    ''' Generate max, total and docs json files from FILES files array.

    Every input file is read as a sequence of 3-line records: a document
    id, a title and a line of space-separated words. An incomplete
    trailing record is ignored. Three files are written to the current
    working directory:

    docs.json  -- per document id: its title, one weight per distinct
                  word and the Euclidean 'norm' of those weights.
    total.json -- per word: number of records whose words line has it.
    max.json   -- per word: the largest weight it reaches in any document.

    A word's weight is (occurrences / distinct words in the document)
    multiplied by log(records read / records containing the word).

    files: iterable of paths to the input files.
    Raises IOError/OSError if a file cannot be opened or written.
    '''
    max_map = {}
    total_map = {}
    docs_map = {}
    docs_count = 0

    for filename in files:
        # 'with' releases the handle even when reading raises.
        with open(filename, 'r') as source:
            lines = source.readlines()

        # Walk complete 3-line records only; leftovers at the end are skipped.
        for start in range(0, len(lines) - 2, 3):
            raw_id, raw_title, content = lines[start:start + 3]

            # Drop the line terminator first, then the empty token left by a
            # trailing space, so the last real word is never discarded.
            tokens = content.rstrip('\n').split(' ')
            if tokens[-1] == '':
                tokens.pop()

            doc_words = {}
            for word in tokens:
                doc_words[word] = doc_words.get(word, 0) + 1

            # Normalise counts by the number of distinct words in the record.
            distinct = float(len(doc_words))
            for word in doc_words:
                doc_words[word] = doc_words[word] / distinct
                total_map[word] = total_map.get(word, 0) + 1

            # A repeated id replaces the earlier entry but is still counted.
            docs_map[raw_id.rstrip('\n')] = {
                TITLE_FIELD: raw_title.rstrip('\n'),
                WORDS_FIELD: doc_words,
            }
            docs_count += 1

    for entry in docs_map.values():
        words = entry[WORDS_FIELD]
        for word in words:
            # float() forces true division on Python 2 as well as Python 3.
            words[word] *= math.log(docs_count / float(total_map[word]))
            # Explicit membership test replaces the Python-2-only sys.maxint
            # sentinel.
            if word not in max_map or words[word] > max_map[word]:
                max_map[word] = words[word]
        entry['norm'] = math.sqrt(sum(w * w for w in words.values()))

    outputs = (
        ('docs.json', docs_map),
        ('total.json', total_map),
        ('max.json', max_map),
    )
    for path, payload in outputs:
        with open(path, 'w') as out:
            out.write(json.dumps(payload))
def main(argv=None):
    ''' Main function.

    Parse the command line, collect the input files and pass them to
    gen_jsons. Positional arguments are file paths by default; with
    --dir / -d they are directories and every regular file directly
    inside each of them is used (no recursion).

    argv: optional list of arguments to parse instead of the process
          command line; None keeps the default argparse behaviour.
    '''
    parser = argparse.ArgumentParser(description='Generate json files.')
    parser.add_argument('paths', metavar='path', type=str, nargs='+',
                        help='a path to directories or files')
    parser.add_argument('--dir', '-d', dest='directories', action='store_const',
                        const=True, default=False,
                        help='paths are directories (default: paths are files)')
    args = parser.parse_args(argv)

    files = []
    for path in args.paths:
        if args.directories:
            # Accumulate across directories; reassigning the list here would
            # keep only the files of the last directory given.
            files.extend(
                join(path, name) for name in listdir(path)
                if isfile(join(path, name))
            )
        elif isfile(path):
            # Paths that are not regular files are skipped silently.
            files.append(path)
    gen_jsons(files)
# Run the command-line entry point only when executed as a script,
# not when the module is imported.
if __name__ == "__main__":
    main()
Valora esta pregunta
0