Python - Ayúdenme a comprender este código.

 
Vista:

Ayúdenme a comprender este código.

Publicado por Alejandra (1 intervención) el 01/12/2017 14:57:29
Hola, quisiera que alguien me ayude a comprender cuál es el input de este código y qué hace.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
''' Module to generate max, total and docs json files. '''
import json
 
import argparse
 
from os import listdir
from os.path import isfile, join
 
import math
import sys
 
# Keys used inside each per-document entry of docs.json.
TITLE_FIELD = 'title'
WORDS_FIELD = 'words'


def gen_jsons(files):
    ''' Generate max, total and docs files from FILES files array.

    Every input file is read as a sequence of three-line records:
    line 1 is the document id, line 2 its title and line 3 its content as
    space-separated words. A trailing group of fewer than three lines is
    ignored.

    Three files are written to the current working directory:

    * docs.json  -- {doc_id: {'title': ..., 'words': {word: weight},
                    'norm': Euclidean norm of the weights}}
    * total.json -- {word: number of processed records containing the word}
    * max.json   -- {word: highest weight of the word in any document}

    A weight is (occurrences of the word / number of distinct words in the
    document) * log(number of records / records containing the word).

    NOTE: if the same id appears twice, the later record replaces the
    earlier one in docs.json, yet both still count towards the totals.

    files: iterable of paths to the input files.
    '''
    max_map = dict()
    total_map = dict()
    docs_map = dict()

    docs_count = 0

    for filename in files:
        # The context manager closes the file even if reading fails.
        with open(filename, 'r') as handle:
            lines = handle.readlines()

        # Step through complete three-line records only.
        for start in range(0, len(lines) - 2, 3):
            raw_id, raw_title, content = lines[start:start + 3]

            # The last space-separated token is discarded; this assumes the
            # content line ends with a space before the newline (otherwise
            # the final word is dropped together with the newline).
            words = content.split(' ')[:-1]

            doc_id = raw_id[:-1]  # omit \n char
            doc_words = dict()
            docs_map[doc_id] = {
                TITLE_FIELD: raw_title[:-1],  # omit \n char
                WORDS_FIELD: doc_words,
            }

            for word in words:
                doc_words[word] = doc_words.get(word, 0) + 1

            # Normalise the raw counts by the number of distinct words.
            distinct = float(len(doc_words))
            for word in doc_words:
                doc_words[word] = doc_words[word] / distinct
                # one more record that contains this word
                total_map[word] = total_map.get(word, 0) + 1

            docs_count += 1

    for doc in docs_map:
        words = docs_map[doc][WORDS_FIELD]
        for word in words:
            # float() forces true division: on Python 2 the int / int form
            # floors the ratio and flattens most weights to log(1) == 0.
            idf = math.log(float(docs_count) / total_map[word])
            words[word] = words[word] * idf

            # float('-inf') replaces sys.maxint, which Python 3 removed.
            max_map[word] = max(words[word], max_map.get(word, float('-inf')))

    for doc in docs_map:
        weights = docs_map[doc][WORDS_FIELD].values()
        docs_map[doc]['norm'] = math.sqrt(sum(w * w for w in weights))

    # Serialise everything first so a failure here leaves no truncated file.
    outputs = (
        ('docs.json', json.dumps(docs_map)),
        ('total.json', json.dumps(total_map)),
        ('max.json', json.dumps(max_map)),
    )
    for out_name, payload in outputs:
        with open(out_name, 'w') as out_file:
            out_file.write(payload)
 
def main():
    ''' Main function.

    Parse the command line, build the list of input files and hand it to
    gen_jsons. Positional arguments are file paths by default; with
    --dir / -d they are directories and every regular file directly inside
    each of them is used (sub-directories are not descended into).
    Paths that are not regular files are skipped silently.
    '''
    parser = argparse.ArgumentParser(description='Generate json files.')
    parser.add_argument('paths', metavar='path', type=str, nargs='+',
                        help='a path to directories or files')
    parser.add_argument('--dir', '-d', dest='directories', action='store_const',
                        const=True, default=False,
                        help='paths are directories (default: paths are files)')

    args = parser.parse_args()

    files = []
    if args.directories:
        # directories: accumulate across all of them -- rebinding the list
        # on each pass would keep only the last directory's files.
        for path in args.paths:
            files.extend(join(path, entry) for entry in listdir(path)
                         if isfile(join(path, entry)))
    else:
        # files
        files.extend(path for path in args.paths if isfile(path))

    gen_jsons(files)
 
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
Valora esta pregunta
Me gusta: Esta pregunta es útil y está clara. No me gusta: Esta pregunta no está clara o no es útil.
0
Responder