-
Notifications
You must be signed in to change notification settings - Fork 20
/
Copy pathMarkovChain.py
95 lines (77 loc) · 3.2 KB
/
MarkovChain.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
# -*- coding: utf-8 -*-
import os
import numpy as np #For matrix multiplication
import networkx as nx # For the chain building
wd = 'C:\\PythonCode' #Your working directory with the data set file in it
os.chdir(wd)
filename = 'worklist.csv' #file name
sep1 = ';' #separator in the file
start_state = 'start General course' #name of the start node
end_states = ['Finished course', 'Dropped General', 'fell asleep'] #list of end nodes
steps = 1000 # Number of steps in the random walk in the chain
fhand = open(filename, 'r')
i = 0
data01 = []
states = []
#just a sorting key to sort the array by first student_id and then event time
def sort_key (line):
result = int(line[0]) * 1000000 + float(line[2])
return(result)
for line in fhand:
data01.append(line.rstrip().replace(',', '.').split(sep = sep1))
states.append(line.rstrip().replace(',', '.').split(sep = sep1)[1])
fhand.close()
data01.sort(key = sort_key)
states = list(set(states))
calc_matrix = []
for i in range(len(states)):
calc_matrix.append([])
for j in range(len(states)):
calc_matrix[i].append(0)
for i in range (len(data01) - 1):
if data01[i][0] == data01[i+1][0]: #if the student is the same
calc_matrix[states.index(data01[i][1])][states.index(data01[i+1][1])] += 1
for i in range(len(calc_matrix)):
if states[i] in end_states:
for j in range(len(calc_matrix[i])):
calc_matrix[i][j] = 0
TransitionMatrix = []
for i in range(len(calc_matrix)):
TransitionMatrix.append([])
for j in range(len(calc_matrix)):
if sum(calc_matrix[i]) != 0:
TransitionMatrix[i].append(float(calc_matrix[i][j]) / sum(calc_matrix[i]))
else:
TransitionMatrix[i].append(0)
#the graph
G = nx.MultiDiGraph()
for i in range(len(TransitionMatrix)):
for j in range(len(TransitionMatrix)):
if round(TransitionMatrix[i][j], 2) != 0:
G.add_edge(states[i], states[j], weight=round(TransitionMatrix[i][j], 2), label="{:.02f}".format(TransitionMatrix[i][j]))
#nx.drawing.nx_pydot.write_dot(G, 'MarkovChain.dot') #dot file with the graph
#to make the end node probability calculation we have to make sure the matrix multiplication stays valid
#for that the nodes without out-connections are given the self-connecting edge
for i in range(len(TransitionMatrix)):
if sum(TransitionMatrix[i]) == 0:
TransitionMatrix[i][i] = 1
#Initial position
InitialVector = []
for state in states:
if state == start_state:
InitialVector.append(1)
else:
InitialVector.append(0)
#ProbabilityMatrix
ProbabilityMatrix = np.linalg.matrix_power(TransitionMatrix, steps)
#ProbabilityVector
ProbabilityVector = np.matmul(InitialVector, ProbabilityMatrix)
for i in range(len(ProbabilityVector)):
ProbabilityVector[i] = round(ProbabilityVector[i], 2)
Result = dict(zip(states, ProbabilityVector))
print('\nProbability Vector:\n', Result)
#adding sizes (probabilities) to the graph nodes
for i in range(len(TransitionMatrix)):
G.add_node(states[i], size = int(ProbabilityVector[i]*100))
#the gef file of the graph. You can use Gephi to open
nx.write_gexf(G, "Graph.gexf")