clear
% Setup for per-record training of word vectors from a co-occurrence file:
% read the vocabulary size, randomly initialise the parameters, configure
% the optimiser and open the co-occurrence file for the training loop.

% NOTE(review): both paths use Windows-style '\' separators and are relative
% to the current working directory.
vocab_path = "..\vocab.txt";
vocabfileID = fopen(vocab_path,'r');
if vocabfileID == -1
    % fopen signals failure with -1; stop here instead of failing later
    % inside linecount with an invalid-identifier error.
    error('glove:vocabOpenFailed', 'Could not open vocabulary file: %s', vocab_path);
end
vocab_size = linecount(vocabfileID);
fclose(vocabfileID);
% NOTE(review): one is subtracted from the reported line count - presumably
% to discount a header or trailing line; confirm against linecount.
vocab_size = vocab_size - 1;

% Dimensionality of each word vector.
vector_size = 50;

% One row per vocabulary entry, initialised uniformly in (0, 1).
target_matrix = rand(vocab_size, vector_size);
context_matrix = rand(vocab_size, vector_size);
bias_target = rand(vocab_size , 1);
bias_context = rand(vocab_size , 1);

% Optimiser settings shared by every per-record fminunc call.
% NOTE(review): no gradient is supplied, so fminunc estimates it by finite
% differences over 2*vector_size+2 variables; a budget of 50 function
% evaluations is likely exhausted almost immediately - confirm it is intended.
options = optimoptions(@fminunc,'Display','none','Algorithm','quasi-newton', 'UseParallel', false,'FunValCheck', 'on', 'MaxFunctionEvaluations',50);

cooccurrence_file='..\cooccurrence.txt';
fileID = fopen(cooccurrence_file);
if fileID == -1
    error('glove:cooccurrenceOpenFailed', 'Could not open co-occurrence file: %s', cooccurrence_file);
end

% Weighting-function parameters: counts below x_max are down-weighted by
% (count / x_max)^alpha.
x_max = 10;
alpha = 0.75;

% Number of lines read from the co-occurrence file so far.
counter = 0;
% Stream the co-occurrence file, one record per line in the form
% "target_id context_id count", and for each record minimise the weighted
% squared error
%     f(X) * (dot(w_target, w_context) + b_target + b_context - log(X))^2
% over that record's target vector, context vector and two biases only.
% The optimised values are written back into the parameter matrices.
try
    while ~feof(fileID)
        counter = counter + 1;
        % Progress indicator every 100000 lines.
        if mod(counter,100000)==0
            disp(counter);
        end

        thisline = fgetl(fileID);
        % fgetl returns -1 (not char) at end of file; blank lines hold no record.
        if ~ischar(thisline) || isempty(strtrim(thisline))
            continue;
        end

        records = strsplit(strtrim(thisline), ' ');
        if numel(records) < 3
            error('glove:badRecord', ...
                'Line %d of %s has fewer than 3 fields.', counter, cooccurrence_file);
        end
        target_id = str2double(records{1});
        context_id = str2double(records{2});
        cooccurrence_value = str2double(records{3});
        % log() of a non-positive count is not a finite real number, and NaN
        % ids cannot index the matrices; fail with the offending line number.
        if isnan(target_id) || isnan(context_id) || ~(cooccurrence_value > 0)
            error('glove:badRecord', ...
                'Line %d of %s is not a valid "target context count" record.', ...
                counter, cooccurrence_file);
        end

        % Pack this record's parameters into one row vector:
        % [target vector, context vector, target bias, context bias].
        concatenated_vector = horzcat(target_matrix(target_id,:),context_matrix(context_id,:),bias_target(target_id),bias_context(context_id));

        % Weighting function f(X): (X / x_max)^alpha below x_max, 1 otherwise.
        if cooccurrence_value < x_max
            weight = (cooccurrence_value / x_max)^alpha;
        else
            weight = 1;
        end
        log_count = log(cooccurrence_value);

        % The residual (dot product + both biases - log count) is squared as
        % a whole; the biases and log term must sit outside the dot product.
        fun = @(v) weight * ( ...
            dot(v(1:vector_size), v(vector_size+1:2*vector_size)) ...
            + v(2*vector_size+1) + v(2*vector_size+2) ...
            - log_count)^2;

        concatenated_vector = fminunc(fun,concatenated_vector,options);

        % Unpack the optimised parameters back into the model.
        target_matrix(target_id,:) = concatenated_vector(1:vector_size);
        context_matrix(context_id,:) = concatenated_vector(vector_size+1:2*vector_size);
        bias_target(target_id) = concatenated_vector(2*vector_size+1);
        bias_context(context_id) = concatenated_vector(2*vector_size+2);
    end
catch err
    % Release the file handle before propagating the failure.
    fclose(fileID);
    rethrow(err);
end
fclose(fileID);