Skip to content
Toggle navigation
P
Projects
G
Groups
S
Snippets
Help
etcart
/
RIVet
This project
Loading...
Sign in
Toggle navigation
Go to a project
Project
Repository
Issues
0
Merge Requests
0
Pipelines
Wiki
Snippets
Members
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Commit
31b0c44b
authored
Feb 11, 2018
by
Ethan
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
added more comment
parent
7c37cc43
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
62 additions
and
34 deletions
RIVtoolsCPUlinux.h
RIVtoolsCPUlinux.h
View file @
31b0c44b
...
...
@@ -32,19 +32,26 @@ sparseRIV fileToL2(FILE *input);
* this is important if you will be lexPush-ing those words later
*/
sparseRIV
fileToL2Clean
(
FILE
*
data
);
/* cosine determines the "similarity" between two RIVs. */
void
cosineCompare
(
sparseRIV
baseRIV
,
sparseRIV
*
multipliers
,
size_t
multiplierCount
,
int
(
*
action
)(
float
cosine
,
sparseRIV
base
,
sparseRIV
multiplier
));
/* magnitudes will be used later in cosine comparison */
void
getMagnitudes
(
sparseRIV
*
inputs
,
size_t
RIVCount
);
sparseRIV
text2L2
(
unsigned
char
*
text
);
//unused
unsigned
char
*
sscanAdvance
(
unsigned
char
**
string
,
unsigned
char
*
word
);
//unused
unsigned
char
*
sscanAdvance
(
unsigned
char
**
string
,
unsigned
char
*
word
);
//unused
except in text2L2
sparseRIV
fileToL2
(
FILE
*
data
){
unsigned
int
blockSize
;
unsigned
char
word
[
100
]
=
{
0
};
/* locations (the implicit RIV) are stored temporarily in the temp block, and moved
* to their permanent home during consolidation */
int
*
locations
=
RIVKey
.
h_tempBlock
;
int
locationCount
=
0
;
while
(
fscanf
(
data
,
"%99s"
,
word
)){
if
(
feof
(
data
)){
...
...
@@ -53,22 +60,28 @@ unsigned char *sscanAdvance(unsigned char **string, unsigned char *word);//unuse
if
(
!
(
*
word
)){
break
;
}
blockSize
=
locationCount
+
RIVKey
.
nonZeros
;
/* if this word would overflow the locations block, grow it */
if
(
blockSize
>
RIVKey
.
tempSize
){
RIVKey
.
h_tempBlock
=
(
int
*
)
realloc
(
RIVKey
.
h_tempBlock
,
blockSize
*
sizeof
(
int
));
locations
=
RIVKey
.
h_tempBlock
;
RIVKey
.
tempSize
+=
RIVKey
.
nonZeros
;
}
/* add word's L1 RIV to the accumulating implicit RIV */
makeSparseLocations
(
word
,
locations
,
locationCount
);
locationCount
+
+
;
locationCount
+
=
RIVKey
.
nonZeros
;
}
int
*
L2dense
;
L2dense
=
mapI2D
(
locations
,
locationCount
);
/* in the next two steps, an implicit RIV is converted to a sparseRIV */
L2dense
=
mapI2D
(
locations
,
locationCount
);
sparseRIV
output
=
consolidateD2S
(
L2dense
);
free
(
L2dense
);
/* frequency records the number of words in this file */
output
.
frequency
=
locationCount
/
RIVKey
.
nonZeros
;
output
.
boolean
=
1
;
return
output
;
...
...
@@ -92,6 +105,10 @@ sparseRIV fileToL2Clean(FILE *data){
if
(
!
(
*
word
)){
break
;
}
/* if the word is not clean, skip it */
if
(
!
isWordClean
((
char
*
)
word
)){
continue
;
}
blockSize
=
locationCount
+
RIVKey
.
nonZeros
;
if
(
blockSize
>
RIVKey
.
tempSize
){
RIVKey
.
h_tempBlock
=
(
int
*
)
realloc
(
RIVKey
.
h_tempBlock
,
blockSize
*
sizeof
(
int
));
...
...
@@ -113,58 +130,65 @@ sparseRIV fileToL2Clean(FILE *data){
output
.
boolean
=
1
;
return
output
;
}
void
cosineCompare
Unbound
(
sparseRIV
baseRIV
,
sparseRIV
*
multipliers
,
size_t
multiplierCount
,
float
threshold
){
void
cosineCompare
(
sparseRIV
baseRIV
,
sparseRIV
*
multipliers
,
size_t
multiplierCount
,
int
(
*
action
)(
float
cosine
,
sparseRIV
base
,
sparseRIV
multiplier
)
){
int
*
baseDenseRIV
=
RIVKey
.
h_tempBlock
;
mapS2D
(
baseDenseRIV
,
baseRIV
);
float
cosSim
;
sparseRIV
*
multipliersStop
=
multipliers
+
multiplierCount
;
while
(
multipliers
<
multipliersStop
){
if
((
*
multipliers
).
boolean
){
/* if two vectors are too different in size, we can ignore the risk of similarity */
float
minsize
=
baseRIV
.
magnitude
*
.
85
;
float
maxsize
=
baseRIV
.
magnitude
*
1
.
15
;
int
dot
=
0
;
int
*
values
=
(
*
multipliers
).
values
;
int
*
locations
=
(
*
multipliers
).
locations
;
int
*
locations_Stop
=
locations
+
(
*
multipliers
).
count
;
while
(
locations
<
locations_Stop
){
int
*
values
;
int
*
locations
;
int
*
locations_Stop
;
/* check the baseRIV against each multiplier */
while
(
multipliers
<
multipliersStop
){
/* skip a pair if the multiplier has already been culled, or if
* the size difference is too great */
if
(((
*
multipliers
).
boolean
)
&&
(((
*
multipliers
).
magnitude
<
maxsize
)
&&
((
*
multipliers
).
magnitude
>
minsize
))){
dot
=
0
;
values
=
(
*
multipliers
).
values
;
locations
=
(
*
multipliers
).
locations
;
locations_Stop
=
locations
+
(
*
multipliers
).
count
;
while
(
locations
<
locations_Stop
){
/* we calculate the dot-product to derive the cosine */
dot
+=
(
*
values
)
*
(
*
(
baseDenseRIV
+
(
*
locations
)));
locations
++
;
values
++
;
}
/* magnitudes must already have been calculated at this point */
cosSim
=
dot
/
((
baseRIV
.
magnitude
)
*
((
*
multipliers
).
magnitude
));
if
(
cosSim
>=
threshold
){
printf
(
"%s
\t
%s
\n
%f
\n
"
,
(
*
multipliers
).
name
,
baseRIV
.
name
,
cosSim
);
(
*
multipliers
).
boolean
=
0
;
RIVKey
.
thing
++
;
scanf
(
"%d"
,
&
RIVKey
.
thing
);
}
/* perform the action defined by the action function */
action
(
cosSim
,
baseRIV
,
(
*
multipliers
));
}
multipliers
++
;
}
}
void
cosineCompare
(
sparseRIV
baseRIV
,
sparseRIV
*
multipliers
,
size_t
multiplierCount
,
int
(
*
action
)(
float
cosine
,
sparseRIV
base
,
sparseRIV
multiplier
)){
/* unbound works without skipping on size */
void
cosineCompareUnbound
(
sparseRIV
baseRIV
,
sparseRIV
*
multipliers
,
size_t
multiplierCount
,
float
threshold
){
int
*
baseDenseRIV
=
RIVKey
.
h_tempBlock
;
mapS2D
(
baseDenseRIV
,
baseRIV
);
float
cosSim
;
sparseRIV
*
multipliersStop
=
multipliers
+
multiplierCount
;
float
minsize
=
baseRIV
.
magnitude
*
.
85
;
float
maxsize
=
baseRIV
.
magnitude
*
1
.
15
;
int
dot
=
0
;
int
*
values
;
int
*
locations
;
int
*
locations_Stop
;
while
(
multipliers
<
multipliersStop
){
if
(((
*
multipliers
).
boolean
)
&&
(((
*
multipliers
).
magnitude
<
maxsize
)
&&
((
*
multipliers
).
magnitude
>
minsize
))){
dot
=
0
;
values
=
(
*
multipliers
).
values
;
locations
=
(
*
multipliers
).
locations
;
locations_Stop
=
locations
+
(
*
multipliers
).
count
;
if
((
*
multipliers
).
boolean
){
int
dot
=
0
;
int
*
values
=
(
*
multipliers
).
values
;
int
*
locations
=
(
*
multipliers
).
locations
;
int
*
locations_Stop
=
locations
+
(
*
multipliers
).
count
;
while
(
locations
<
locations_Stop
){
...
...
@@ -173,14 +197,18 @@ void cosineCompare(sparseRIV baseRIV, sparseRIV *multipliers, size_t multiplierC
values
++
;
}
cosSim
=
dot
/
((
baseRIV
.
magnitude
)
*
((
*
multipliers
).
magnitude
));
action
(
cosSim
,
baseRIV
,
(
*
multipliers
));
if
(
cosSim
>=
threshold
){
printf
(
"%s
\t
%s
\n
%f
\n
"
,
(
*
multipliers
).
name
,
baseRIV
.
name
,
cosSim
);
(
*
multipliers
).
boolean
=
0
;
RIVKey
.
thing
++
;
scanf
(
"%d"
,
&
RIVKey
.
thing
);
}
}
multipliers
++
;
}
}
void
getMagnitudes
(
sparseRIV
*
inputs
,
size_t
RIVCount
){
for
(
int
i
=
0
;
i
<
RIVCount
;
i
++
){
unsigned
int
temp
=
0
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment