Skip to content

Commit bd1fdca

Browse files
set score to 1 for each document (#1607)
* set score to 1 for each document * test fulltext search scoring * Update proc_fulltext_query.c * Add documentation Co-authored-by: Jeffrey Lovitz <[email protected]>
1 parent 2d43f9d commit bd1fdca

File tree

4 files changed

+48
-7
lines changed

4 files changed

+48
-7
lines changed

docs/commands.md

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -773,7 +773,7 @@ YIELD modifiers are only required if explicitly specified; by default the value
773773
| db.indexes | none | `type`, `label`, `properties` | Yield all indexes in the graph, denoting whether they are exact-match or full-text and which label and properties each covers. |
774774
| db.idx.fulltext.createNodeIndex | `label`, `property` [, `property` ...] | none | Builds a full-text searchable index on a label and the 1 or more specified properties. |
775775
| db.idx.fulltext.drop | `label` | none | Deletes the full-text index associated with the given label. |
776-
| db.idx.fulltext.queryNodes | `label`, `string` | `node` | Retrieve all nodes that contain the specified string in the full-text indexes on the given label. |
776+
| db.idx.fulltext.queryNodes | `label`, `string` | `node`, `score` | Retrieve all nodes that contain the specified string in the full-text indexes on the given label, along with the full-text score of each matching node. |
777777
| algo.pageRank | `label`, `relationship-type` | `node`, `score` | Runs the pagerank algorithm over nodes of given label, considering only edges of given relationship type. |
778778
| [algo.BFS](#BFS) | `source-node`, `max-level`, `relationship-type` | `nodes`, `edges` | Performs BFS to find all nodes connected to the source. A `max level` of 0 indicates unlimited and a non-NULL `relationship-type` defines the relationship type that may be traversed. |
779779
| dbms.procedures() | none | `name`, `mode` | List all procedures in the DBMS, yields for every procedure its name and mode (read/write). |
@@ -883,6 +883,20 @@ RETURN m ORDER BY m.rating"
883883
3) 1) "Query internal execution time: 0.226914 milliseconds"
884884
```
885885
886+
In addition to yielding matching nodes, full-text index scans return a score for each node. This is the node's [TF-IDF](https://oss.redislabs.com/redisearch/Scoring/#tfidf_default) score, which reflects how many times the search terms appear in the node and how closely grouped they are. The following example demonstrates this:
887+
```sh
888+
GRAPH.QUERY DEMO_GRAPH
889+
"CALL db.idx.fulltext.queryNodes('Node', 'hello world') YIELD node, score RETURN score, node.val"
890+
1) 1) "score"
891+
2) "node.val"
892+
2) 1) 1) "2"
893+
2) "hello world"
894+
2) 1) "1"
895+
2) "hello to a different world"
896+
3) 1) "Cached execution: 1"
897+
2) "Query internal execution time: 0.335401 milliseconds"
898+
```
899+
886900
## GRAPH.PROFILE
887901
888902
Executes a query and produces an execution plan augmented with metrics for each operation's execution.

src/index/index.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,7 @@ void Index_RemoveField(Index *idx, const char *field) {
113113
}
114114

115115
void Index_IndexNode(Index *idx, const Node *n) {
116-
double score = 0; // default score
116+
double score = 1; // default score
117117
const char *lang = NULL; // default language
118118
RSIndex *rsIdx = idx->idx;
119119
NodeID node_id = ENTITY_GET_ID(n);

src/procedures/proc_fulltext_query.c

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -51,11 +51,11 @@ ProcedureResult Proc_FulltextQueryNodeInvoke(ProcedureCtx *ctx, const SIValue *a
5151
pdata->idx = idx;
5252
pdata->g = gc->g;
5353
pdata->n = GE_NEW_NODE();
54-
pdata->output = array_new(SIValue, 2);
54+
pdata->output = array_new(SIValue, 4);
5555
pdata->output = array_append(pdata->output, SI_ConstStringVal("node"));
5656
pdata->output = array_append(pdata->output, SI_Node(&pdata->n));
57-
// pdata->output = array_append(pdata->output, SI_ConstStringVal("score"));
58-
// pdata->output = array_append(pdata->output, SI_DoubleVal(0.0));
57+
pdata->output = array_append(pdata->output, SI_ConstStringVal("score"));
58+
pdata->output = array_append(pdata->output, SI_DoubleVal(0.0));
5959

6060
// Execute query
6161
pdata->iter = Index_Query(pdata->idx, query, &err);
@@ -89,11 +89,15 @@ SIValue *Proc_FulltextQueryNodeStep(ProcedureCtx *ctx) {
8989
// Depleted.
9090
if(!id) return NULL;
9191

92+
double score = RediSearch_ResultsIteratorGetScore(pdata->iter);
93+
9294
// Get Node.
9395
Node *n = &pdata->n;
9496
Graph_GetNode(pdata->g, *id, n);
9597

9698
pdata->output[1] = SI_Node(n);
99+
pdata->output[3] = SI_DoubleVal(score);
100+
97101
return pdata->output;
98102
}
99103

@@ -111,9 +115,11 @@ ProcedureResult Proc_FulltextQueryNodeFree(ProcedureCtx *ctx) {
111115

112116
ProcedureCtx *Proc_FulltextQueryNodeGen() {
113117
void *privateData = NULL;
114-
ProcedureOutput *output = array_new(ProcedureOutput, 1);
115-
ProcedureOutput out_node = {.name = "node", .type = T_NODE};
118+
ProcedureOutput *output = array_new(ProcedureOutput, 2);
119+
ProcedureOutput out_node = {.name = "node", .type = T_NODE};
120+
ProcedureOutput out_score = {.name = "score", .type = T_DOUBLE};
116121
output = array_append(output, out_node);
122+
output = array_append(output, out_score);
117123

118124
ProcedureCtx *ctx = ProcCtxNew("db.idx.fulltext.queryNodes",
119125
2,

tests/flow/test_index_scans.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -363,3 +363,24 @@ def test14_index_scan_utilize_array(self):
363363
# No index scans should be performed.
364364
self.env.assertEqual(plan.count("Label Scan"), 1)
365365
self.env.assertEqual(plan.count("Index Scan"), 0)
366+
367+
# Test fulltext result scoring
368+
def test15_fulltext_result_scoring(self):
369+
g = Graph('fulltext_scoring', self.env.getConnection())
370+
371+
# create full-text index over label 'L', attribute 'v'
372+
g.call_procedure('db.idx.fulltext.createNodeIndex', 'L', 'v')
373+
374+
# introduce 2 nodes
375+
g.query("create (:L {v:'hello world hello'})")
376+
g.query("create (:L {v:'hello world hello world'})")
377+
378+
# query nodes using fulltext search
379+
q = """CALL db.idx.fulltext.queryNodes('L', 'hello world') YIELD node, score
380+
RETURN node.v, score
381+
ORDER BY score"""
382+
res = g.query(q)
383+
actual = res.result_set
384+
expected = [['hello world hello', 1.5], ['hello world hello world', 2]]
385+
self.env.assertEqual(expected, actual)
386+

0 commit comments

Comments
 (0)