Sphinx cannot index - sphinx

I know this is not a Sphinx support forum, but you always answer very quickly, so I hope you will excuse me.
My Sphinx config file looks like this:
#
indexer settings
#
indexer
{
mem_limit = 32M
}
#
searchd settings
#
searchd
{
listen = 127.0.0.1
log = /home/jackey/workspace/socrates/sphinx/log/searchd.log
query_log = /home/jackey/workspace/socrates/sphinx/log/query.log
read_timeout = 5
client_timeout = 300
max_children = 30
pid_file = /home/jackey/workspace/socrates/sphinx/log/searchd.pid
max_matches = 1000
seamless_rotate = 1
preopen_indexes = 0
unlink_old = 1
mva_updates_pool = 1M
max_packet_size = 8M
max_filters = 256
max_filter_values = 4096
}
#
datasource: socrates_dev_local_node
#
source socrates_dev_local_node
{
type = mysql
sql_host = localhost
sql_user = root
sql_pass = admin
sql_db = socrates_dev_local
sql_port =
sql_query_pre = \
SET NAMES utf8
sql_query = \
SELECT node.nid AS nid, node.title AS node_title, 'node' AS field_data_body_node_entity_type, 0 AS searchlight_deleted \
FROM \
node node \
WHERE (( (node.nid BETWEEN $start AND $end ) ))
sql_query_info = \
SELECT node.nid AS nid, node.title AS node_title, 'node' AS field_data_body_node_entity_type, 0 AS searchlight_deleted \
FROM \
node node \
WHERE (( (node.nid = $id) ))
sql_query_range = SELECT MIN(nid), MAX(nid) FROM node WHERE nid > 0
sql_range_step = 1000
sql_ranged_throttle = 0
sql_attr_uint = searchlight_deleted
sql_attr_uint = nid
}
#
index: socrates_dev_local_node
#
index socrates_dev_local_node
{
Index configuration
source = socrates_dev_local_node
path = /home/jackey/workspace/socrates/sphinx/indexes/
docinfo = extern
mlock = 0
morphology = stem_en
charset_type = utf-8
html_strip = 0
}
#
datasource: socrates_dev_local_search_node
#
source socrates_dev_local_search_node
{
type = mysql
sql_host = localhost
sql_user = root
sql_pass = admin
sql_db = socrates_dev_local
sql_port =
sql_query_pre = \
SET NAMES utf8
sql_query = \
SELECT node.nid AS nid, node.title AS node_title, node.status AS node_status, node.created AS node_created, users.name AS users_name, users.uid AS users_uid, node.type AS node_type, 'node' AS field_data_body_node_entity_type, 0 AS searchlight_deleted \
FROM \
node node \
INNER JOIN users users ON node.uid = users.uid \
WHERE (( (node.nid BETWEEN $start AND $end ) ))
sql_query_info = \
SELECT node.nid AS nid, node.title AS node_title, node.status AS node_status, node.created AS node_created, users.name AS users_name, users.uid AS users_uid, node.type AS node_type, 'node' AS field_data_body_node_entity_type, 0 AS searchlight_deleted \
FROM \
node node \
INNER JOIN users users ON node.uid = users.uid \
WHERE (( (node.nid = $id) ))
sql_query_range = SELECT MIN(nid), MAX(nid) FROM node WHERE nid > 0
sql_range_step = 1000
sql_ranged_throttle = 0
sql_attr_uint = searchlight_deleted
sql_attr_uint = node_status
sql_attr_timestamp = node_created
sql_attr_uint = users_uid
sql_attr_str2ordinal = node_type
sql_attr_uint = field_data_body_entity_id
}
#
index: socrates_dev_local_search_node
#
index socrates_dev_local_search_node
{
Index configuration
source = socrates_dev_local_search_node
path = /home/jackey/workspace/socrates/sphinx/indexes/
docinfo = extern
mlock = 0
morphology = stem_en
charset_type = utf-8
html_strip = 0
}
When I run the indexer command:
indexer --config socrates.conf
the output looks like this:
Copyright (c) 2001-2009, Andrew Aksyonoff
using config file '/home/jackey/workspace/socrates/sphinx/sphinx.merged.conf'...
total 0 reads, 0.000 sec, 0.0 kb/call avg, 0.0 msec/call avg
total 0 writes, 0.000 sec, 0.0 kb/call avg, 0.0 msec/call avg
Indexing complete.
I am quite sure there are many nodes in the database.
Am I missing something?

There seems to be no error. The indexer only tells you that it has indexed 0 documents, which means that your SQL query doesn't return anything. Try a simpler query, without $start and $end.

Related

SphinxSearch doesn't return some of the defined attributes

I'm dealing with the configuration of Sphinx Search. The problem is that not every value is returned from the index.
My current config:
source pl_PL_main_source {
type = pgsql
sql_host = xxx
sql_user = xxx
sql_pass = xxx
sql_db = xxx
sql_port = xxx
}
source pl_PL_artifacts_search : pl_PL_main_source {
sql_query = select \
id, \
title, \
description, \
slug, \
'-' AS thumb_url, \
'/' AS test, \
'\.' AS test2, \
'\:' as test3, \
(CASE WHEN COALESCE(thumb_width, 0) = 0 THEN 280 ELSE thumb_width END) AS thumb_width, \
(CASE WHEN COALESCE(thumb_height, 0) = 0 THEN 280 ELSE thumb_height END) AS thumb_height, \
(COALESCE(thumb_height, 0) > COALESCE(thumb_width, 0)) AS is_vertical, \
add_timestamp \
FROM artifacts.item WHERE is_removed IS FALSE AND is_public IS TRUE
sql_field_string = title
sql_attr_string = description
sql_attr_string = slug
sql_attr_string = thumb_url
sql_attr_string = test
sql_attr_string = test2
sql_attr_string = test3
sql_attr_uint = thumb_width
sql_attr_uint = thumb_height
sql_attr_bool = is_vertical
sql_attr_string = add_timestamp
}
index pl_PL_artifacts_search {
source = pl_PL_artifacts_search
path = /usr/local/sphinx/var/data/pl_PL/pl_PL_artifacts_search
min_word_len = 2
infix_fields = title
charset_table = 0..9, A..Z->a..z, a..z, U+0143->U+0144, \
U+0104->U+0105, U+0106->U+0107, U+0118->U+0119, \
U+0141->U+0142, U+00D3->U+00F3, U+015A->U+015B, \
U+0179->U+017A, U+017B->U+017C, U+0105, U+0107, \
U+0119, U+0142, U+00F3, U+015B, U+017A, U+017C, \
U+0144
wordforms = /usr/local/sphinx/wordform-pl-dict-urf-8
}
indexer {
mem_limit = 512M
}
searchd {
listen = xxx:sphinx
log = /usr/local/sphinx/var/log/pl_PL/pl_PL_searchd.log
query_log = /usr/local/sphinx/var/log/pl_PL/pl_PL_query.log
read_timeout = 5
max_children = 30
pid_file = /usr/local/sphinx/var/log/pl_PL/pl_PL_searchd.pid
seamless_rotate = 1
unlink_old = 1
binlog_path = /usr/local/sphinx/var/log/pl_PL
}
The indexer rebuilt the index with no problem. The problem appears when I use the Sphinx client to query that index: it returns the proper results, BUT it does not return all attributes. Below is a sample of the results:
[692307] => Array
(
[weight] => 100
[attrs] => Array
(
[title] => xxx
[description] =>
[slug] => xxx
[thumb_url] => -
[thumb_width] => 533
[thumb_height] => 400
[is_vertical] => 0
[add_timestamp] =>
)
)
[692411] => Array
(
[weight] => 100
[attrs] => Array
(
[title] => yyy
[description] =>
[slug] => yyy
[thumb_url] => -
[thumb_width] => 563
[thumb_height] => 388
[is_vertical] => 0
[add_timestamp] =>
)
)
Why were my test attributes (test, test2, test3) not returned?

sphinxsearch does not return me content and title fields

I am using sphinx search with document table. I want to fetch all the fields but it doesn't return me all the fields. Please check my config file and let me know where to make change? using test index
table : documents
Fields: id, group_id, group_id2, date_added, content, title
mysql> select * from test1 where match ('my document');
+------+----------+------------+
| id | group_id | date_added |
+------+----------+------------+
| 1 | 1 | 1461672351 |
| 2 | 1 | 1461672351 |
+------+----------+------------+
2 rows in set (0.00 sec)
mysql>
source src1
{
type = mysql
sql_host = localhost
sql_user = root
sql_pass = india#123
sql_db = test
sql_port = 3306
sql_query = \
SELECT id, group_id, UNIX_TIMESTAMP(date_added) AS date_added, title, content \
FROM documents
sql_attr_uint = group_id
sql_attr_timestamp = date_added
sql_ranged_throttle = 0
sql_query_info = SELECT * FROM documents WHERE id=$id
}
source src1throttled : src1
{
sql_ranged_throttle = 100
}
index test1
{
source = src1
path = /var/lib/sphinxsearch/data/test1
docinfo = extern
dict = keywords
mlock = 0
morphology = none
min_word_len = 1
html_strip = 0
}
index test1stemmed : test1
{
path = /var/lib/sphinxsearch/data/test1stemmed
morphology = stem_en
}
index dist1
{
type = distributed
local = test1
local = test1stemmed
agent = localhost:9313:remote1
agent = localhost:9314:remote2,remote3
agent_connect_timeout = 1000
agent_query_timeout = 3000
}
index rt
{
type = rt
path = /var/lib/sphinxsearch/data/rt
rt_field = title
rt_field = content
rt_attr_uint = gid
}
indexer
{
mem_limit = 128M
}
searchd
{
listen = 9312
listen = 9306:mysql41
log = /var/log/sphinxsearch/searchd.log
query_log = /var/log/sphinxsearch/query.log
read_timeout = 5
client_timeout = 300
max_children = 30
persistent_connections_limit = 30
pid_file = /var/run/sphinxsearch/searchd.pid
seamless_rotate = 1
preopen_indexes = 1
unlink_old = 1
mva_updates_pool = 1M
max_packet_size = 8M
max_filters = 256
max_filter_values = 4096
max_batch_queries = 32
workers = threads # for RT to work
}
common
{
}
No. sphinx does not 'store' Fields. They can't be returned.
Sphinx 'indexes' the fields, but in its internal Inverse-Index format.
To get data back out of Sphinx, use attributes. In your example index, you've made group_id and date_added attributes via the sql_attr_* directives.
sql_field_string could be useful, as it makes a column BOTH a Field, AND a string attribute.
I play a lot with Sphinxsearch and I get the same result (no title, no content).
If you add these rows to the config file, the SELECT * FROM table1; query will return the title and content fields, too:
sql_field_string = title
sql_field_string = content

skipping non-plain index rt (sphinx 2.1.6)

Here is the question. Sphinx version 2.1.6. I am using an RT (real-time) index, but when indexing, this message is displayed in the console:
using config file 'sphinx.conf'...
skipping non-plain index 'rt'...
But when I connect to Sphinx and run the query mysql> desc rt, it displays:
+------------+--------+
| Field | Type |
+------------+--------+
| id | bigint |
| id | field |
| first_name | field |
| last_name | field |
+------------+--------+
Is this default data? It does not match what I defined. How do I work with the rt index?
Sphinx.conf.
source database
{
type = mysql
sql_host = 127.0.0.1
sql_user = test
sql_pass = test
sql_db = community
sql_port = 3306
mysql_connect_flags = 32 # enable compression
sql_query_pre = SET NAMES utf8
sql_query_pre = SET SESSION query_cache_type=OFF
}
source rt : database
{
sql_query_range = SELECT MIN(id),MAX(id) FROM mbt_accounts
sql_query = SELECT id AS 'accountId', first_name AS 'fname', last_name AS 'lname' FROM mbt_accounts WHERE id >= 0 AND id<= 1000
sql_range_step = 1000
sql_ranged_throttle = 1000 # milliseconds
}
index rt
{
source = rt
type = rt
path = /etc/sphinxsearch/rtindex
rt_mem_limit = 700M
rt_field = accountId
rt_field = fname
rt_field = lname
rt_attr_string = fname
rt_attr_string = lname
charset_type = utf-8
charset_table = 0..9, A..Z->a..z, _, -, a..z, U+410..U+42F->U+430..U+44F, U+430..U+44F, U+401->U+451, U+451
}
searchd
{
listen = localhost:9312 # port for API
listen = localhost:9306:mysql41 #port for a SphinxQL
log = /var/log/sphinxsearch/searchd.log
binlog_path = /var/log/sphinxsearch/
query_log = /var/log/sphinxsearch/query.log
query_log_format = sphinxql
pid_file = /var/run/sphinxsearch/searchd.pid
workers = threads
max_matches = 1000
read_timeout = 5
client_timeout = 300
max_children = 30
max_packet_size = 8M
binlog_flush = 2
binlog_max_log_size = 90M
thread_stack = 8M
expansion_limit = 500
rt_flush_period = 1800
collation_server = utf8_general_ci
compat_sphinxql_magics = 0
prefork_rotation_throttle = 100
}
Thanks.
indexer only works with indexes that have a 'source', i.e. plain disk indexes: indexer runs the queries in the source to get the data needed to create the index.
RT (Real Time) indexes work very differently. indexer is not involved with RT indexes at all. They are handled totally by searchd.
To add data to a RT index, you need to run a bunch of SphinxQL commands (INSERT, UPDATE etc) that actually add the data to the index.
(DESCRIBE works, because searchd knows the 'structure' of the index (you told it via the rt_field etc) - even if never inserted any data)
Ah, I think you are asking why the structure is different. That's probably because the index was created before you modified sphinx.conf. If you change the definition of an RT index, you need to 'destroy' the index to allow it to be recreated.
The simplest way is to shut down searchd, delete the index files, delete the binlog (it is no longer relevant) and then restart searchd.
searchd --stopwait
rm /etc/sphinxsearch/rtindex*
rm /path/to/binlog* #(you dont define a path, so it must be the default, which varies)
searchd #(starts searchd again)

Sphinx stopped indexing

After doing rotate and re-generating index, Sphinx doesn't include new records from database. It doesn't give any error and also it includes old index data.
I removed data files and tried again but the result was same.
I also applied range query but the result was same.
So, I cannot update my search index now :(
Here I'm giving my configuration, thank you for your tips :)
source search_song
{
type = mysql
sql_host = localhost
sql_user = myusername
sql_pass = mypass
sql_db = mydb
sql_port = 3306 # optional, default is 3306
sql_query_pre = SET NAMES utf8
sql_query_pre = SET NAMES utf8 COLLATE utf8_turkish_ci
sql_query_pre = SET CHARACTER SET utf8
sql_query_pre = SET COLLATION_CONNECTION = utf8_turkish_ci
sql_query_range = SELECT MIN(song_ID), MAX(song_ID) FROM song
sql_range_step = 20000
sql_query = SELECT song.song_ID, artist.artist_ID, song.title, song_stats.total_read, IF(artist.flag_The = 1, CONCAT("The ", artist.name), artist.name) AS fullname \
FROM song \
INNER JOIN artist ON artist.artist_ID = song.artist_ID \
LEFT JOIN song_stats ON song_stats.song_ID = song.song_ID \
WHERE song.song_ID >= $start AND song.song_ID <= $end;
sql_attr_uint = total_read
}
index search_song
{
source = search_song
path = /var/lib/sphinxsearch/data/search_song
morphology = metaphone
min_word_len = 1
min_prefix_len = 2
enable_star = 1
charset_type = utf-8
# exceptions = /var/lib/sphinxsearch/exceptions.txt
charset_table = A->a, B->b, C->c, U+C7->c, U+E7->c, D..G->d..g, U+11E->g, U+11F->g, H->h, I->i, U+131->i, U+130->i, J..O->j..o, U+D6->o, U+F6->o, P..S->p..s, U+15E->s, U+15F->s, T..U->t..u, U+DC->u, U+FC->u, V..Z->v..z, _, a..z,[,],0..9
}
Does it work without the range ? just the sql_query , no range and step ?
I suspect the SQL query might be limiting results.

Error after converting Sphinx original indexes to real-time indexes

I used this tutorial to convert my original sphinx indexes to real-time indexes: http://www.ivinco.com/blog/converting-sphinx-original-indexes-to-real-time-indexes/
I changed my sphinx.conf:
source movies_dev
{
type = mysql
sql_host = localhost
sql_user = ********
sql_pass = ********
sql_db = ********
sql_sock = /var/run/mysqld/mysqld.sock
sql_port = 3306
sql_query = \
SELECT \
CRC32(movie_id) AS id, movie_id, format_id, active, year, title \
FROM \
movie;
sql_attr_uint = format_id
sql_attr_uint = active
sql_attr_uint = year
sql_field_string = movie_id
sql_field_string = title
sql_query_info = SELECT * FROM movie WHERE CRC32(movie_id)=$id
sql_query_pre = SET NAMES utf8
}
index movies_dev
{
source = movies_dev
path = /var/data/sphinx/movies_dev
morphology = stem_en
enable_star = 1
min_word_len = 3
min_prefix_len = 0
min_infix_len = 3
charset_type = utf-8
charset_table = 0..9, A..Z->a..z, _, a..z, U+410..U+42F->U+430..U+44F, U+430..U+44F, U+DC->U+FC, U+C4->U+E4, U+D6->U+F6, U+DF, U+E4, U+F6, U+FC
}
index rt_movies_dev
{
type = rt
rt_mem_limit = 32M
path = /var/data/sphinx/rt_movies_dev
charset_type = utf-8
rt_field = movie_id
rt_field = title
rt_attr_uint = format_id
rt_attr_uint = year
rt_attr_uint = active
}
source attach_movies_dev
{
type = mysql
sql_host = localhost
sql_user = ********
sql_pass = ********
sql_db = ********
sql_query = SELECT 1 FROM rt_movies_dev
sql_query_post = ATTACH INDEX movies_dev TO RTINDEX rt_movies_dev
}
index attach_movies_dev
{
source = attach_movies_dev
path = /var/data/sphinx/attach_movies_dev
docinfo = extern
charset_type = utf-8
}
I created the "rt_movies_dev" table:
SET NAMES utf8;
SET foreign_key_checks = 0;
SET time_zone = '+01:00';
SET sql_mode = 'NO_AUTO_VALUE_ON_ZERO';
DROP TABLE IF EXISTS `rt_movies_dev`;
CREATE TABLE `rt_movies_dev` (
`movie_id` varchar(20) NOT NULL,
`format_id` int(10) NOT NULL,
`title` varchar(255) NOT NULL,
`year` int(20) DEFAULT NULL,
`active` tinyint(1) NOT NULL,
PRIMARY KEY (`movie_id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
After that, I run these three commands:
root#server:~# /usr/local/sphinx/bin/searchd --config /usr/local/sphinx/etc/sphinx.conf;
root#server:~# /usr/local/sphinx/bin/indexer --config /usr/local/sphinx/etc/sphinx.conf movies_dev --rotate;
root#server:~# /usr/local/sphinx/bin/indexer --config /usr/local/sphinx/etc/sphinx.conf attach_movies_dev;
No errors after the first two commands (except the warnings like in the tutorial).
But the last command throws this:
ERROR: index 'attach_movies_dev': No fields in schema - will not index.
I do not know exactly what the error says and I could find nothing useful. Can you say what's wrong? I'm stuck here.
Firstly attach_movies_dev source, connects to SPHINX, not to mysql. So no mysql table is required.
You are just using indexer to invoke SphinxQL commands.
But from what I can see, trying to index the attach index will always result in an error, because the RT index itself must be empty (so that a disk index can be attached to it!)
So change your attach source to connect to searchd instead, and it should work better. An empty RT index is probably OK: indexer will just create an empty index, but importantly it will still run the _post command, which is the whole reason the index exists!
Also beware that your disk index and RT index have different fields. In your disk index you have two sql_field_string directives, which create both attributes AND fields, so your RT index should contain two string attributes to match (rather than just fields).