crates/moraine-config/src/lib.rs
1 use anyhow::{Context, Result};
2 use serde::Deserialize;
3 use std::path::{Component, Path, PathBuf};
4
/// One entry of the `ingest.sources` array in the TOML config.
///
/// Unknown keys inside an entry are rejected at parse time (`deny_unknown_fields`).
#[derive(Debug, Clone, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct IngestSource {
    /// Label for this source; echoed in provider-validation errors. Empty when omitted.
    #[serde(default)]
    pub name: String,
    /// Provider identifier. Config normalization trims and lowercases it and only accepts
    /// `codex` or `claude`; an omitted (empty) value therefore fails to load.
    #[serde(default)]
    pub provider: String,
    /// Whether the source is active. Defaults to `true` when omitted.
    #[serde(default = "default_enabled")]
    pub enabled: bool,
    /// Glob pattern for the files of this source. A leading `~/` is expanded during normalization.
    #[serde(default)]
    pub glob: String,
    /// Directory to watch. When blank, normalization derives it from `glob`; otherwise a
    /// leading `~/` is expanded.
    #[serde(default)]
    pub watch_root: String,
}
19
/// Settings from the `clickhouse` table of the config file.
///
/// Unknown keys are rejected at parse time (`deny_unknown_fields`).
#[derive(Debug, Clone, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct ClickHouseConfig {
    /// Server URL. Defaults to `http://127.0.0.1:8123`.
    #[serde(default = "default_ch_url")]
    pub url: String,
    /// Database name. Defaults to `moraine`.
    #[serde(default = "default_ch_database")]
    pub database: String,
    /// User name. Defaults to `default`.
    #[serde(default = "default_ch_username")]
    pub username: String,
    /// Password. Defaults to the empty string.
    #[serde(default)]
    pub password: String,
    /// Timeout in seconds (fractional values allowed). Defaults to `30.0`.
    #[serde(default = "default_timeout_seconds")]
    pub timeout_seconds: f64,
    /// Defaults to `true`. Presumably maps to ClickHouse's `async_insert` setting — confirm
    /// against the client code.
    #[serde(default = "default_true")]
    pub async_insert: bool,
    /// Defaults to `true`. Presumably maps to ClickHouse's `wait_for_async_insert` setting —
    /// confirm against the client code.
    #[serde(default = "default_true")]
    pub wait_for_async_insert: bool,
}
38
/// Settings from the `ingest` table of the config file.
///
/// Unknown keys are rejected at parse time (`deny_unknown_fields`).
#[derive(Debug, Clone, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct IngestConfig {
    /// Sources to ingest. Defaults to the built-in `codex` and `claude` sources.
    #[serde(default = "default_sources")]
    pub sources: Vec<IngestSource>,
    /// Defaults to `4000`.
    #[serde(default = "default_batch_size")]
    pub batch_size: usize,
    /// Flush interval in seconds. Defaults to `0.5`.
    #[serde(default = "default_flush_interval_seconds")]
    pub flush_interval_seconds: f64,
    /// State directory. Defaults to `~/.moraine/ingestor`; a leading `~/` is expanded
    /// during normalization.
    #[serde(default = "default_state_dir")]
    pub state_dir: String,
    /// Defaults to `true`.
    #[serde(default = "default_true")]
    pub backfill_on_start: bool,
    /// Defaults to `8`.
    #[serde(default = "default_max_file_workers")]
    pub max_file_workers: usize,
    /// Defaults to `64`.
    #[serde(default = "default_max_inflight_batches")]
    pub max_inflight_batches: usize,
    /// Debounce window in milliseconds. Defaults to `50`.
    #[serde(default = "default_debounce_ms")]
    pub debounce_ms: u64,
    /// Reconcile interval in seconds. Defaults to `30.0`.
    #[serde(default = "default_reconcile_interval_seconds")]
    pub reconcile_interval_seconds: f64,
    /// Heartbeat interval in seconds. Defaults to `5.0`.
    #[serde(default = "default_heartbeat_interval_seconds")]
    pub heartbeat_interval_seconds: f64,
}
63
64 #[derive(Debug, Clone, Deserialize)]
65 #[serde(deny_unknown_fields)]
66 pub struct McpConfig {
67 #[serde(default = "default_max_results")]
68 pub max_results: u16,
69 #[serde(default = "default_preview_chars")]
70 pub preview_chars: u16,
71 #[serde(default = "default_context_before")]
72 pub default_context_before: u16,
73 #[serde(default = "default_context_after")]
74 pub default_context_after: u16,
75 #[serde(default = "default_false")]
76 pub default_include_tool_events: bool,
77 #[serde(default = "default_true")]
78 pub default_exclude_codex_mcp: bool,
79 #[serde(default = "default_false")]
80 pub async_log_writes: bool,
81 #[serde(default = "default_protocol_version")]
82 pub protocol_version: String,
83 }
84
/// Settings from the `bm25` table of the config file.
///
/// Unknown keys are rejected at parse time (`deny_unknown_fields`).
#[derive(Debug, Clone, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct Bm25Config {
    /// Defaults to `1.2`. Presumably the BM25 `k1` term-saturation parameter — confirm
    /// against the scoring code.
    #[serde(default = "default_k1")]
    pub k1: f64,
    /// Defaults to `0.75`. Presumably the BM25 `b` length-normalization parameter — confirm
    /// against the scoring code.
    #[serde(default = "default_b")]
    pub b: f64,
    /// Defaults to `0.0`.
    #[serde(default = "default_min_score")]
    pub default_min_score: f64,
    /// Defaults to `1`.
    #[serde(default = "default_min_should_match")]
    pub default_min_should_match: u16,
    /// Defaults to `32`.
    #[serde(default = "default_max_query_terms")]
    pub max_query_terms: usize,
}
99
/// Settings from the `monitor` table of the config file.
///
/// Unknown keys are rejected at parse time (`deny_unknown_fields`).
#[derive(Debug, Clone, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct MonitorConfig {
    /// Host value. Defaults to `127.0.0.1`.
    #[serde(default = "default_monitor_host")]
    pub host: String,
    /// Port value. Defaults to `8080`.
    #[serde(default = "default_monitor_port")]
    pub port: u16,
}
108
/// Settings from the `runtime` table of the config file.
///
/// Unknown keys are rejected at parse time (`deny_unknown_fields`).
#[derive(Debug, Clone, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct RuntimeConfig {
    /// Runtime root directory. Defaults to `~/.moraine`; a leading `~/` is expanded during
    /// normalization.
    #[serde(default = "default_runtime_root")]
    pub root_dir: String,
    /// Logs directory. Defaults to `logs`. After `~/` expansion, a relative value is joined
    /// onto `root_dir`; an absolute value is kept as is.
    #[serde(default = "default_runtime_logs_dir")]
    pub logs_dir: String,
    /// Pid-file directory. Defaults to `run`. Resolved against `root_dir` the same way as
    /// `logs_dir`.
    #[serde(default = "default_runtime_pids_dir")]
    pub pids_dir: String,
    /// Defaults to `~/.local/bin`; a leading `~/` is expanded during normalization.
    #[serde(default = "default_service_bin_dir")]
    pub service_bin_dir: String,
    /// Defaults to `~/.local/lib/moraine/clickhouse/current`; a leading `~/` is expanded
    /// during normalization.
    #[serde(default = "default_managed_clickhouse_dir")]
    pub managed_clickhouse_dir: String,
    /// Start timeout in seconds. Defaults to `30.0`.
    #[serde(default = "default_clickhouse_start_timeout_seconds")]
    pub clickhouse_start_timeout_seconds: f64,
    /// Health-check interval in milliseconds. Defaults to `500`.
    #[serde(default = "default_healthcheck_interval_ms")]
    pub healthcheck_interval_ms: u64,
    /// Defaults to `true`.
    #[serde(default = "default_true")]
    pub clickhouse_auto_install: bool,
    /// Defaults to `v25.12.5.44-stable`.
    #[serde(default = "default_clickhouse_version")]
    pub clickhouse_version: String,
    /// Defaults to `true`.
    #[serde(default = "default_true")]
    pub start_monitor_on_up: bool,
    /// Defaults to `false`.
    #[serde(default = "default_false")]
    pub start_mcp_on_up: bool,
}
135
/// Root of the TOML configuration.
///
/// Every section is optional: a missing table falls back to that section type's `Default`
/// implementation. Unknown top-level tables are rejected at parse time
/// (`deny_unknown_fields`).
#[derive(Debug, Clone, Deserialize, Default)]
#[serde(deny_unknown_fields)]
pub struct AppConfig {
    /// The `[clickhouse]` table.
    #[serde(default)]
    pub clickhouse: ClickHouseConfig,
    /// The `[ingest]` table.
    #[serde(default)]
    pub ingest: IngestConfig,
    /// The `[mcp]` table.
    #[serde(default)]
    pub mcp: McpConfig,
    /// The `[bm25]` table.
    #[serde(default)]
    pub bm25: Bm25Config,
    /// The `[monitor]` table.
    #[serde(default)]
    pub monitor: MonitorConfig,
    /// The `[runtime]` table.
    #[serde(default)]
    pub runtime: RuntimeConfig,
}
152
153 impl Default for ClickHouseConfig {
154 fn default() -> Self {
155 Self {
156 url: default_ch_url(),
157 database: default_ch_database(),
158 username: default_ch_username(),
159 password: String::new(),
160 timeout_seconds: default_timeout_seconds(),
161 async_insert: true,
162 wait_for_async_insert: true,
163 }
164 }
165 }
166
167 impl Default for IngestConfig {
168 fn default() -> Self {
169 Self {
170 sources: default_sources(),
171 batch_size: default_batch_size(),
172 flush_interval_seconds: default_flush_interval_seconds(),
173 state_dir: default_state_dir(),
174 backfill_on_start: true,
175 max_file_workers: default_max_file_workers(),
176 max_inflight_batches: default_max_inflight_batches(),
177 debounce_ms: default_debounce_ms(),
178 reconcile_interval_seconds: default_reconcile_interval_seconds(),
179 heartbeat_interval_seconds: default_heartbeat_interval_seconds(),
180 }
181 }
182 }
183
184 impl Default for McpConfig {
185 fn default() -> Self {
186 Self {
187 max_results: default_max_results(),
188 preview_chars: default_preview_chars(),
189 default_context_before: default_context_before(),
190 default_context_after: default_context_after(),
191 default_include_tool_events: false,
192 default_exclude_codex_mcp: true,
193 async_log_writes: true,
194 protocol_version: default_protocol_version(),
195 }
196 }
197 }
198
199 impl Default for Bm25Config {
200 fn default() -> Self {
201 Self {
202 k1: default_k1(),
203 b: default_b(),
204 default_min_score: default_min_score(),
205 default_min_should_match: default_min_should_match(),
206 max_query_terms: default_max_query_terms(),
207 }
208 }
209 }
210
211 impl Default for MonitorConfig {
212 fn default() -> Self {
213 Self {
214 host: default_monitor_host(),
215 port: default_monitor_port(),
216 }
217 }
218 }
219
220 impl Default for RuntimeConfig {
221 fn default() -> Self {
222 Self {
223 root_dir: default_runtime_root(),
224 logs_dir: default_runtime_logs_dir(),
225 pids_dir: default_runtime_pids_dir(),
226 service_bin_dir: default_service_bin_dir(),
227 managed_clickhouse_dir: default_managed_clickhouse_dir(),
228 clickhouse_start_timeout_seconds: default_clickhouse_start_timeout_seconds(),
229 healthcheck_interval_ms: default_healthcheck_interval_ms(),
230 clickhouse_auto_install: true,
231 clickhouse_version: default_clickhouse_version(),
232 start_monitor_on_up: true,
233 start_mcp_on_up: false,
234 }
235 }
236 }
237
/// Default for `ClickHouseConfig::url`.
fn default_ch_url() -> String {
    String::from("http://127.0.0.1:8123")
}

/// Default for `ClickHouseConfig::database`.
fn default_ch_database() -> String {
    String::from("moraine")
}

/// Default for `ClickHouseConfig::username`.
fn default_ch_username() -> String {
    String::from("default")
}

/// Default for `ClickHouseConfig::timeout_seconds`.
fn default_timeout_seconds() -> f64 {
    30.0_f64
}
253
/// Default for `IngestSource::enabled`: sources are active unless switched off explicitly.
fn default_enabled() -> bool {
    const ENABLED_BY_DEFAULT: bool = true;
    ENABLED_BY_DEFAULT
}
257
258 fn default_sources() -> Vec<IngestSource> {
259 vec![
260 IngestSource {
261 name: "codex".to_string(),
262 provider: "codex".to_string(),
263 enabled: true,
264 glob: "~/.codex/sessions/**/*.jsonl".to_string(),
265 watch_root: "~/.codex/sessions".to_string(),
266 },
267 IngestSource {
268 name: "claude".to_string(),
269 provider: "claude".to_string(),
270 enabled: true,
271 glob: "~/.claude/projects/**/*.jsonl".to_string(),
272 watch_root: "~/.claude/projects".to_string(),
273 },
274 ]
275 }
276
/// Default for `IngestConfig::batch_size`.
fn default_batch_size() -> usize {
    4_000
}

/// Default for `IngestConfig::flush_interval_seconds`.
fn default_flush_interval_seconds() -> f64 {
    0.5_f64
}

/// Default for `IngestConfig::state_dir` (unexpanded).
fn default_state_dir() -> String {
    String::from("~/.moraine/ingestor")
}

/// Default for `IngestConfig::max_file_workers`.
fn default_max_file_workers() -> usize {
    8_usize
}

/// Default for `IngestConfig::max_inflight_batches`.
fn default_max_inflight_batches() -> usize {
    64_usize
}

/// Default for `IngestConfig::debounce_ms`.
fn default_debounce_ms() -> u64 {
    50_u64
}

/// Default for `IngestConfig::reconcile_interval_seconds`.
fn default_reconcile_interval_seconds() -> f64 {
    30.0_f64
}

/// Default for `IngestConfig::heartbeat_interval_seconds`.
fn default_heartbeat_interval_seconds() -> f64 {
    5.0_f64
}
308
/// Default for `McpConfig::max_results`.
fn default_max_results() -> u16 {
    25_u16
}

/// Default for `McpConfig::preview_chars`.
fn default_preview_chars() -> u16 {
    320_u16
}

/// Default for `McpConfig::default_context_before`.
fn default_context_before() -> u16 {
    3_u16
}

/// Default for `McpConfig::default_context_after`.
fn default_context_after() -> u16 {
    3_u16
}

/// Default for `McpConfig::protocol_version`.
fn default_protocol_version() -> String {
    String::from("2024-11-05")
}
328
/// Default for `Bm25Config::k1`.
fn default_k1() -> f64 {
    1.2_f64
}

/// Default for `Bm25Config::b`.
fn default_b() -> f64 {
    0.75_f64
}

/// Default for `Bm25Config::default_min_score`.
fn default_min_score() -> f64 {
    0.0_f64
}

/// Default for `Bm25Config::default_min_should_match`.
fn default_min_should_match() -> u16 {
    1_u16
}

/// Default for `Bm25Config::max_query_terms`.
fn default_max_query_terms() -> usize {
    32_usize
}
348
/// Default for `MonitorConfig::host`.
fn default_monitor_host() -> String {
    String::from("127.0.0.1")
}

/// Default for `MonitorConfig::port`.
fn default_monitor_port() -> u16 {
    8080_u16
}
356
/// Default for `RuntimeConfig::root_dir` (unexpanded).
fn default_runtime_root() -> String {
    String::from("~/.moraine")
}

/// Default for `RuntimeConfig::logs_dir` (relative).
fn default_runtime_logs_dir() -> String {
    String::from("logs")
}

/// Default for `RuntimeConfig::pids_dir` (relative).
fn default_runtime_pids_dir() -> String {
    String::from("run")
}

/// Default for `RuntimeConfig::service_bin_dir` (unexpanded).
fn default_service_bin_dir() -> String {
    String::from("~/.local/bin")
}

/// Default for `RuntimeConfig::managed_clickhouse_dir` (unexpanded).
fn default_managed_clickhouse_dir() -> String {
    String::from("~/.local/lib/moraine/clickhouse/current")
}

/// Default for `RuntimeConfig::clickhouse_start_timeout_seconds`.
fn default_clickhouse_start_timeout_seconds() -> f64 {
    30.0_f64
}

/// Default for `RuntimeConfig::healthcheck_interval_ms`.
fn default_healthcheck_interval_ms() -> u64 {
    500_u64
}

/// Default for `RuntimeConfig::clickhouse_version`.
fn default_clickhouse_version() -> String {
    String::from("v25.12.5.44-stable")
}
388
/// Serde default helper for boolean fields that are on unless configured otherwise.
fn default_true() -> bool {
    !false
}

/// Serde default helper for boolean fields that are off unless configured otherwise.
fn default_false() -> bool {
    !true
}
396
/// Expands a leading `~` to the current user's home directory.
///
/// * `~` becomes `$HOME`.
/// * `~/rest` becomes `$HOME/rest`.
/// * Anything else, including `~user/...`, is returned unchanged.
///
/// If the `HOME` environment variable is not set, the input is returned unchanged.
/// Non-UTF-8 bytes in `HOME` are replaced lossily.
pub fn expand_path(path: &str) -> String {
    if path == "~" {
        // A bare tilde names the home directory itself. Previously this case fell through
        // and produced a literal `~` path relative to the working directory.
        if let Some(home) = std::env::var_os("HOME") {
            return home.to_string_lossy().into_owned();
        }
    } else if let Some(stripped) = path.strip_prefix("~/") {
        if let Some(home) = std::env::var_os("HOME") {
            return format!("{}/{}", home.to_string_lossy(), stripped);
        }
    }
    path.to_string()
}
405
/// Derives the directory to watch from a glob pattern.
///
/// The result is the longest leading run of path components that contain no glob
/// metacharacter (`*`, `?`, `[`, `]`, `{`, `}`). When the pattern has no glob component at
/// all, the parent of the path is returned instead. `"."` is returned whenever the computed
/// directory would be empty.
pub fn watch_root_from_glob(glob_pattern: &str) -> String {
    /// True when `component` is a normal path segment containing a glob metacharacter.
    fn has_glob_meta(component: &Component<'_>) -> bool {
        const META: &[char] = &['*', '?', '[', ']', '{', '}'];
        matches!(component, Component::Normal(part) if part.to_string_lossy().contains(META))
    }

    let pattern = Path::new(glob_pattern);
    let first_glob = pattern.components().position(|part| has_glob_meta(&part));

    match first_glob {
        Some(idx) => {
            // Rebuild the literal prefix that precedes the first glob component.
            let prefix: PathBuf = pattern
                .components()
                .take(idx)
                .map(|part| part.as_os_str())
                .collect();
            if prefix.as_os_str().is_empty() {
                String::from(".")
            } else {
                prefix.to_string_lossy().into_owned()
            }
        }
        // No glob anywhere: the pattern names a single path, so use its parent.
        None => match pattern.parent() {
            Some(parent) if !parent.as_os_str().is_empty() => {
                parent.to_string_lossy().into_owned()
            }
            _ => String::from("."),
        },
    }
}
442
/// Returns `$HOME/.moraine/config.toml`, or `None` when `HOME` is not set.
///
/// The file is not checked for existence.
fn home_config_path() -> Option<PathBuf> {
    let home = std::env::var_os("HOME")?;
    let mut path = PathBuf::from(home);
    path.push(".moraine");
    path.push("config.toml");
    Some(path)
}
446
/// Returns the relative path `config/moraine.toml`.
///
/// Being relative, it is resolved against the process working directory when used.
fn repo_default_config_path() -> PathBuf {
    Path::new("config/moraine.toml").to_path_buf()
}
450
/// Picks the config file path from the available candidates.
///
/// Precedence, first match wins:
/// 1. `raw_path`, when given (returned as is, without an existence check);
/// 2. the first variable in `env_keys` whose trimmed value is non-empty;
/// 3. `home_path`, if that file exists;
/// 4. `repo_default`, if that file exists;
/// 5. otherwise `home_path` when provided, else `repo_default`, even though neither
///    exists, so the caller can report a meaningful missing path.
///
/// Step 5 uses the `home_path` argument rather than re-reading `HOME`, so the outcome
/// depends only on the arguments, the named environment variables and the filesystem.
fn resolve_config_path_with_overrides(
    raw_path: Option<PathBuf>,
    env_keys: &[&str],
    home_path: Option<PathBuf>,
    repo_default: PathBuf,
) -> PathBuf {
    if let Some(path) = raw_path {
        return path;
    }

    for key in env_keys {
        if let Ok(value) = std::env::var(key) {
            let trimmed = value.trim();
            if !trimmed.is_empty() {
                return PathBuf::from(trimmed);
            }
        }
    }

    if let Some(path) = &home_path {
        if path.exists() {
            return path.clone();
        }
    }

    if repo_default.exists() {
        return repo_default;
    }

    // Nothing exists on disk: fall back to the supplied home location. The previous code
    // called `home_config_path()` here and ignored the `home_path` override.
    home_path.unwrap_or(repo_default)
}
482
483 pub fn resolve_config_path(raw_path: Option<PathBuf>) -> PathBuf {
484 resolve_config_path_with_overrides(
485 raw_path,
486 &["MORAINE_CONFIG"],
487 home_config_path(),
488 repo_default_config_path(),
489 )
490 }
491
492 pub fn resolve_mcp_config_path(raw_path: Option<PathBuf>) -> PathBuf {
493 resolve_config_path_with_overrides(
494 raw_path,
495 &["MORAINE_MCP_CONFIG", "MORAINE_CONFIG"],
496 home_config_path(),
497 repo_default_config_path(),
498 )
499 }
500
501 pub fn resolve_monitor_config_path(raw_path: Option<PathBuf>) -> PathBuf {
502 resolve_config_path_with_overrides(
503 raw_path,
504 &["MORAINE_MONITOR_CONFIG", "MORAINE_CONFIG"],
505 home_config_path(),
506 repo_default_config_path(),
507 )
508 }
509
510 pub fn resolve_ingest_config_path(raw_path: Option<PathBuf>) -> PathBuf {
511 resolve_config_path_with_overrides(
512 raw_path,
513 &["MORAINE_INGEST_CONFIG", "MORAINE_CONFIG"],
514 home_config_path(),
515 repo_default_config_path(),
516 )
517 }
518
519 fn resolve_runtime_subdir(root: &str, value: &str) -> String {
520 let expanded = expand_path(value);
521 let path = Path::new(&expanded);
522 if path.is_absolute() {
523 return expanded;
524 }
525
526 Path::new(root).join(path).to_string_lossy().to_string()
527 }
528
529 fn normalize_provider(provider: &str, source_idx: usize, source_name: &str) -> Result<String> {
530 let normalized = provider.trim().to_ascii_lowercase();
531 if normalized == "codex" || normalized == "claude" {
532 return Ok(normalized);
533 }
534
535 Err(anyhow::anyhow!(
536 "invalid ingest.sources[{source_idx}].provider `{}` for source `{}`; expected one of: codex, claude",
537 provider.trim(),
538 source_name
539 ))
540 }
541
542 fn normalize_config(mut cfg: AppConfig) -> Result<AppConfig> {
543 for (source_idx, source) in cfg.ingest.sources.iter_mut().enumerate() {
544 source.provider = normalize_provider(&source.provider, source_idx, &source.name)?;
545 source.glob = expand_path(&source.glob);
546 source.watch_root = if source.watch_root.trim().is_empty() {
547 watch_root_from_glob(&source.glob)
548 } else {
549 expand_path(&source.watch_root)
550 };
551 }
552
553 cfg.ingest.state_dir = expand_path(&cfg.ingest.state_dir);
554 cfg.runtime.root_dir = expand_path(&cfg.runtime.root_dir);
555 cfg.runtime.logs_dir = resolve_runtime_subdir(&cfg.runtime.root_dir, &cfg.runtime.logs_dir);
556 cfg.runtime.pids_dir = resolve_runtime_subdir(&cfg.runtime.root_dir, &cfg.runtime.pids_dir);
557 cfg.runtime.service_bin_dir = expand_path(&cfg.runtime.service_bin_dir);
558 cfg.runtime.managed_clickhouse_dir = expand_path(&cfg.runtime.managed_clickhouse_dir);
559
560 Ok(cfg)
561 }
562
563 pub fn load_config(path: impl AsRef<Path>) -> Result<AppConfig> {
564 let content = std::fs::read_to_string(path.as_ref())
565 .with_context(|| format!("failed to read config {}", path.as_ref().display()))?;
566 let cfg: AppConfig = toml::from_str(&content).context("failed to parse TOML config")?;
567 normalize_config(cfg)
568 }
569
// Unit tests for path resolution, watch-root derivation and config loading.
#[cfg(test)]
mod tests {
    use super::*;

    /// Writes `contents` to a new file in the system temp directory and returns its path.
    ///
    /// The file name combines `label`, the process id and a nanosecond timestamp. Callers
    /// remove the file themselves.
    fn write_temp_config(contents: &str, label: &str) -> PathBuf {
        let path = std::env::temp_dir().join(format!(
            "moraine-config-{label}-{}-{}.toml",
            std::process::id(),
            std::time::SystemTime::now()
                .duration_since(std::time::UNIX_EPOCH)
                .expect("system time after unix epoch")
                .as_nanos()
        ));
        std::fs::write(&path, contents).expect("write temp config");
        path
    }

    // An explicit path wins over every other candidate. Only the first step of the
    // precedence chain named in the test is exercised here.
    #[test]
    fn resolve_order_prefers_cli_then_env_then_home_then_repo() {
        let raw = Some(PathBuf::from("/tmp/cli.toml"));
        let chosen = resolve_config_path_with_overrides(
            raw,
            &["MORAINE_CONFIG"],
            Some(PathBuf::from("/tmp/home.toml")),
            PathBuf::from("/tmp/repo.toml"),
        );
        assert_eq!(chosen, PathBuf::from("/tmp/cli.toml"));
    }

    // Covers absolute and relative patterns, globs in the first component, and a glob
    // directly under the filesystem root.
    #[test]
    fn watch_root_extracts_prefix() {
        assert_eq!(watch_root_from_glob("/tmp/a/**/*.jsonl"), "/tmp/a");
        assert_eq!(watch_root_from_glob("/tmp/a/*.jsonl"), "/tmp/a");
        assert_eq!(watch_root_from_glob("logs/*.jsonl"), "logs");
        assert_eq!(watch_root_from_glob("logs/session-*.jsonl"), "logs");
        assert_eq!(watch_root_from_glob("*.jsonl"), ".");
        assert_eq!(watch_root_from_glob("*/*.jsonl"), ".");
        assert_eq!(watch_root_from_glob("/**/*.jsonl"), "/");
    }

    // Relative values are joined onto the root; absolute values pass through unchanged.
    #[test]
    fn runtime_subdir_joins_relative_paths() {
        let root = "/tmp/moraine";
        assert_eq!(
            resolve_runtime_subdir(root, "logs"),
            "/tmp/moraine/logs".to_string()
        );
        assert_eq!(
            resolve_runtime_subdir(root, "/var/tmp/moraine"),
            "/var/tmp/moraine".to_string()
        );
    }

    // Uses an environment variable name that only this test sets.
    #[test]
    fn resolve_order_prefers_env_over_home_and_repo() {
        let env_key = "MORAINE_CONFIG_TEST_KEY";
        std::env::set_var(env_key, "/tmp/from-env.toml");

        let chosen = resolve_config_path_with_overrides(
            None,
            &[env_key],
            Some(PathBuf::from("/tmp/from-home.toml")),
            PathBuf::from("/tmp/from-repo.toml"),
        );

        // Clean up before asserting so a failure does not leave the variable set.
        std::env::remove_var(env_key);
        assert_eq!(chosen, PathBuf::from("/tmp/from-env.toml"));
    }

    // The repo default is chosen when it exists and the home candidate does not.
    #[test]
    fn resolve_order_uses_repo_when_home_missing() {
        let repo_default = std::env::temp_dir().join("moraine-config-repo-default.toml");
        std::fs::write(&repo_default, "x=1").expect("write temp repo default");

        let chosen = resolve_config_path_with_overrides(
            None,
            &["MORAINE_CONFIG_TEST_DOES_NOT_EXIST"],
            Some(PathBuf::from("/tmp/definitely-missing-home.toml")),
            repo_default.clone(),
        );

        std::fs::remove_file(&repo_default).ok();
        assert_eq!(chosen, repo_default);
    }

    // NOTE(review): this sets and removes the real `MORAINE_MCP_CONFIG` / `MORAINE_CONFIG`
    // variables for the whole process. No other test in this module reads them, but confirm
    // that stays true if tests calling the `resolve_*_config_path(None)` helpers are added.
    #[test]
    fn mcp_config_env_has_priority_over_generic_env() {
        std::env::set_var("MORAINE_MCP_CONFIG", "/tmp/mcp.toml");
        std::env::set_var("MORAINE_CONFIG", "/tmp/generic.toml");

        let chosen = resolve_mcp_config_path(None);

        std::env::remove_var("MORAINE_MCP_CONFIG");
        std::env::remove_var("MORAINE_CONFIG");
        assert_eq!(chosen, PathBuf::from("/tmp/mcp.toml"));
    }

    // A missing file surfaces the read-context message.
    #[test]
    fn load_config_errors_when_path_missing() {
        let path = std::env::temp_dir().join("moraine-missing-config-does-not-exist.toml");
        let err = load_config(&path).expect_err("missing config path should fail");
        assert!(
            err.to_string().contains("failed to read config"),
            "unexpected error: {err:#}"
        );
    }

    // A file containing only comments parses to an all-defaults configuration.
    #[test]
    fn load_config_accepts_minimal_comment_only_file() {
        let path = write_temp_config(
            r#"
# Moraine default config.
# Values omitted here are filled by built-in defaults.
"#,
            "minimal-comment-only",
        );
        let cfg = load_config(&path).expect("minimal config should load with defaults");
        std::fs::remove_file(&path).ok();
        assert_eq!(cfg.clickhouse.url, "http://127.0.0.1:8123");
        assert!(!cfg.ingest.sources.is_empty());
    }

    // `deny_unknown_fields` on `AppConfig` rejects unknown top-level tables.
    #[test]
    fn load_config_errors_on_unknown_top_level_section() {
        let path = write_temp_config(
            r#"
[clickhouse]
url = "http://127.0.0.1:8123"

[unexpected]
enabled = true
"#,
            "unknown-top-level",
        );
        let err = load_config(&path).expect_err("unknown top-level section should fail");
        std::fs::remove_file(&path).ok();
        assert!(
            format!("{err:#}").contains("unknown field `unexpected`"),
            "unexpected error: {err:#}"
        );
    }

    // `deny_unknown_fields` on `IngestSource` rejects unknown keys inside a source entry.
    #[test]
    fn load_config_errors_on_unknown_ingest_source_key() {
        let path = write_temp_config(
            r#"
[[ingest.sources]]
name = "codex"
provider = "codex"
enabled = true
glob = "~/.codex/sessions/**/*.jsonl"
watch_root = "~/.codex/sessions"
extra = "not-allowed"
"#,
            "unknown-source-key",
        );
        let err = load_config(&path).expect_err("unknown ingest source key should fail");
        std::fs::remove_file(&path).ok();
        assert!(
            format!("{err:#}").contains("unknown field `extra`"),
            "unexpected error: {err:#}"
        );
    }

    // A provider other than `codex` / `claude` parses as TOML but fails normalization.
    #[test]
    fn load_config_errors_on_unknown_ingest_provider() {
        let path = write_temp_config(
            r#"
[[ingest.sources]]
name = "custom"
provider = "openai"
enabled = true
glob = "~/.custom/sessions/**/*.jsonl"
watch_root = "~/.custom/sessions"
"#,
            "unknown-provider",
        );
        let err = load_config(&path).expect_err("unknown ingest provider should fail");
        std::fs::remove_file(&path).ok();
        assert!(
            format!("{err:#}").contains("expected one of: codex, claude"),
            "unexpected error: {err:#}"
        );
    }
}
754 }