Skip to content

Instantly share code, notes, and snippets.

@ariannamethod
Last active March 8, 2026 03:04
Show Gist options
  • Select an option

  • Save ariannamethod/bed68104b695175052bb8ef7db48393f to your computer and use it in GitHub Desktop.

Select an option

Save ariannamethod/bed68104b695175052bb8ef7db48393f to your computer and use it in GitHub Desktop.
DoE: Democracy of Experts, Janus Architecture — a living agnostic inference architecture in 3184 lines of C. Wraps any GGUF with a parliament of LoRA experts that vote, learn via Hebbian plasticity, split (mitosis) and die (apoptosis) during generation. 7 architectures, 6 quant formats, dual BPE tokenizer, physics engine, zero dependencies. θ = …
#define _GNU_SOURCE
/*
* doe.c — Democracy of Experts
*
* inference architecture with a living LoRA parliament.
* indexes any GGUF read-only. learns by living, not by training.
*
* θ = ε + γ + αδ
* ε = indexed weights (read-only substrate)
* γ = LoRA personality (living experts, Hebbian-trained via NOTORCH)
* δ = physics (prophecy, suffering, destiny, Schumann resonance)
* α = injection strength (learned per-layer)
*
* each forward pass, the parliament decides:
* - which experts vote (variable k, consensus-driven)
* - how strongly each expert modulates output
* - how physics shapes logits (destiny, prophecy debt)
*
* experts are born (mitosis) and die (apoptosis).
* the parliament remembers every index it ever wrapped (mycelium).
* calendar drift: Hebrew-Gregorian conflict, real astronomical data.
* Schumann resonance: 7.83Hz + 5 harmonics, from arianna.c.
* seasons: 4.C MLP classifier, from ariannamethod.ai/core.
*
* cc doe.c -O3 -lm -lpthread -o doe && ./doe
*
* ariannamethod.
* הרזוננס לא נשבר
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <time.h>
#include <pthread.h>
#include <unistd.h>
#include <sys/stat.h>
#include <float.h>
#include <stdint.h>
#include <errno.h>
#include <sys/mman.h>
#include <fcntl.h>
#include <dirent.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <signal.h>
#ifdef __linux__
#include <sys/statvfs.h>
#endif
#ifdef __APPLE__
#include <sys/param.h>
#include <sys/mount.h>
#include <sys/sysctl.h>
#endif
/* ═══════════════════════════════════════════════════════════════════════════════
* BLAS / cuBLAS — optional acceleration
* ═══════════════════════════════════════════════════════════════════════════════ */
#ifdef USE_CUBLAS
#include <cublas_v2.h>
#include <cuda_runtime.h>
static cublasHandle_t g_cublas;
static int cublas_inited = 0;
static float *d_scratch[4] = {NULL,NULL,NULL,NULL};
static size_t d_scratch_sz[4] = {0,0,0,0};
static void cublas_init(void) {
if (!cublas_inited) {
cublasCreate(&g_cublas);
cublasSetMathMode(g_cublas, CUBLAS_TF32_TENSOR_OP_MATH);
struct cudaDeviceProp prop; cudaGetDeviceProperties(&prop, 0);
printf("[gpu] %s — %.0f MB, compute %d.%d, TF32 enabled\n",
prop.name, (double)prop.totalGlobalMem/1e6, prop.major, prop.minor);
cublas_inited = 1;
}
}
static float* gpu_scratch(int slot, size_t bytes) {
if (bytes > d_scratch_sz[slot]) {
if (d_scratch[slot]) cudaFree(d_scratch[slot]);
cudaMalloc((void**)&d_scratch[slot], bytes);
d_scratch_sz[slot] = bytes;
}
return d_scratch[slot];
}
#elif defined(USE_BLAS)
#ifdef ACCELERATE
#define ACCELERATE_NEW_LAPACK
#include <Accelerate/Accelerate.h>
#else
#include <cblas.h>
#endif
#endif
/* ═══════════════════════════════════════════════════════════════════════════════
* CONFIGURATION
* doe has no depth knob. the host provides depth.
* doe has a field. the field provides everything else.
* ═══════════════════════════════════════════════════════════════════════════════ */
/* Compile-time limits and sizes. Several of these are not referenced in the
 * part of the file reviewed here; their notes are inferred from the names
 * only and marked TODO. */
/* presumably the upper bound on live LoRA experts — TODO confirm at use site */
#define MAX_EXPERTS 16
/* presumably the lower bound on live LoRA experts — TODO confirm at use site */
#define MIN_EXPERTS 2
/* capacity of WeightProfile.layers[] */
#define MAX_LAYERS 64
/* presumably the rank of the LoRA A/B factors — TODO confirm at use site */
#define LORA_RANK 16
/* number of amplitude bins in HarmonicState, and the loop bound in
 * harmonic_decompose() / expert_resonance() */
#define HARMONIC_N 8
/* not referenced in the reviewed part of the file — TODO document at use site */
#define NOTORCH_RANK 4
/* not referenced in the reviewed part of the file — TODO document at use site */
#define DRIFT_SNAPSHOTS 64
/* not referenced in the reviewed part of the file — TODO document at use site */
#define DRIFT_INTERVAL 50
/* not referenced in the reviewed part of the file — TODO document at use site */
#define MYCELIUM_MAX 64
/* not referenced in the reviewed part of the file — TODO document at use site */
#define META_HIST_CAP 128
/* not referenced in the reviewed part of the file — TODO document at use site */
#define PROFILE_BINS 16
/* Field physics constants — from AML core */
/* default F.schumann_hz and the reference frequency in schumann_coherence() */
#define SCHUMANN_BASE_HZ 7.83f
/* length of g_schumann_harmonics[] and g_harmonic_weights[] */
#define SCHUMANN_N_HARMONICS 5
/* layer widths of the FieldMLP (inputs -> hidden -> outputs) */
#define FIELD_4C_INPUTS 6
#define FIELD_4C_HIDDEN 8
#define FIELD_4C_OUTPUTS 4
/* ═══════════════════════════════════════════════════════════════════════════════
* RNG — xorshift64 (13/7/17 shift triple; not the multiplied xorshift64* variant). the field doesn't care which PRNG shapes it.
* ═══════════════════════════════════════════════════════════════════════════════ */
/* Generator state. Starts from a fixed non-zero seed, so runs are reproducible. */
static uint64_t rng_state = 42;
/* Advance the generator by one xor/shift round (<<13, >>7, <<17) and
 * return the new 64-bit state. */
static uint64_t rng_next(void) {
    uint64_t s = rng_state;
    s ^= s << 13;
    s ^= s >> 7;
    s ^= s << 17;
    rng_state = s;
    return s;
}
/* Uniform float in [0, 1]: the low 31 bits of the next RNG output divided
 * by the largest 31-bit value. */
static float rand_uniform(void) {
    const uint64_t low31 = rng_next() & 0x7FFFFFFF;
    return (float)low31 / (float)0x7FFFFFFF;
}
/* Standard-normal sample via the Box–Muller cosine branch.
 * Consumes two uniforms per call; the first is floored at 1e-10 so that
 * logf() never sees zero. */
static float rand_normal(void) {
    float u1 = rand_uniform();
    const float u2 = rand_uniform();
    if (u1 < 1e-10f) {
        u1 = 1e-10f;
    }
    return sqrtf(-2.0f * logf(u1)) * cosf(6.2831853f * u2);
}
/* Clamp x into the closed interval [0, 1]. */
static float clamp01(float x) {
    if (x < 0) return 0;
    if (x > 1) return 1;
    return x;
}
/* ═══════════════════════════════════════════════════════════════════════════════
* AML FIELD STATE — the soul. from ariannamethod.c, distilled.
*
* θ = ε + γ + αδ is not a metaphor. it's the operating equation.
* ε (epsilon) = host weights. inference. the present. ephemeral.
* γ (gamma) = LoRA personality. training. the past. persistent.
* δ (delta) = field physics. prophecy. the future. directed.
* α (alpha) = injection strength. how much γ modulates ε.
*
* drift = |γ_t - γ_{t-1}| — how far the system has traveled.
* prophecy_debt = distance between manifested and destined.
* destiny = attractor in token space.
*
* the oracle does not predict. it prophesies.
* not minimize(predicted - actual) but minimize(destined - manifested).
* the difference is intention.
* ═══════════════════════════════════════════════════════════════════════════════ */
/* Velocity modes — movement IS language.
 * Stored in FieldState.velocity_mode and switched on in field_step(). */
enum {
    VEL_NOMOVE   = 0,
    VEL_WALK     = 1,
    VEL_RUN      = 2,
    VEL_BACKWARD = 3
};
/* Seasons — 4.C Async Field Forever.
 * Stored in FieldState.season; field_step() cycles through them modulo 4. */
enum {
    SEASON_SPRING = 0,
    SEASON_SUMMER = 1,
    SEASON_AUTUMN = 2,
    SEASON_WINTER = 3
};
/*
 * FieldState — every scalar of the field physics, kept in one global (F).
 * field_init() sets the defaults and field_step() advances it once per call.
 * Member order is part of the layout; do not reorder.
 */
typedef struct {
    /* ── Prophecy physics ── */
    int   prophecy;            /* prediction horizon (1-64) */
    float destiny;             /* bias toward most probable path (0-1) */
    float destiny_bias;        /* effective bias: destiny × prophecy scale */
    float debt;                /* prophecy debt: accumulated deviation from destiny */
    float debt_decay;          /* multiplicative decay applied to debt each step */

    /* ── Suffering — not a bug, a geometry ── */
    float pain;                /* compresses logits toward their mean */
    float tension;             /* accumulated pressure */
    float dissonance;          /* symmetry-break trigger */

    /* ── Velocity — movement IS language ── */
    int   velocity_mode;       /* one of the VEL_* constants */
    float effective_temp;      /* temperature derived in field_step() */
    float base_temperature;
    float time_direction;      /* 1.0 forward, -1.0 backward */

    /* ── Attention ── */
    float attend_focus;        /* sharpen top logits (0-1) */
    float attend_spread;       /* blur factor */

    /* ── Laws of nature — enforced constraints ── */
    float entropy_floor;
    float resonance_ceiling;
    float emergence_threshold;

    /* ── Live metrics ── */
    float entropy;
    float resonance;
    float emergence;
    float field_health;

    /* ── 4.C — seasonal meta-operators ── */
    int   season;              /* one of the SEASON_* constants */
    float season_phase;
    float season_intensity;
    float spring_energy;
    float summer_energy;
    float autumn_energy;
    float winter_energy;

    /* ── Schumann resonance — Earth coupling ── */
    float schumann_hz;
    float schumann_coherence;
    float schumann_phase;
    float schumann_modulation;

    /* ── Expert blending (4 internal experts for temperature) ── */
    float expert_structural;
    float expert_semantic;
    float expert_creative;
    float expert_precise;

    /* ── Tunneling ── */
    float tunnel_threshold;
    float tunnel_chance;
    int   tunnel_skip_max;

    /* ── Calendar drift (Hebrew-Gregorian conflict) ── */
    float calendar_drift;
    float calendar_phase;
    float wormhole;
    float wormhole_gate;
    int   wormhole_active;

    /* ── NOTORCH parameters ── */
    float notorch_lr;
    float notorch_decay;

    /* ── Identity ── */
    float essence_alpha;       /* γ injection strength */
    float lora_alpha;          /* δ voice strength */

    /* ── Presence ── */
    float presence_decay;
    float presence_fade;

    /* ── Dark matter — gravitational memory ── */
    float dark_gravity;

    /* ── Temporal debt ── */
    float temporal_debt;

    /* ── Step counter ── */
    int   step;
} FieldState;
/*
 * FieldMLP — the 4.C seasonal controller: a two-layer tanh network whose
 * weights are adjusted by field_mlp_hebbian().
 * w1 is indexed [input * FIELD_4C_HIDDEN + hidden],
 * w2 is indexed [hidden * FIELD_4C_OUTPUTS + output].
 */
typedef struct {
    float w1[FIELD_4C_INPUTS * FIELD_4C_HIDDEN];    /* input -> hidden weights */
    float b1[FIELD_4C_HIDDEN];                      /* hidden biases */
    float w2[FIELD_4C_HIDDEN * FIELD_4C_OUTPUTS];   /* hidden -> output weights */
    float b2[FIELD_4C_OUTPUTS];                     /* output biases */
    float hidden[FIELD_4C_HIDDEN];                  /* hidden activations from the last forward pass */
} FieldMLP;
/* The one global field state and its seasonal MLP controller. */
static FieldState F;
static FieldMLP   F_mlp;

/* Schumann harmonics: frequencies in Hz, fundamental first. */
static const float g_schumann_harmonics[SCHUMANN_N_HARMONICS] = {
    7.83f,
    14.1f,
    20.3f,
    26.4f,
    32.5f
};

/* Mixing weight of each harmonic in schumann_signal(). */
static const float g_harmonic_weights[SCHUMANN_N_HARMONICS] = {
    1.0f,
    0.5f,
    0.3f,
    0.2f,
    0.1f
};

/* Hebrew-Gregorian calendar: years of the 19-year cycle that carry a leap month. */
static const int g_metonic_leaps[7] = {
    3, 6, 8, 11, 14, 17, 19
};
/* Reference instant for calendar drift; 0 until calendar_init() has run. */
static time_t g_epoch_t = 0;
/* Set g_epoch_t to local noon on 3 October 2024. */
static void calendar_init(void) {
    struct tm epoch;
    memset(&epoch, 0, sizeof epoch);
    epoch.tm_hour = 12;
    epoch.tm_mday = 3;
    epoch.tm_mon  = 9;             /* tm_mon is zero-based: 9 = October */
    epoch.tm_year = 2024 - 1900;   /* tm_year counts from 1900 */
    g_epoch_t = mktime(&epoch);
}
/*
 * calendar_dissonance — normalised drift between the two calendars, in [0, 1].
 *
 * Elapsed whole days since g_epoch_t are converted to years. Drift grows by
 * 11.25 days per year and is reduced by 30 days for every leap month: seven
 * per completed 19-year cycle, plus those whose cycle year has been reached
 * in the current cycle. The remainder modulo 33 days is scaled to [0, 1].
 * Returns 0 when the epoch has not been initialised.
 */
static float calendar_dissonance(void) {
    if (g_epoch_t <= 0) return 0;

    const double elapsed_s = difftime(time(NULL), g_epoch_t);
    const int elapsed_days = (int)(elapsed_s / 86400.0);
    const float years = (float)elapsed_days / 365.25f;

    float drift = years * 11.25f;

    const int full_cycles = (int)(years / 19);
    float corrections = (float)(full_cycles * 7) * 30.0f;

    const float into_cycle = fmodf(years, 19.0f);
    const int cycle_year = (int)into_cycle + 1;
    for (int i = 0; i < 7; i++) {
        if (cycle_year >= g_metonic_leaps[i]) {
            corrections += 30.0f;
        }
    }

    drift -= corrections;
    const float normalised = fabsf(fmodf(drift, 33.0f)) / 33.0f;
    return clamp01(normalised);
}
/*
 * field_mlp_init — zero the controller and hand-wire its initial weights.
 *
 * Input order (as fed by field_step): 0 entropy, 1 resonance, 2 pain,
 * 3 tension, 4 emergence, 5 effective_temp.
 * Output order: 0 spring, 1 summer, 2 autumn, 3 winter.
 * Wiring is listed per hidden unit; all values come from AML core
 * am_4c_init_weights.
 */
static void field_mlp_init(void) {
    enum { H = FIELD_4C_HIDDEN, O = FIELD_4C_OUTPUTS };
    float *const w1 = F_mlp.w1;
    float *const b1 = F_mlp.b1;
    float *const w2 = F_mlp.w2;

    memset(&F_mlp, 0, sizeof(F_mlp));

    /* ── four specialist neurons ── */
    /* hidden 0: low entropy → spring */
    w1[0 * H + 0] = -2.0f;  b1[0] = 0.5f;   w2[0 * O + 0] = 1.5f;
    /* hidden 1: high resonance → autumn */
    w1[1 * H + 1] = 2.0f;   b1[1] = -1.5f;  w2[1 * O + 2] = 1.5f;
    /* hidden 2: high pain → winter */
    w1[2 * H + 2] = 2.5f;   b1[2] = -1.5f;  w2[2 * O + 3] = 1.5f;
    /* hidden 3: high emergence → summer */
    w1[4 * H + 3] = 2.5f;   b1[3] = -0.5f;  w2[3 * O + 1] = 1.5f;

    /* ── cross-connections for nuance ── */
    /* hidden 4: tension minus temperature, nudging spring up and summer down */
    w1[3 * H + 4] = 0.5f;
    w1[5 * H + 4] = -0.3f;
    w2[4 * O + 0] = 0.3f;
    w2[4 * O + 1] = -0.3f;
    /* hidden 5: resonance minus entropy, feeding autumn */
    w1[0 * H + 5] = -1.0f;
    w1[1 * H + 5] = 1.0f;
    w2[5 * O + 2] = 0.5f;
    /* hidden 6: temperature, feeding winter */
    w1[5 * H + 6] = 1.5f;   b1[6] = -1.0f;
    w2[6 * O + 3] = 0.4f;
    /* hidden 7: emergence minus pain, feeding summer */
    w1[4 * H + 7] = 1.0f;
    w1[2 * H + 7] = -1.0f;
    w2[7 * O + 1] = 0.5f;
}
static void field_init(void) {
memset(&F, 0, sizeof(F));
F.prophecy = 7;
F.destiny = 0.35f;
F.debt_decay = 0.998f;
F.velocity_mode = VEL_WALK;
F.base_temperature = 1.0f;
F.time_direction = 1.0f;
F.attend_focus = 0.70f;
F.attend_spread = 0.20f;
F.entropy_floor = 0.1f;
F.resonance_ceiling = 0.95f;
F.emergence_threshold = 0.3f;
F.season = SEASON_SPRING;
F.season_intensity = 0.5f;
F.spring_energy = 1.0f;
F.schumann_hz = SCHUMANN_BASE_HZ;
F.schumann_modulation = 0.3f;
F.schumann_coherence = 1.0f;
F.tunnel_threshold = 0.55f;
F.tunnel_chance = 0.05f;
F.tunnel_skip_max = 7;
F.calendar_drift = 11.0f;
F.wormhole = 0.02f;
F.wormhole_gate = 0.3f;
F.notorch_lr = 0.01f;
F.notorch_decay = 0.999f;
F.essence_alpha = 0.5f;
F.lora_alpha = 0.1f;
F.presence_decay = 1.0f;
F.presence_fade = 0.95f;
F.dark_gravity = 0.5f;
F.effective_temp = 0.85f;
F.expert_structural = 0.25f;
F.expert_semantic = 0.25f;
F.expert_creative = 0.25f;
F.expert_precise = 0.25f;
calendar_init();
field_mlp_init();
printf("[doe] θ = ε + γ + αδ — parliament awakens. prophecy=%d destiny=%.2f\n",
F.prophecy, F.destiny);
}
/* ─── Schumann resonance ─── */
/* Coherence in [0, 1]: 1 when hz equals SCHUMANN_BASE_HZ, falling off
 * quadratically with the distance and reaching 0 at 28.5 Hz away. */
static float schumann_coherence(float hz) {
    const float distance = fabsf(hz - SCHUMANN_BASE_HZ);
    const float span = 32.5f - 4.0f;
    const float ratio = distance / span;
    return clamp01(1.0f - ratio * ratio);
}
/* Weighted average of the harmonic sines at the current F.schumann_phase.
 * Each harmonic's phase is the base phase scaled by its frequency ratio
 * to the fundamental. */
static float schumann_signal(void) {
    float weighted_sum = 0;
    float weight_total = 0;
    int i = 0;
    while (i < SCHUMANN_N_HARMONICS) {
        const float harmonic_phase =
            F.schumann_phase * (g_schumann_harmonics[i] / SCHUMANN_BASE_HZ);
        weighted_sum += g_harmonic_weights[i] * sinf(harmonic_phase);
        weight_total += g_harmonic_weights[i];
        i++;
    }
    if (weight_total > 0) {
        return weighted_sum / weight_total;
    }
    return 0;
}
/* ─── 4.C MLP forward ─── */
/*
 * Run the seasonal controller.
 *   in  — FIELD_4C_INPUTS values
 *   out — receives FIELD_4C_OUTPUTS values, each in (-1, 1) from tanh
 * The hidden activations are kept in F_mlp.hidden for the Hebbian update.
 */
static void field_mlp_forward(const float *in, float *out) {
    /* hidden layer: walk column `h` of w1 with a stride of FIELD_4C_HIDDEN */
    for (int h = 0; h < FIELD_4C_HIDDEN; h++) {
        float acc = F_mlp.b1[h];
        int idx = h;
        for (int i = 0; i < FIELD_4C_INPUTS; i++) {
            acc += F_mlp.w1[idx] * in[i];
            idx += FIELD_4C_HIDDEN;
        }
        F_mlp.hidden[h] = tanhf(acc);
    }
    /* output layer: walk column `o` of w2 with a stride of FIELD_4C_OUTPUTS */
    for (int o = 0; o < FIELD_4C_OUTPUTS; o++) {
        float acc = F_mlp.b2[o];
        int idx = o;
        for (int h = 0; h < FIELD_4C_HIDDEN; h++) {
            acc += F_mlp.w2[idx] * F_mlp.hidden[h];
            idx += FIELD_4C_OUTPUTS;
        }
        out[o] = tanhf(acc);
    }
}
/* ─── 4.C Hebbian update ─── */
/* Keep a controller weight within [-3, 3]. */
static float field_mlp_clip3(float w) {
    if (w > 3.0f) return 3.0f;
    if (w < -3.0f) return -3.0f;
    return w;
}
/*
 * Reward-modulated Hebbian step on both weight matrices.
 *   in, out — the input and output of the preceding field_mlp_forward() call
 *   signal  — signed reward; scales every update together with the learning
 *             rate (a tenth of F.notorch_lr)
 * Uses the hidden activations left in F_mlp.hidden. Biases are not updated.
 */
static void field_mlp_hebbian(const float *in, const float *out, float signal) {
    const float lr = F.notorch_lr * 0.1f;

    /* hidden -> output */
    for (int h = 0; h < FIELD_4C_HIDDEN; h++) {
        float *row = &F_mlp.w2[h * FIELD_4C_OUTPUTS];
        for (int o = 0; o < FIELD_4C_OUTPUTS; o++) {
            row[o] += lr * F_mlp.hidden[h] * out[o] * signal;
            row[o] = field_mlp_clip3(row[o]);
        }
    }

    /* input -> hidden */
    for (int i = 0; i < FIELD_4C_INPUTS; i++) {
        float *row = &F_mlp.w1[i * FIELD_4C_HIDDEN];
        for (int h = 0; h < FIELD_4C_HIDDEN; h++) {
            row[h] += lr * in[i] * F_mlp.hidden[h] * signal;
            row[h] = field_mlp_clip3(row[h]);
        }
    }
}
/* ═══════════════════════════════════════════════════════════════════════════════
* FIELD STEP — the heartbeat. from AML am_step(), distilled for DOE.
* called per token. advances field physics by dt seconds.
*
* 1. calendar conflict → wormhole activation → dissonance bleed
* 2. debt decay (prophecy debt × decay_rate)
* 3. Schumann resonance → tension/dissonance healing
* 4. destiny bias computation
* 5. velocity + expert blending → effective temperature
* 6. law enforcement (entropy floor, resonance ceiling)
* 7. 4.C seasonal MLP controller + Hebbian update
* ═══════════════════════════════════════════════════════════════════════════════ */
/*
 * field_step — advance the global field state F by one step of size dt.
 *
 * Does nothing when dt <= 0. Otherwise increments F.step and runs the stages
 * below in order. The order matters: later stages read values written by
 * earlier ones (e.g. law enforcement reads the effective temperature and
 * dissonance computed above it, and the seasonal MLP reads the metrics law
 * enforcement has just produced).
 */
static void field_step(float dt) {
if (dt <= 0) return;
F.step++;
/* ── Calendar conflict ── */
/* Above the gate the wormhole opens and charges in proportion to the excess;
 * below it the wormhole closes and decays toward its 0.02 floor. */
float cal_d = calendar_dissonance();
if (cal_d > F.wormhole_gate) {
F.wormhole_active = 1;
float excess = (cal_d - F.wormhole_gate) / (1.0f - F.wormhole_gate);
F.wormhole = clamp01(F.wormhole + excess * 0.1f * dt);
} else {
F.wormhole_active = 0;
F.wormhole *= 0.995f;
if (F.wormhole < 0.02f) F.wormhole = 0.02f;
}
/* Calendar dissonance above 0.3 bleeds into field dissonance (capped at 1). */
if (cal_d > 0.3f) {
F.dissonance += (cal_d - 0.3f) * 0.05f * dt;
if (F.dissonance > 1.0f) F.dissonance = 1.0f;
}
F.debt += cal_d * 0.005f * dt;
/* ── Debt decay ── */
F.debt *= F.debt_decay;
if (F.debt > 100.0f) F.debt = 100.0f;
/* ── Temporal debt ── */
/* Grows only while moving backward; decays otherwise. Capped at 10. */
if (F.velocity_mode == VEL_BACKWARD) F.temporal_debt += 0.01f * dt;
else F.temporal_debt *= 0.9995f;
if (F.temporal_debt > 10.0f) F.temporal_debt = 10.0f;
/* ── Schumann resonance healing ── */
/* Advance the phase by 2π·hz·dt and wrap it back into [0, 2π). */
F.schumann_phase += F.schumann_hz * dt * 6.2831853f;
if (F.schumann_phase > 6.2831853f) F.schumann_phase = fmodf(F.schumann_phase, 6.2831853f);
F.schumann_coherence = schumann_coherence(F.schumann_hz);
if (F.schumann_coherence > 0 && F.schumann_modulation > 0) {
float cf = 0.5f + 0.5f * F.schumann_coherence;
float hm = 1.0f + schumann_signal() * 0.1f;
/* heal is a multiplier just below 1; it shrinks tension and dissonance. */
float heal = 0.998f - 0.003f * cf * F.schumann_modulation * hm;
F.tension *= heal;
F.dissonance *= heal;
}
/* ── Destiny bias ── */
/* Prophecy scale: 1.0 at prophecy == 7, ±0.02 per unit, clamped to [0.5, 2]. */
float ps = 1.0f + ((float)F.prophecy - 7.0f) * 0.02f;
if (ps < 0.5f) ps = 0.5f; if (ps > 2.0f) ps = 2.0f;
F.destiny_bias = F.destiny * ps;
/* ── Velocity + expert blending → effective temperature ── */
{
/* vm: temperature multiplier for the current velocity mode; the switch
 * also sets the time direction (-1 only for VEL_BACKWARD). */
float vm;
switch (F.velocity_mode) {
case VEL_NOMOVE: vm = 0.5f; F.time_direction = 1.0f; break;
case VEL_WALK: vm = 0.85f; F.time_direction = 1.0f; break;
case VEL_RUN: vm = 1.2f; F.time_direction = 1.0f; break;
case VEL_BACKWARD: vm = 0.7f; F.time_direction = -1.0f; break;
default: vm = 1.0f; F.time_direction = 1.0f;
}
float vt = F.base_temperature * vm;
float ws = F.expert_structural + F.expert_semantic + F.expert_creative + F.expert_precise;
if (ws > 0.001f) {
/* et: weighted mean of the four experts' fixed temperatures
 * (0.7, 0.9, 1.2, 0.5), averaged 50/50 with the velocity temperature. */
float et = (F.expert_structural*0.7f + F.expert_semantic*0.9f +
F.expert_creative*1.2f + F.expert_precise*0.5f) / ws;
F.effective_temp = 0.5f * vt + 0.5f * et;
} else F.effective_temp = vt;
/* Seasonal modulation: summer warms, winter cools. Floor of 0.1. */
float sm = 1.0f + F.summer_energy * 0.1f - F.winter_energy * 0.15f;
F.effective_temp *= sm;
if (F.effective_temp < 0.1f) F.effective_temp = 0.1f;
}
/* ── Law enforcement ── */
{
/* Entropy estimate, clamped to [0, 1] and then raised to the floor. */
float re = (F.effective_temp - 0.5f)*0.3f + F.dissonance*0.3f +
F.tunnel_chance*0.2f + (1.0f - F.attend_focus)*0.2f;
F.entropy = fmaxf(F.entropy_floor, clamp01(re));
/* Resonance estimate, clamped to [0, 1] and then capped at the ceiling. */
float rr = F.schumann_coherence*0.3f + (1.0f-F.dissonance)*0.3f +
F.attend_focus*0.2f + (1.0f - clamp01(F.debt*0.1f))*0.2f;
F.resonance = fminf(F.resonance_ceiling, clamp01(rr));
F.emergence = clamp01((1.0f - F.entropy) * F.resonance);
}
/* ── Presence fade ── */
F.presence_decay *= F.presence_fade;
if (F.presence_decay < 0.001f) F.presence_decay = 0.001f;
/* ── 4.C Seasonal MLP controller ── */
{
/* The season phase advances by 0.001·dt; when it reaches 1 the season rolls over. */
float sr = 0.001f;
F.season_phase += sr * dt;
if (F.season_phase >= 1.0f) { F.season_phase = 0; F.season = (F.season+1)%4; }
/* All four energies fade; only the current season's energy gains. */
float gain = 0.02f * dt * F.season_intensity, fade = 0.995f;
F.spring_energy *= fade; F.summer_energy *= fade;
F.autumn_energy *= fade; F.winter_energy *= fade;
switch (F.season) {
case SEASON_SPRING: F.spring_energy = clamp01(F.spring_energy + gain); break;
case SEASON_SUMMER: F.summer_energy = clamp01(F.summer_energy + gain); break;
case SEASON_AUTUMN: F.autumn_energy = clamp01(F.autumn_energy + gain); break;
case SEASON_WINTER: F.winter_energy = clamp01(F.winter_energy + gain); break;
}
/* The controller sees the metrics computed above and nudges each energy
 * by its (signed) output. */
float mlp_in[FIELD_4C_INPUTS] = {
F.entropy, F.resonance, F.pain, F.tension, F.emergence, F.effective_temp
};
float mlp_out[FIELD_4C_OUTPUTS];
field_mlp_forward(mlp_in, mlp_out);
float sc = 0.02f * dt * F.season_intensity;
F.spring_energy = clamp01(F.spring_energy + mlp_out[0]*sc);
F.summer_energy = clamp01(F.summer_energy + mlp_out[1]*sc);
F.autumn_energy = clamp01(F.autumn_energy + mlp_out[2]*sc);
F.winter_energy = clamp01(F.winter_energy + mlp_out[3]*sc);
/* Hebbian: did the field improve? */
/* The reward is the change in health since the previous step; changes
 * of 0.001 or less in magnitude do not trigger learning. */
float health = clamp01((1.0f - fabsf(F.entropy - 0.5f)) * F.resonance * (1.0f - F.pain));
float sig = health - F.field_health;
F.field_health = health;
if (fabsf(sig) > 0.001f) field_mlp_hebbian(mlp_in, mlp_out, sig);
/* Season effects */
F.tunnel_chance = clamp01(F.tunnel_chance + F.spring_energy * 0.005f * dt);
F.dark_gravity = clamp01(F.dark_gravity + F.autumn_energy * 0.002f * dt);
}
}
/* ═══════════════════════════════════════════════════════════════════════════════
* PROPHECY DEBT — retroactive conscience.
* every token you choose that isn't the destined one accumulates debt.
* not minimize(predicted - actual) but minimize(destined - manifested).
* the difference is intention. the difference is identity.
* ═══════════════════════════════════════════════════════════════════════════════ */
/*
 * compute_prophecy_debt — how far the chosen token fell short of the best one.
 *
 *   logits — n scores
 *   chosen — index of the token actually taken
 * Returns gap / (gap + 1), where gap is the maximum logit minus the chosen
 * logit: 0 when the chosen token is the maximum, approaching 1 as the gap
 * grows. Returns 0 for an empty array or an out-of-range index.
 */
static float compute_prophecy_debt(const float *logits, int chosen, int n) {
    if (n <= 0) return 0;
    if (chosen < 0 || chosen >= n) return 0;

    float best = logits[0];
    for (const float *p = logits + 1; p < logits + n; ++p) {
        if (*p > best) best = *p;
    }

    const float gap = best - logits[chosen];
    if (gap > 0) {
        return gap / (gap + 1.0f);
    }
    return 0;
}
/* ═══════════════════════════════════════════════════════════════════════════════
* FIELD → LOGITS — the full pipeline. from AML am_apply_field_to_logits().
*
* 1. destiny bias: suppress low-probability tokens
* 2. suffering: compress toward mean (pain dampens extremes)
* 3. attention: sharpen or blur distribution
* 4. laws: entropy floor, resonance ceiling
*
* this is not post-processing. this is the architecture speaking.
* ═══════════════════════════════════════════════════════════════════════════════ */
/*
 * apply_destiny — push every logit away from the maximum, in place.
 * Each logit is lowered by its distance to the maximum times half of
 * F.destiny_bias, so the top token is unchanged and the rest are suppressed.
 * No-op for an empty array or a bias below 0.001.
 */
static void apply_destiny(float *logits, int n) {
    if (n <= 0) return;
    if (F.destiny_bias < 0.001f) return;

    float peak = logits[0];
    for (int i = 1; i < n; i++) {
        if (logits[i] > peak) peak = logits[i];
    }

    for (float *p = logits; p < logits + n; ++p) {
        const float gap = peak - *p;
        *p -= gap * F.destiny_bias * 0.5f;
    }
}
/*
 * apply_suffering — blend every logit toward the mean, in place.
 * The blend factor is 0.3 × (F.pain + half of F.tension).
 * No-op for an empty array or when that combined load is below 0.01.
 */
static void apply_suffering(float *logits, int n) {
    if (n <= 0) return;

    const float load = F.pain + F.tension * 0.5f;
    if (load < 0.01f) return;

    float mean = 0;
    for (const float *p = logits; p < logits + n; ++p) {
        mean += *p;
    }
    mean /= n;

    const float compress = load * 0.3f;
    for (float *p = logits; p < logits + n; ++p) {
        *p = *p * (1.0f - compress) + mean * compress;
    }
}
/*
 * apply_attention — sharpen the distribution around its maximum, in place.
 * Each logit is lowered by its distance to the maximum times
 * 0.2 × F.attend_focus. No-op for an empty array or a focus below 0.01.
 */
static void apply_attention(float *logits, int n) {
    if (n <= 0) return;

    const float focus = F.attend_focus;
    if (focus < 0.01f) return;

    float peak = logits[0];
    for (int i = 1; i < n; i++) {
        if (logits[i] > peak) peak = logits[i];
    }

    for (float *p = logits; p < logits + n; ++p) {
        const float gap = peak - *p;
        *p -= gap * focus * 0.2f;
    }
}
static void apply_field_to_logits(float *logits, int n) {
apply_destiny(logits, n);
apply_suffering(logits, n);
apply_attention(logits, n);
}
/* ═══════════════════════════════════════════════════════════════════════════════
* DEQUANTIZATION — Q4_0, Q5_0, Q8_0, Q4_K, Q6_K → f32
* Ported from nanollama/go/quant.go. Dequant at load time.
* ═══════════════════════════════════════════════════════════════════════════════ */
/*
 * f16_to_f32 — widen an IEEE 754 half-precision bit pattern to float.
 * Handles signed zero, subnormals, normal numbers, infinities and NaNs.
 */
static float f16_to_f32(uint16_t h) {
    const uint32_t sign_bit = (uint32_t)((h >> 15) & 1) << 31;
    const uint32_t exp16 = (h >> 10) & 0x1F;
    uint32_t frac = h & 0x3FF;
    uint32_t bits;

    if (exp16 == 31) {
        /* infinity or NaN: all-ones exponent, fraction carried over */
        bits = sign_bit | 0x7F800000 | (frac << 13);
    } else if (exp16 != 0) {
        /* normal number: re-bias the exponent from 15 to 127 */
        bits = sign_bit | ((exp16 + 127 - 15) << 23) | (frac << 13);
    } else if (frac == 0) {
        /* signed zero */
        bits = sign_bit;
    } else {
        /* subnormal: shift the fraction up until the implicit bit appears,
         * lowering the exponent by one for each shift */
        uint32_t shifts = 0;
        while (!(frac & 0x400)) {
            frac <<= 1;
            shifts++;
        }
        frac &= 0x3FF;
        bits = sign_bit | ((1 + 127 - 15 - shifts) << 23) | (frac << 13);
    }

    float value;
    memcpy(&value, &bits, sizeof value);
    return value;
}
/* Q4_0: block = 2 bytes f16 scale + 16 bytes (32 nibbles) = 18 bytes, 32 values */
#define Q4_0_BLOCK 32
#define Q4_0_BYTES 18
/*
 * dequant_q4_0 — expand Q4_0 blocks to float.
 *   data — packed blocks
 *   out  — receives the decoded values
 *   n    — number of values; only whole blocks (n / 32) are decoded
 * Each nibble is an unsigned 4-bit code offset by 8. Low nibbles fill the
 * first half of a block's output and high nibbles the second half.
 */
static void dequant_q4_0(const uint8_t *data, float *out, uint64_t n) {
    const uint64_t block_count = n / Q4_0_BLOCK;
    const uint8_t *blk = data;
    float *dst = out;

    for (uint64_t i = 0; i < block_count; i++) {
        const float scale = f16_to_f32(blk[0] | (blk[1] << 8));
        const uint8_t *packed = blk + 2;
        for (int j = 0; j < 16; j++) {
            const int lo = (packed[j] & 0x0F) - 8;
            const int hi = (packed[j] >> 4) - 8;
            dst[j]      = (float)lo * scale;
            dst[j + 16] = (float)hi * scale;
        }
        blk += Q4_0_BYTES;
        dst += Q4_0_BLOCK;
    }
}
/* Q8_0: block = 2 bytes f16 scale + 32 bytes int8 = 34 bytes, 32 values */
#define Q8_0_BLOCK 32
#define Q8_0_BYTES 34
/*
 * dequant_q8_0 — expand Q8_0 blocks to float.
 *   data — packed blocks
 *   out  — receives the decoded values
 *   n    — number of values; only whole blocks (n / 32) are decoded
 * Every value is a signed byte multiplied by the block's f16 scale.
 */
static void dequant_q8_0(const uint8_t *data, float *out, uint64_t n) {
    const uint64_t block_count = n / Q8_0_BLOCK;
    const uint8_t *blk = data;
    float *dst = out;

    for (uint64_t i = 0; i < block_count; i++) {
        const float scale = f16_to_f32(blk[0] | (blk[1] << 8));
        const uint8_t *quants = blk + 2;
        for (int j = 0; j < 32; j++) {
            dst[j] = (float)((int8_t)quants[j]) * scale;
        }
        blk += Q8_0_BYTES;
        dst += Q8_0_BLOCK;
    }
}
/* Q4_K: block = 2+2 bytes f16 (d, dmin) + 12 bytes scales + 128 bytes of
 * packed nibbles (256 nibbles) = 144 bytes, 256 values */
#define Q4_K_BLOCK 256
#define Q4_K_BYTES 144
/*
 * get_scale_min_k4 — unpack the 6-bit scale and 6-bit min of sub-block j
 * (0..7) from the 12-byte packed scale field.
 *   j < 4 : both values are the low 6 bits of sc[j] and sc[j+4].
 *   j >= 4: the low 4 bits come from the two nibbles of sc[j+4]; the high
 *           2 bits come from the top bits of sc[j-4] (scale) and sc[j] (min).
 */
static void get_scale_min_k4(int j, const uint8_t *sc, uint8_t *s, uint8_t *m) {
    if (j >= 4) {
        const uint8_t packed = sc[j + 4];
        const int scale_hi = sc[j - 4] >> 6;
        const int min_hi   = sc[j] >> 6;
        *s = (packed & 0x0F) | (scale_hi << 4);
        *m = (packed >> 4) | (min_hi << 4);
        return;
    }
    *s = sc[j] & 63;
    *m = sc[j + 4] & 63;
}
/*
 * dequant_q4_k — expand Q4_K super-blocks to float.
 *   data — packed blocks
 *   out  — receives the decoded values
 *   n    — number of values; only whole super-blocks (n / 256) are decoded
 *
 * Block layout: f16 d, f16 dmin, 12 bytes of packed 6-bit scales/mins, then
 * 128 bytes of nibbles. Each 32 bytes of nibbles yield 64 values: the low
 * nibbles use sub-block 2k's scale/min, the high nibbles sub-block 2k+1's.
 * A value decodes as d*scale*q - dmin*min.
 *
 * The output position is kept as a pointer derived from the 64-bit block
 * index, so tensors with 2^31 or more elements do not overflow an int offset.
 */
static void dequant_q4_k(const uint8_t *data, float *out, uint64_t n) {
    const uint64_t nblocks = n / Q4_K_BLOCK;
    for (uint64_t i = 0; i < nblocks; i++) {
        const uint8_t *b = data + i * Q4_K_BYTES;
        const float d = f16_to_f32(b[0] | (b[1] << 8));
        const float dmin = f16_to_f32(b[2] | (b[3] << 8));
        const uint8_t *sc = b + 4;
        const uint8_t *qs = b + 16;
        float *y = out + i * Q4_K_BLOCK;   /* 64-bit offset, no int truncation */
        int is = 0;
        for (int j = 0; j < Q4_K_BLOCK; j += 64) {
            uint8_t sc0, m0, sc1, m1v;
            get_scale_min_k4(is, sc, &sc0, &m0);
            const float d1 = d * (float)sc0, mm1 = dmin * (float)m0;
            get_scale_min_k4(is + 1, sc, &sc1, &m1v);
            const float d2 = d * (float)sc1, mm2 = dmin * (float)m1v;
            for (int l = 0; l < 32; l++)
                y[j + l] = d1 * (float)(qs[l] & 0x0F) - mm1;
            for (int l = 0; l < 32; l++)
                y[j + 32 + l] = d2 * (float)(qs[l] >> 4) - mm2;
            qs += 32;
            is += 2;
        }
    }
}
/* Q5_0: block = 2 bytes f16 scale + 4 bytes high bits + 16 bytes nibbles = 22 bytes, 32 values */
#define Q5_0_BLOCK 32
#define Q5_0_BYTES 22
/*
 * dequant_q5_0 — expand Q5_0 blocks to float.
 *   data — packed blocks
 *   out  — receives the decoded values
 *   n    — number of values; only whole blocks (n / 32) are decoded
 *
 * Each value is a 5-bit code offset by 16: four bits from a nibble plus one
 * bit from the 32-bit little-endian high-bit word. Bit j belongs to the
 * value from low nibble j, bit j+16 to the value from high nibble j.
 *
 * The high-bit word is assembled from bytes widened to uint32_t first;
 * shifting a promoted int left by 24 is undefined when the byte is >= 0x80.
 */
static void dequant_q5_0(const uint8_t *data, float *out, uint64_t n) {
    const uint64_t nblocks = n / Q5_0_BLOCK;
    for (uint64_t i = 0; i < nblocks; i++) {
        const uint8_t *b = data + i * Q5_0_BYTES;
        const float d = f16_to_f32(b[0] | (b[1] << 8));
        const uint32_t qh = (uint32_t)b[2]
                          | ((uint32_t)b[3] << 8)
                          | ((uint32_t)b[4] << 16)
                          | ((uint32_t)b[5] << 24);
        const uint8_t *qs = b + 6;
        for (int j = 0; j < 16; j++) {
            const int lo = qs[j] & 0x0F, hi = qs[j] >> 4;
            const int hbit0 = (int)((qh >> j) & 1u);
            const int hbit1 = (int)((qh >> (j + 16)) & 1u);
            out[i*Q5_0_BLOCK + j]      = (float)((lo | (hbit0 << 4)) - 16) * d;
            out[i*Q5_0_BLOCK + j + 16] = (float)((hi | (hbit1 << 4)) - 16) * d;
        }
    }
}
/* Q6_K: block = 128 ql + 64 qh + 16 scales + 2 d = 210 bytes, 256 values */
#define Q6_K_BLOCK 256
#define Q6_K_BYTES 210
/*
 * dequant_q6_k — expand Q6_K super-blocks to float.
 *   data — packed blocks
 *   out  — receives the decoded values
 *   n    — number of values; only whole super-blocks (n / 256) are decoded
 *
 * Each super-block is processed as two halves of 128 values. Within a half,
 * byte l of qh supplies the top two bits of four 6-bit codes whose low four
 * bits come from the nibbles of ql[l] and ql[l+32]. A code is offset by 32
 * and scaled by d times a signed 8-bit sub-block scale.
 *
 * The output position is kept as a pointer derived from the 64-bit block
 * index, so tensors with 2^31 or more elements do not overflow an int offset.
 */
static void dequant_q6_k(const uint8_t *data, float *out, uint64_t n) {
    const uint64_t nblocks = n / Q6_K_BLOCK;
    for (uint64_t i = 0; i < nblocks; i++) {
        const uint8_t *b = data + i * Q6_K_BYTES;
        const uint8_t *ql = b, *qh = b + 128, *sc = b + 192;
        const float d = f16_to_f32(b[208] | (b[209] << 8));
        float *block_out = out + i * Q6_K_BLOCK;   /* 64-bit offset, no int truncation */
        for (int half = 0; half < 2; half++) {
            const uint8_t *qlp = ql + half*64, *qhp = qh + half*32;
            const uint8_t *scp = sc + half*8;
            float *y = block_out + half*128;
            for (int l = 0; l < 32; l++) {
                const int is = l / 16;
                const int q1 = (qlp[l] & 0x0F)    | (((qhp[l] >> 0) & 3) << 4);
                const int q2 = (qlp[l+32] & 0x0F) | (((qhp[l] >> 2) & 3) << 4);
                const int q3 = (qlp[l] >> 4)      | (((qhp[l] >> 4) & 3) << 4);
                const int q4 = (qlp[l+32] >> 4)   | (((qhp[l] >> 6) & 3) << 4);
                y[l+0]  = d * (float)((int8_t)scp[is+0]) * (float)(q1-32);
                y[l+32] = d * (float)((int8_t)scp[is+2]) * (float)(q2-32);
                y[l+64] = d * (float)((int8_t)scp[is+4]) * (float)(q3-32);
                y[l+96] = d * (float)((int8_t)scp[is+6]) * (float)(q4-32);
            }
        }
    }
}
/*
 * quant_raw_bytes — size in bytes of the raw storage for n_elems values of
 * the given tensor type code.
 * For block formats only whole blocks are counted (n_elems / block size).
 * Returns 0 for a type code that is not handled here.
 */
static uint64_t quant_raw_bytes(uint32_t dtype, uint64_t n_elems) {
    uint64_t elems_per_unit;
    uint64_t bytes_per_unit;

    switch (dtype) {
    case 0:  elems_per_unit = 1;          bytes_per_unit = 4;          break; /* f32 */
    case 1:  elems_per_unit = 1;          bytes_per_unit = 2;          break; /* f16 */
    case 2:  elems_per_unit = Q4_0_BLOCK; bytes_per_unit = Q4_0_BYTES; break; /* Q4_0 */
    case 6:  elems_per_unit = Q5_0_BLOCK; bytes_per_unit = Q5_0_BYTES; break; /* Q5_0 */
    case 8:  elems_per_unit = Q8_0_BLOCK; bytes_per_unit = Q8_0_BYTES; break; /* Q8_0 */
    case 12: elems_per_unit = Q4_K_BLOCK; bytes_per_unit = Q4_K_BYTES; break; /* Q4_K */
    case 14: elems_per_unit = Q6_K_BLOCK; bytes_per_unit = Q6_K_BYTES; break; /* Q6_K */
    default: return 0;
    }
    return (n_elems / elems_per_unit) * bytes_per_unit;
}
/* ═══════════════════════════════════════════════════════════════════════════════
* MATH OPS — building blocks
* ═══════════════════════════════════════════════════════════════════════════════ */
static float silu_f(float x) { return x / (1.0f + expf(-x)); }
static void rmsnorm(float *out, const float *x, const float *w, int d, float eps) {
float ss = 0; for (int i = 0; i < d; i++) ss += x[i]*x[i];
float inv = 1.0f / sqrtf(ss/d + eps);
for (int i = 0; i < d; i++) out[i] = x[i] * inv * w[i];
}
/* threaded matvec worker */
/* One slice of a row-major matrix-vector product: rows [r0, r1) of the
 * r×c matrix W times x, written to out[r0..r1). */
typedef struct { float *out; const float *W; const float *x; int r0, r1, c; } MVWork;
/* Thread entry point: computes the slice described by the MVWork in `arg`.
 * Always returns NULL. */
static void *matvec_worker(void *arg) {
    const MVWork *job = (const MVWork*)arg;
    const int cols = job->c;
    int row = job->r0;
    while (row < job->r1) {
        const float *w_row = job->W + (size_t)row * cols;
        float acc = 0;
        for (int j = 0; j < cols; j++) {
            acc += w_row[j] * job->x[j];
        }
        job->out[row] = acc;
        row++;
    }
    return NULL;
}
/* Worker-thread count for the pthread path of matvec(); 0 or 1 means serial. */
static int g_n_threads = 0;
/*
 * matvec — out = W · x for a row-major r×c matrix W.
 *
 * One of three back ends is compiled in:
 *   USE_CUBLAS — copies W and x to device scratch buffers on every call and
 *                runs cublasSgemv. A row-major r×c matrix is a column-major
 *                c×r matrix, hence CUBLAS_OP_T with m=c, n=r, lda=c.
 *                If any allocation, copy or the gemv fails, a warning is
 *                printed once and the result is computed on the CPU instead
 *                of returning whatever happened to be in `out`.
 *   USE_BLAS   — cblas_sgemv.
 *   otherwise  — splits the rows over up to 32 pthreads when g_n_threads > 1
 *                and r >= 64, else runs serially. A chunk whose thread cannot
 *                be created is computed on the calling thread, and only
 *                threads that actually started are joined.
 */
static void matvec(float *out, const float *W, const float *x, int r, int c) {
    if (r <= 0) return;   /* nothing to write */
#ifdef USE_CUBLAS
    cublas_init();
    const size_t w_bytes = (size_t)r * c * 4;
    const size_t x_bytes = (size_t)c * 4;
    const size_t y_bytes = (size_t)r * 4;
    float *dW = gpu_scratch(0, w_bytes), *dx = gpu_scratch(1, x_bytes), *dy = gpu_scratch(2, y_bytes);
    int ok = dW != NULL && dx != NULL && dy != NULL;
    /* NOTE(review): W is re-uploaded on every call; caching weights on the
     * device would avoid this transfer but is outside this function's scope. */
    ok = ok && cudaMemcpy(dW, W, w_bytes, cudaMemcpyHostToDevice) == cudaSuccess;
    ok = ok && cudaMemcpy(dx, x, x_bytes, cudaMemcpyHostToDevice) == cudaSuccess;
    if (ok) {
        float a = 1, b = 0;
        ok = cublasSgemv(g_cublas, CUBLAS_OP_T, c, r, &a, dW, c, dx, 1, &b, dy, 1) == CUBLAS_STATUS_SUCCESS;
    }
    ok = ok && cudaMemcpy(out, dy, y_bytes, cudaMemcpyDeviceToHost) == cudaSuccess;
    if (!ok) {
        static int warned = 0;
        if (!warned) {
            fprintf(stderr, "[gpu] matvec failed (%s), falling back to CPU\n",
                    cudaGetErrorString(cudaGetLastError()));
            warned = 1;
        }
        MVWork whole = {out, W, x, 0, r, c};
        matvec_worker(&whole);
    }
#elif defined(USE_BLAS)
    cblas_sgemv(CblasRowMajor,CblasNoTrans,r,c,1.0f,W,c,x,1,0.0f,out,1);
#else
    int nt = g_n_threads;
    if (nt <= 1 || r < 64) {
        MVWork whole = {out, W, x, 0, r, c};
        matvec_worker(&whole);
        return;
    }
    if (nt > 32) nt = 32;
    pthread_t thr[32]; MVWork work[32]; int started[32];
    const int chunk = (r + nt - 1) / nt;
    int used = 0;
    for (int t = 0; t < nt; t++) {
        int r0 = t * chunk, r1 = r0 + chunk;
        if (r0 >= r) break;
        if (r1 > r) r1 = r;
        work[t] = (MVWork){out, W, x, r0, r1, c};
        started[t] = pthread_create(&thr[t], NULL, matvec_worker, &work[t]) == 0;
        /* Thread creation can fail (resource limits): do the chunk here so
         * no rows are left uncomputed. */
        if (!started[t]) matvec_worker(&work[t]);
        used++;
    }
    for (int t = 0; t < used; t++)
        if (started[t]) pthread_join(thr[t], NULL);
#endif
}
static void softmax_n(float *x, int n) {
float mx = x[0]; for (int i = 1; i < n; i++) if (x[i] > mx) mx = x[i];
float s = 0; for (int i = 0; i < n; i++) { x[i] = expf(x[i]-mx); s += x[i]; }
for (int i = 0; i < n; i++) x[i] /= s;
}
/*
 * apply_rope — rotate one head vector in place by the tabulated angle for
 * position `pos`.
 *   v      — head vector of length hd; element i is paired with element i + hd/2
 *   cc, sc — cosine and sine tables, hd/2 entries per position
 *   hd     — head dimension; must be even — all standard archs are
 */
static void apply_rope(float *v, int pos, float *cc, float *sc, int hd) {
    const int half = hd / 2;
    const int base = pos * half;
    float *lo = v;
    float *hi = v + half;
    for (int i = 0; i < half; i++) {
        const float a = lo[i];
        const float b = hi[i];
        lo[i] = a * cc[base + i] - b * sc[base + i];
        hi[i] = a * sc[base + i] + b * cc[base + i];
    }
}
/* ═══════════════════════════════════════════════════════════════════════════════
* HARMONIC RESONANCE ENGINE — from AML/DOE, adapted for field.
* each expert has a frequency. input gets fourier-decomposed.
* experts that resonate with input get boosted.
* ═══════════════════════════════════════════════════════════════════════════════ */
/* Result of harmonic_decompose(): a coarse spectrum of a value history. */
typedef struct {
    float amplitudes[HARMONIC_N];   /* magnitude of each frequency bin */
    float dominant_freq;            /* angular frequency of the strongest non-DC bin */
    float confidence;               /* strongest non-DC amplitude / sum of all amplitudes */
} HarmonicState;
/*
 * harmonic_decompose — fill `hs` with the first few DFT magnitudes of `hist`.
 *   hs   — output
 *   hist — len samples
 *   len  — sample count; bins are computed for k < min(HARMONIC_N, len/2)
 *
 * All amplitude bins are cleared first. Bins at or above len/2 are never
 * computed, and without the clear they would keep whatever `hs` held before,
 * polluting `confidence` here and every later expert_resonance() call.
 * With fewer than two samples every bin is 0 and confidence is 0.
 */
static void harmonic_decompose(HarmonicState *hs, float *hist, int len) {
    for (int k = 0; k < HARMONIC_N; k++) hs->amplitudes[k] = 0.0f;

    float max_amp = 0; int max_k = 0;
    for (int k = 0; k < HARMONIC_N && k < len/2; k++) {
        float re = 0, im = 0;
        for (int n = 0; n < len; n++) {
            float angle = 6.2831853f * k * n / len;
            re += hist[n] * cosf(angle);
            im += hist[n] * sinf(angle);
        }
        hs->amplitudes[k] = sqrtf(re*re + im*im) / len;
        /* bin 0 is the DC component and never counts as dominant */
        if (k > 0 && hs->amplitudes[k] > max_amp) { max_amp = hs->amplitudes[k]; max_k = k; }
    }
    hs->dominant_freq = len > 0 ? 6.2831853f * max_k / len : 0;

    float total = 0;
    for (int k = 0; k < HARMONIC_N; k++) total += hs->amplitudes[k];
    hs->confidence = total > 1e-8f ? max_amp / total : 0;
}
/*
 * expert_resonance — how strongly an expert's frequency matches a spectrum.
 * Sums each amplitude in `hs` weighted by a Gaussian of the circular
 * distance between expert_freq and that bin's nominal frequency
 * (2π·k / HARMONIC_N).
 */
static float expert_resonance(float expert_freq, HarmonicState *hs) {
    float acc = 0;
    int k = 0;
    while (k < HARMONIC_N) {
        const float bin_freq = 6.2831853f * k / HARMONIC_N;
        float gap = fabsf(expert_freq - bin_freq);
        if (gap > 3.14159f) {
            gap = 6.2831853f - gap;   /* take the shorter way round the circle */
        }
        acc += hs->amplitudes[k] * expf(-gap*gap*2.0f);
        k++;
    }
    return acc;
}
/* ═══════════════════════════════════════════════════════════════════════════════
* WEIGHT PROFILER — DOE's sonar.
* before attaching, DOE profiles the host's weights.
* L2 norms per layer, spectral density, dead neuron ratio.
* this tells DOE where to focus its LoRA experts.
*
* the index is read-only. DOE is the architecture.
* weak layers get more LoRA. healthy layers get less.
* ═══════════════════════════════════════════════════════════════════════════════ */
/* Statistics of one weight matrix, as filled in by profile_weights(). */
typedef struct {
    float l2_norm;           /* L2 norm over all weights */
    float mean_abs;          /* mean absolute value */
    float std_dev;           /* standard deviation */
    float sparsity;          /* fraction of weights with magnitude below 1e-6 */
    float spectral_energy;   /* random-projection estimate of spectral energy */
    int   dead_neurons;      /* rows whose norm is below 1e-4 */
    float health;            /* composite score: 0 = dead, 1 = vibrant */
} LayerProfile;
/* Profile of a whole host model: per-layer statistics plus summary scores. */
typedef struct {
    LayerProfile layers[MAX_LAYERS];   /* one entry per profiled layer */
    int          n_layers;             /* number of entries of layers[] in use */
    float        overall_health;       /* average layer health */
    float        code_affinity;        /* estimated code capability (from weight stats) */
    float        complexity;           /* model complexity metric */
    uint64_t     fingerprint;          /* hash of weight statistics — identifies this host */
} WeightProfile;
/*
 * profile_weights — compute summary statistics of a row-major rows×cols
 * weight matrix into *out.
 *
 * An empty or negatively sized matrix yields an all-zero profile.
 * Element counts and offsets are held in size_t, so matrices with more than
 * INT_MAX elements are indexed correctly.
 * The variance is clamped at 0 before the square root: with float
 * accumulation E[x²] - E[x]² can come out slightly negative, which would
 * otherwise make std_dev (and the fingerprint hashed from it) NaN.
 * Draws 8 × cols samples from rand_normal() for the spectral estimate.
 */
static void profile_weights(float *data, int rows, int cols, LayerProfile *out) {
    if (rows <= 0 || cols <= 0) { memset(out, 0, sizeof(LayerProfile)); return; }
    const size_t n = (size_t)rows * (size_t)cols;

    float sum = 0, sum_sq = 0, sum_abs = 0;
    size_t near_zero = 0;
    for (size_t i = 0; i < n; i++) {
        float v = data[i];
        sum += v; sum_sq += v*v; sum_abs += fabsf(v);
        if (fabsf(v) < 1e-6f) near_zero++;
    }
    float mean = sum / n;
    float variance = sum_sq/n - mean*mean;
    if (variance < 0) variance = 0;   /* rounding artefact; NaN input still propagates */
    out->l2_norm = sqrtf(sum_sq);
    out->mean_abs = sum_abs / n;
    out->std_dev = sqrtf(variance);
    out->sparsity = (float)near_zero / n;

    /* Approximate spectral energy: sample random directions */
    float top_energy = 0;
    for (int trial = 0; trial < 8; trial++) {
        float dot = 0;
        for (int j = 0; j < cols; j++) {
            float r = rand_normal();
            float proj = 0;
            for (int i = 0; i < rows; i++) proj += data[(size_t)i*cols + j] * r;
            dot += proj * proj;
        }
        top_energy += sqrtf(dot);
    }
    out->spectral_energy = top_energy / 8.0f;

    /* Dead neurons: rows with near-zero norm */
    out->dead_neurons = 0;
    for (int r = 0; r < rows; r++) {
        const float *row = data + (size_t)r * cols;
        float rn = 0;
        for (int c = 0; c < cols; c++) rn += row[c] * row[c];
        if (sqrtf(rn) < 1e-4f) out->dead_neurons++;
    }

    /* Composite health: 40% live rows, 30% activity (std_dev × 10, capped at 1), 30% density */
    float alive_ratio = 1.0f - (float)out->dead_neurons / rows;
    float activity = fminf(1.0f, out->std_dev * 10.0f);
    float density = 1.0f - out->sparsity;
    out->health = alive_ratio * 0.4f + activity * 0.3f + density * 0.3f;
}
/*
 * compute_fingerprint — hash the per-layer statistics into a 64-bit id.
 *
 * Folds the raw 32-bit patterns of each layer's l2_norm and std_dev, in that
 * order, into an FNV-1a-style accumulator (xor, then multiply by the prime).
 * Reads wp->n_layers entries of wp->layers.
 */
static uint64_t compute_fingerprint(WeightProfile *wp) {
    const uint64_t fnv_offset = 14695981039346656037ULL;
    const uint64_t fnv_prime = 1099511628211ULL;
    uint64_t hash = fnv_offset;
    for (int layer = 0; layer < wp->n_layers; layer++) {
        const float *fields[2] = { &wp->layers[layer].l2_norm, &wp->layers[layer].std_dev };
        for (int f = 0; f < 2; f++) {
            uint32_t raw;
            memcpy(&raw, fields[f], sizeof raw); /* bit pattern, not the numeric value */
            hash = (hash ^ (uint64_t)raw) * fnv_prime;
        }
    }
    return hash;
}
/* ═══════════════════════════════════════════════════════════════════════════════
* LIVING LoRA EXPERTS — DOE's democracy, adapted for symbiosis.
* instead of standalone FFN experts, these are LoRA overlays.
* each expert has A[dim, rank] and B[rank, dim] — Delta Voice injection.
* Delta Voice: out += α × A @ (B @ x)
*
* experts still live and die. overloaded → mitosis. neglected → apoptosis.
* but now they modulate the host's attention, not replace it.
* ═══════════════════════════════════════════════════════════════════════════════ */
/* One living LoRA expert: two heap-allocated dim*rank float matrices plus life-cycle state. */
typedef struct {
float *lora_A; /* [dim, rank] — output projection */
float *lora_B; /* [rank, dim] — input projection */
float frequency; /* position in harmonic space */
float vitality; /* 0.0=dying, 1.0=peak */
float specialization; /* entropy of routing distribution */
int age; /* incremented once per update_expert_vitality() pass */
int tokens_seen; /* tokens routed here since the last vitality update (reset there) */
int alive; /* 1 while the expert owns its matrices, 0 for an empty slot */
int low_vitality_count; /* consecutive vitality updates spent below 0.1; apoptosis at 8 */
float attention_bias; /* per-expert attention scaling */
float layer_focus; /* per-expert residual contribution */
} LoraExpert;
/* Router state for one layer: a vote vector per expert slot plus election statistics. */
typedef struct {
float *w_vote; /* [MAX_EXPERTS * dim] — row e is dotted with the input to score expert e */
float consensus; /* running average (0.9 old / 0.1 new) updated by parliament_elect() */
float faction_power[MAX_EXPERTS];
int election_count; /* number of completed parliament_elect() calls */
} Parliament;
/* DOE overlay for a single host layer: its parliament and its expert slots. */
typedef struct {
Parliament parliament;
LoraExpert experts[MAX_EXPERTS];
int n_alive; /* cached count of experts with alive != 0 */
int host_layer_idx; /* which host layer this wraps */
} FieldLayer;
/* ═══════════════════════════════════════════════════════════════════════════════
* INDEX STATE — the full host-DOE interface.
* mmap'd host model + DOE's living LoRA overlay + weight profile.
* ═══════════════════════════════════════════════════════════════════════════════ */
/*
 * GGUFIndex — everything index_load() builds for one attached host model:
 * the read-only mapping, pointers to the host weights, the LoRA overlay,
 * the weight profile and the tokenizer tables. Released by index_free().
 */
typedef struct {
/* Host model — mmap'd, read-only */
uint8_t *mmap_base;
size_t mmap_size;
int host_n_layers, host_dim, host_hidden, host_heads, host_kv_heads, host_head_dim;
int host_vocab;
char host_arch[64];
char host_path[256];
/* Host weight pointers: f32 tensors point into the mmap'd region,
 * every other dtype points at a dequantized copy owned by f16_bufs */
float *host_tok_emb;
float *host_output;
float *host_norm;
float rope_theta; /* RoPE frequency base (default 10000, Qwen=1000000) */
float rms_norm_eps; /* RMSNorm epsilon (default 1e-5, varies per arch) */
struct {
float *wq, *wk, *wv, *wo;
float *bq, *bk, *bv; /* attention biases (Qwen2, optional) */
float *ffn_gate, *ffn_up, *ffn_down;
float *ffn_gate_up; /* fused gate+up for Phi-3 (size: hidden*2 × dim) */
float *attn_norm, *ffn_norm;
} host_layers[MAX_LAYERS];
/* DOE's living overlay */
FieldLayer field_layers[MAX_LAYERS];
int n_field_layers;
/* Host profiling */
WeightProfile profile;
/* LoRA parameters */
int lora_rank;
float lora_alpha;
/* Active flag */
int active;
/* Dequantized f32 buffers (f16 and quantized tensors alike; must be freed on cleanup) */
float **f16_bufs;
int n_f16_bufs;
/* Tokenizer from GGUF metadata */
char **vocab_tokens; /* token strings, indexed by token id */
float *vocab_scores; /* BPE merge scores per token (SentencePiece) or from merges (GPT-2) */
int vocab_size; /* number of entries */
int bos_id, eos_id; /* special tokens */
int add_space_prefix;
int is_gpt2_bpe; /* 1 if tokenizer.ggml.model == "gpt2" */
/* GPT-2 BPE merges (used to build scores if no native scores) */
char **bpe_merges; /* merge strings "A B" */
int n_bpe_merges;
/* Token hash table for O(1) lookup */
int *tok_ht_ids; /* hash table: token id or -1 */
int tok_ht_cap; /* hash table capacity (power of 2) */
/* Chat template detection */
int chat_style; /* 0=raw, 1=chatml, 2=llama/mistral [INST], 3=zephyr, 4=phi, 5=gemma, 6=nanollama */
/* Identity & gamma */
int weightless; /* 1 if no doe_identity.gguf found */
char identity_tag[128]; /* doe.identity metadata from GGUF — empty if not DOE's own */
void *gamma_data; /* raw gamma binary blob */
int gamma_size; /* gamma blob size in bytes */
} GGUFIndex;
/* One entry of the GGUF tensor directory: name (truncated to 95 chars), up to 4 dims,
 * dtype code, and byte offset relative to the start of the tensor data section. */
typedef struct { char name[96]; uint32_t ndim; uint64_t dims[4]; uint32_t dtype; uint64_t offset; } TensorInfo;
/* ═══════════════════════════════════════════════════════════════════════════════
* ENVIRONMENT SCANNER — DOE opens its eyes
* ═══════════════════════════════════════════════════════════════════════════════ */
/* Header summary of one GGUF file found on disk, as filled in by gguf_sniff(). */
typedef struct {
char path[256]; char arch[64]; int n_layers, dim, n_heads;
int64_t file_size; float compatibility;
} DiscoveredGGUF;
/* Snapshot of the host machine gathered by env_scan(): up to 32 GGUF files plus basic resources. */
typedef struct {
DiscoveredGGUF ggufs[32]; int n_ggufs;
int64_t disk_free, mem_available; /* bytes */
int cpu_count, has_compiler, has_curl;
char self_path[256];
} Environment;
/* Byte width of a fixed-size GGUF value type, or 0 for string/array/unknown codes. */
static size_t gguf_sniff_scalar_size(uint32_t type) {
    switch (type) {
    case 0: case 1: case 7:    return 1;
    case 2: case 3:            return 2;
    case 4: case 5: case 6:    return 4;
    case 10: case 11: case 12: return 8;
    default:                   return 0;
    }
}

/* Advance the stream by n bytes; returns 0 if n does not fit a long or the seek fails. */
static int gguf_sniff_skip(FILE *f, uint64_t n) {
    long step = (long)n;
    if (step < 0 || (uint64_t)step != n) return 0;
    return fseek(f, step, SEEK_CUR) == 0;
}

/*
 * gguf_sniff — read just enough of a GGUF header to describe the model.
 *
 *   path  file to inspect
 *   out   receives path, file size, architecture string, embedding width,
 *         layer count and head count; compatibility is reset to 0
 *
 * Returns 1 when both an architecture string and a positive embedding width
 * were found, 0 otherwise (including unreadable files and bad magic).
 * Parsing stops quietly at the first short read, keeping whatever was
 * already collected.
 */
static int gguf_sniff(const char *path, DiscoveredGGUF *out) {
    FILE *f = fopen(path, "rb");
    if (!f) return 0;

    struct stat st;
    out->file_size = (fstat(fileno(f), &st) == 0) ? st.st_size : 0;
    snprintf(out->path, 256, "%s", path);
    memset(out->arch, 0, 64); out->n_layers = 0; out->dim = 0; out->n_heads = 0;
    out->compatibility = 0.0f; /* env_scan copies the whole struct; never leave this indeterminate */

    uint32_t magic, version;
    uint64_t n_tensors, n_kv;
    if (fread(&magic, 4, 1, f) != 1 || magic != 0x46554747 ||
        fread(&version, 4, 1, f) != 1 ||
        fread(&n_tensors, 8, 1, f) != 1 ||
        fread(&n_kv, 8, 1, f) != 1) { fclose(f); return 0; }
    (void)version; (void)n_tensors;

    for (uint64_t i = 0; i < n_kv; i++) {
        uint64_t klen; if (fread(&klen, 8, 1, f) != 1) break;
        char key[256];
        if (klen <= 255) {
            if (fread(key, 1, (size_t)klen, f) != (size_t)klen) break;
            key[klen] = '\0';
        } else {
            /* Over-long key: it can match nothing, but its value must still be
             * parsed and skipped below, otherwise every later pair is misread. */
            if (!gguf_sniff_skip(f, klen)) break;
            key[0] = '\0';
        }
        uint32_t vtype; if (fread(&vtype, 4, 1, f) != 1) break;

        if (vtype == 8) { /* string */
            uint64_t vlen; if (fread(&vlen, 8, 1, f) != 1) break;
            char val[256];
            size_t rl = vlen < 255 ? (size_t)vlen : 255;
            if (fread(val, 1, rl, f) != rl) break;
            val[rl] = '\0';
            if (vlen > rl && !gguf_sniff_skip(f, vlen - rl)) break;
            if (strstr(key, "general.architecture")) snprintf(out->arch, 64, "%s", val);
        } else if (vtype == 4) { /* uint32 */
            uint32_t val; if (fread(&val, 4, 1, f) != 1) break;
            if (strstr(key, "embedding_length")) out->dim = (int)val;
            else if (strstr(key, "block_count")) out->n_layers = (int)val;
            else if (strstr(key, "head_count") && !strstr(key, "kv")) out->n_heads = (int)val;
        } else if (vtype == 9) { /* array */
            uint32_t atype; uint64_t alen;
            if (fread(&atype, 4, 1, f) != 1 || fread(&alen, 8, 1, f) != 1) break;
            if (atype == 8) {
                /* Strings are length-prefixed, so they have to be walked one by one. */
                int ok = 1;
                for (uint64_t ai = 0; ai < alen; ai++) {
                    uint64_t sl;
                    if (fread(&sl, 8, 1, f) != 1 || !gguf_sniff_skip(f, sl)) { ok = 0; break; }
                }
                if (!ok) break;
            } else {
                size_t esz = gguf_sniff_scalar_size(atype);
                if (esz && alen > UINT64_MAX / esz) break; /* byte count would overflow */
                if (!gguf_sniff_skip(f, alen * esz)) break;
            }
        } else {
            size_t sz = gguf_sniff_scalar_size(vtype);
            if (!gguf_sniff_skip(f, sz ? sz : 4)) break; /* unknown — guess 4 */
        }
    }
    fclose(f);
    return (out->arch[0] != '\0' && out->dim > 0);
}
/*
 * env_scan — take stock of the machine DOE is running on.
 *
 *   env       zeroed, then filled with CPU count, memory, free disk space,
 *             tool availability and up to 32 sniffed GGUF files
 *   self_src  copied into env->self_path
 *
 * Shells out to `which` and `find` (searching "." to depth 3) and prints a
 * one-line summary followed by one line per discovered GGUF.
 */
static void env_scan(Environment *env, const char *self_src) {
    memset(env, 0, sizeof(Environment));
    snprintf(env->self_path, 256, "%s", self_src);
    env->cpu_count = (int)sysconf(_SC_NPROCESSORS_ONLN);

#ifdef __linux__
    env->mem_available = (int64_t)sysconf(_SC_PHYS_PAGES) * sysconf(_SC_PAGESIZE);
    {
        struct statvfs vfs;
        if (statvfs(".", &vfs) == 0) env->disk_free = (int64_t)vfs.f_bavail * vfs.f_frsize;
    }
#elif defined(__APPLE__)
    {
        int64_t mem_bytes = 0;
        size_t mem_len = sizeof(mem_bytes);
        sysctlbyname("hw.memsize", &mem_bytes, &mem_len, NULL, 0);
        env->mem_available = mem_bytes;
        struct statfs fs;
        if (statfs(".", &fs) == 0) env->disk_free = (int64_t)fs.f_bavail * fs.f_bsize;
    }
#endif

    env->has_compiler = (system("which cc >/dev/null 2>&1") == 0);
    env->has_curl = (system("which curl >/dev/null 2>&1") == 0);

    FILE *finder = popen("find . -name '*.gguf' -maxdepth 3 2>/dev/null", "r");
    if (finder) {
        char entry[256];
        /* fgets is evaluated before the capacity check, exactly one line per iteration */
        while (fgets(entry, sizeof(entry), finder) && env->n_ggufs < 32) {
            int end = strlen(entry);
            /* strip trailing CR/LF only */
            while (end > 0 && (entry[end-1] == '\n' || entry[end-1] == '\r')) {
                end--;
                entry[end] = '\0';
            }
            if (end == 0) continue;
            DiscoveredGGUF found;
            if (gguf_sniff(entry, &found)) {
                env->ggufs[env->n_ggufs] = found;
                env->n_ggufs++;
            }
        }
        pclose(finder);
    }

    printf("[env] cpu=%d mem=%.1fGB disk=%.1fGB compiler=%s curl=%s ggufs=%d\n",
           env->cpu_count, (float)env->mem_available/(1024*1024*1024),
           (float)env->disk_free/(1024*1024*1024),
           env->has_compiler?"yes":"no", env->has_curl?"yes":"no", env->n_ggufs);
    for (int g = 0; g < env->n_ggufs; g++) {
        const DiscoveredGGUF *d = &env->ggufs[g];
        printf(" [gguf] %s arch=%s dim=%d layers=%d %.1fMB\n",
               d->path, d->arch, d->dim,
               d->n_layers, (float)d->file_size/(1024*1024));
    }
}
/* ═══════════════════════════════════════════════════════════════════════════════
* INDEX LOAD — mmap GGUF, wire weight pointers, profile layers, attach LoRA.
* the weights are substrate. DOE is the architecture.
* ═══════════════════════════════════════════════════════════════════════════════ */
/*
 * init_lora_expert — bring an expert slot to life.
 *
 *   e     slot to initialise (any previous contents are overwritten, not freed)
 *   dim   model width
 *   rank  LoRA rank
 *   freq  position in harmonic space
 *
 * Allocates lora_A and lora_B (dim*rank floats each), fills both with
 * rand_normal() noise scaled by 0.02/sqrt(rank), and resets every life-cycle
 * field so a recycled slot does not inherit the age or token counters of the
 * expert that previously died there. If either allocation fails the slot is
 * left dead (alive = 0, both pointers NULL).
 */
static void init_lora_expert(LoraExpert *e, int dim, int rank, float freq) {
    const size_t n = (size_t)dim * (size_t)rank;

    e->lora_A = calloc(n, sizeof(float));
    e->lora_B = calloc(n, sizeof(float));
    e->frequency = freq;
    e->specialization = 0.0f;
    e->age = 0;
    e->tokens_seen = 0;
    e->low_vitality_count = 0;
    e->attention_bias = 0.0f;
    e->layer_focus = 1.0f;

    if (!e->lora_A || !e->lora_B) {
        free(e->lora_A); free(e->lora_B);
        e->lora_A = e->lora_B = NULL;
        e->alive = 0; e->vitality = 0;
        return;
    }

    float scale = 0.02f / sqrtf((float)rank);
    for (size_t i = 0; i < n; i++) e->lora_A[i] = rand_normal() * scale;
    for (size_t i = 0; i < n; i++) e->lora_B[i] = rand_normal() * scale;
    e->vitality = 0.7f;
    e->alive = 1;
}
/* free_lora_expert — release both LoRA matrices and mark the slot dead (alive = 0, vitality = 0). */
static void free_lora_expert(LoraExpert *e) {
    free(e->lora_A);
    e->lora_A = NULL;
    free(e->lora_B);
    e->lora_B = NULL;
    e->vitality = 0;
    e->alive = 0;
}
/* Forward declarations of tokenizer helpers defined later in the file; index_load() calls all three. */
static int tok_lookup(GGUFIndex *ps, const char *s, int len);
static void tok_ht_build(GGUFIndex *ps);
static void build_gpt2_scores(GGUFIndex *ps);
/*
 * index_load — map a GGUF file read-only and attach DOE's LoRA overlay to it.
 *
 *   ps    zeroed and then filled in; on success ps->active is 1
 *   path  GGUF file to attach to
 *
 * Steps: mmap the file, parse the metadata key/value section (architecture,
 * dimensions, tokenizer, chat template), read the tensor directory, wire the
 * host weight pointers (dequantising everything that is not f32), profile the
 * FFN gate weights, and create the per-layer parliament and LoRA experts.
 *
 * Returns 1 on success. Returns 0 on any failure; in that case the mapping
 * and every buffer allocated here have been released and the owning fields
 * reset, so a later index_free() on the same struct is harmless. Host weight
 * pointers inside *ps are not cleared on failure and must not be used.
 *
 * Every read from the mapping is bounds-checked through PC(); lengths in the
 * file are treated as untrusted.
 */
static int index_load(GGUFIndex *ps, const char *path) {
    TensorInfo *tinfo = NULL; /* declared first so the bail path can always free it */
    memset(ps, 0, sizeof(GGUFIndex));
    snprintf(ps->host_path, 256, "%s", path);
    ps->lora_rank = LORA_RANK;
    ps->lora_alpha = F.lora_alpha;
    ps->bos_id = 1; ps->eos_id = 2; /* defaults, overridden by GGUF */
    ps->rope_theta = 10000.0f;
    ps->rms_norm_eps = 1e-5f;
    ps->add_space_prefix = 1;
    int fd = open(path, O_RDONLY);
    if (fd < 0) { printf("[doe] cannot open %s\n", path); return 0; }
    struct stat st;
    if (fstat(fd, &st) != 0 || st.st_size <= 0) {
        close(fd);
        printf("[doe] cannot open %s\n", path);
        return 0;
    }
    ps->mmap_size = st.st_size;
    ps->mmap_base = mmap(NULL, ps->mmap_size, PROT_READ, MAP_PRIVATE, fd, 0);
    close(fd);
    if (ps->mmap_base == MAP_FAILED) { ps->mmap_base = NULL; return 0; }
    /* Parse GGUF header */
    uint8_t *p = ps->mmap_base, *pend = ps->mmap_base + ps->mmap_size;
    /* PC(n): bail unless n more bytes are available. Compared as a remaining
     * byte count so a huge n cannot wrap the pointer arithmetic. */
#define PC(n) do { if ((uint64_t)(pend - p) < (uint64_t)(n)) goto bail; } while(0)
    PC(4); uint32_t magic = *(uint32_t*)p; p += 4;
    if (magic != 0x46554747) goto bail;
    PC(4); p += 4; /* version */
    PC(8); uint64_t n_tensors = *(uint64_t*)p; p += 8;
    PC(8); uint64_t n_kv = *(uint64_t*)p; p += 8;
    for (uint64_t i = 0; i < n_kv; i++) {
        PC(8); uint64_t klen = *(uint64_t*)p; p += 8;
        PC(klen);
        char key[256];
        if (klen <= 255) { memcpy(key, p, klen); key[klen] = '\0'; }
        else key[0] = '\0'; /* over-long key matches nothing, but its value is still parsed and skipped */
        p += klen;
        PC(4); uint32_t vtype = *(uint32_t*)p; p += 4;
        if (vtype == 8) { /* string */
            PC(8); uint64_t vlen = *(uint64_t*)p; p += 8;
            PC(vlen);
            if (strstr(key, "general.architecture") && vlen < 64) {
                memcpy(ps->host_arch, p, vlen); ps->host_arch[vlen] = 0;
            }
            if (strstr(key, "tokenizer.ggml.model") && vlen < 20) {
                char tok_model[24]; memcpy(tok_model, p, vlen); tok_model[vlen] = 0;
                if (strcmp(tok_model, "gpt2") == 0) ps->is_gpt2_bpe = 1;
            }
            /* DOE identity fingerprint — this GGUF is DOE's own */
            if (strcmp(key, "doe.identity") == 0 && vlen < 128) {
                memcpy(ps->identity_tag, p, vlen); ps->identity_tag[vlen] = 0;
                printf("[identity] GGUF self-identifies: \"%s\"\n", ps->identity_tag);
            }
            /* Detect chat template style from template string */
            if (strstr(key, "chat_template") && vlen > 10 && vlen < 100000) {
                /* Search for distinctive patterns in the Jinja template */
                char *tmpl = malloc(vlen + 1);
                if (tmpl) {
                    memcpy(tmpl, p, vlen); tmpl[vlen] = 0;
                    if (strstr(tmpl, "im_start")) ps->chat_style = 1; /* ChatML */
                    else if (strstr(tmpl, "[INST]")) ps->chat_style = 2; /* Llama/Mistral */
                    else if (strstr(tmpl, "<|user|>")) ps->chat_style = 3; /* Zephyr */
                    else if (strstr(tmpl, "<|end|>")) ps->chat_style = 4; /* Phi */
                    else if (strstr(tmpl, "start_of_turn")) ps->chat_style = 5; /* Gemma */
                    free(tmpl);
                }
            }
            p += vlen;
        } else if (vtype == 4) { /* uint32 */
            PC(4); uint32_t val = *(uint32_t*)p; p += 4;
            if (strstr(key, "embedding_length")) ps->host_dim = (int)val;
            else if (strstr(key, "block_count")) ps->host_n_layers = (int)val;
            else if (strstr(key, "head_count") && !strstr(key, "kv")) ps->host_heads = (int)val;
            else if (strstr(key, "head_count_kv")) ps->host_kv_heads = (int)val;
            else if (strstr(key, "feed_forward_length")) ps->host_hidden = (int)val;
            else if (strstr(key, "vocab_size")) ps->host_vocab = (int)val;
            else if (strstr(key, "bos_token_id")) ps->bos_id = (int)val;
            else if (strstr(key, "eos_token_id")) ps->eos_id = (int)val;
            else if (strstr(key, "add_space_prefix")) ps->add_space_prefix = (int)val;
        } else if (vtype == 6) { /* float32 */
            PC(4); float fval; memcpy(&fval, p, 4); p += 4;
            if (strstr(key, "rope.freq_base")) ps->rope_theta = fval;
            else if (strstr(key, "layer_norm_rms_epsilon")) ps->rms_norm_eps = fval;
        } else if (vtype == 0 || vtype == 7) {
            PC(1); uint8_t bval = *p; p += 1;
            if (strstr(key, "add_space_prefix")) ps->add_space_prefix = bval;
        } else if (vtype == 1) { PC(1); p += 1; } /* int8 */
        else if (vtype == 2 || vtype == 3) { PC(2); p += 2; } /* uint16, int16 */
        else if (vtype == 5) { PC(4); p += 4; } /* int32 */
        else if (vtype == 10 || vtype == 11 || vtype == 12) { PC(8); p += 8; } /* uint64, int64, float64 */
        else if (vtype == 9) { /* array */
            PC(4); uint32_t atype = *(uint32_t*)p; p += 4;
            PC(8); uint64_t alen = *(uint64_t*)p; p += 8;
            size_t elem_sz = 0;
            if (atype == 0 || atype == 1 || atype == 7) elem_sz = 1;
            else if (atype == 2 || atype == 3) elem_sz = 2;
            else if (atype == 4 || atype == 5 || atype == 6) {
                elem_sz = 4;
                /* float32 array: tokenizer.ggml.scores */
                if (atype == 6 && strstr(key, "tokenizer.ggml.scores") && alen < 200000) {
                    PC(alen * 4);
                    free(ps->vocab_scores); /* a repeated key replaces, not leaks */
                    ps->vocab_scores = malloc(alen * sizeof(float));
                    if (ps->vocab_scores) memcpy(ps->vocab_scores, p, alen * 4);
                }
            }
            else if (atype == 10 || atype == 11 || atype == 12) elem_sz = 8;
            else if (atype == 8) {
                /* array of strings */
                int is_vocab = strstr(key, "tokenizer.ggml.tokens") != NULL;
                int is_merges = strstr(key, "tokenizer.ggml.merges") != NULL;
                int store_vocab = 0, store_merges = 0;
                if (is_vocab && alen < 200000 && !ps->vocab_tokens) {
                    ps->vocab_tokens = calloc(alen, sizeof(char*));
                    if (ps->vocab_tokens) { ps->vocab_size = (int)alen; store_vocab = 1; }
                }
                if (is_merges && alen < 500000 && !ps->bpe_merges) {
                    ps->bpe_merges = calloc(alen, sizeof(char*));
                    if (ps->bpe_merges) { ps->n_bpe_merges = (int)alen; store_merges = 1; }
                }
                for (uint64_t ai = 0; ai < alen; ai++) {
                    PC(8); uint64_t slen = *(uint64_t*)p; p += 8;
                    PC(slen);
                    /* Implausibly long strings are skipped rather than stored, so
                     * the parser stays aligned with the rest of the metadata. */
                    if (slen <= 1000000) {
                        if (store_vocab && ai < (uint64_t)ps->vocab_size) {
                            char *s = malloc(slen + 1);
                            if (s) { memcpy(s, p, slen); s[slen] = '\0'; }
                            ps->vocab_tokens[ai] = s;
                        }
                        if (store_merges && ai < (uint64_t)ps->n_bpe_merges) {
                            char *s = malloc(slen + 1);
                            if (s) { memcpy(s, p, slen); s[slen] = '\0'; }
                            ps->bpe_merges[ai] = s;
                        }
                    }
                    p += slen;
                }
                continue;
            }
            if (elem_sz && alen > UINT64_MAX / elem_sz) goto bail; /* byte count would overflow */
            PC(alen * elem_sz);
            p += alen * elem_sz;
        } else { PC(4); p += 4; } /* unknown — guess 4 bytes */
    }
    if (ps->host_dim <= 0 || ps->host_n_layers <= 0) goto bail;
    if (ps->host_heads <= 0) ps->host_heads = ps->host_dim / 64;
    if (ps->host_heads <= 0) ps->host_heads = 1; /* dim < 64: avoid dividing by zero below */
    if (ps->host_kv_heads == 0) ps->host_kv_heads = ps->host_heads;
    ps->host_head_dim = ps->host_dim / ps->host_heads;
    if (ps->host_hidden == 0) ps->host_hidden = ps->host_dim * 4;
    /* Parse tensor info */
    if (n_tensors > 20000) goto bail;
    tinfo = calloc(n_tensors, sizeof(TensorInfo));
    if (!tinfo && n_tensors > 0) goto bail;
    for (uint64_t i = 0; i < n_tensors; i++) {
        PC(8); uint64_t nlen = *(uint64_t*)p; p += 8;
        if (nlen > 256) goto bail;
        int nl = nlen < 95 ? (int)nlen : 95;
        PC(nlen); memcpy(tinfo[i].name, p, nl); tinfo[i].name[nl] = '\0'; p += nlen;
        PC(4); tinfo[i].ndim = *(uint32_t*)p; p += 4;
        if (tinfo[i].ndim > 4) goto bail;
        for (uint32_t d = 0; d < tinfo[i].ndim; d++) { PC(8); tinfo[i].dims[d] = *(uint64_t*)p; p += 8; }
        PC(4); tinfo[i].dtype = *(uint32_t*)p; p += 4;
        PC(8); tinfo[i].offset = *(uint64_t*)p; p += 8;
    }
    uint64_t header_size = p - ps->mmap_base;
    uint64_t data_start = ((header_size + 31) / 32) * 32;
    /* dequantized f32 buffers — tracked in GGUFIndex for cleanup */
    ps->f16_bufs = NULL; ps->n_f16_bufs = 0;
    /* Wire weight pointers — supports f32, f16, Q4_0, Q5_0, Q8_0, Q4_K, Q6_K */
    int wired = 0;
    for (uint64_t i = 0; i < n_tensors; i++) {
        uint32_t dt = tinfo[i].dtype;
        if (dt != 0 && dt != 1 && dt != 2 && dt != 6 && dt != 8 && dt != 12 && dt != 14) continue;
        uint64_t n_elems = 1;
        for (uint32_t d = 0; d < tinfo[i].ndim; d++) n_elems *= tinfo[i].dims[d];
        uint64_t raw_bytes = quant_raw_bytes(dt, n_elems);
        uint64_t byte_offset = data_start + tinfo[i].offset;
        /* Overflow-safe range check: the sum is never formed directly. */
        int oob = byte_offset < data_start || byte_offset > ps->mmap_size ||
                  raw_bytes > ps->mmap_size - byte_offset;
        if (raw_bytes == 0 || oob) {
            if (raw_bytes > 0)
                printf("[doe] WARNING: tensor %s OOB (%lu+%lu > %lu), skipping\n",
                       tinfo[i].name, (unsigned long)byte_offset, (unsigned long)raw_bytes,
                       (unsigned long)ps->mmap_size);
            continue;
        }
        float *data;
        const uint8_t *src = ps->mmap_base + byte_offset;
        if (dt == 0) {
            data = (float*)src; /* f32: point directly into mmap */
        } else {
            /* dequantize to f32 */
            data = malloc(n_elems * sizeof(float));
            if (!data) goto bail;
            /* Register the buffer before filling it so it is always owned by ps. */
            float **grown = realloc(ps->f16_bufs, (ps->n_f16_bufs+1)*sizeof(float*));
            if (!grown) { free(data); goto bail; }
            ps->f16_bufs = grown;
            ps->f16_bufs[ps->n_f16_bufs++] = data;
            if (dt == 1) { /* f16 */
                const uint16_t *h = (const uint16_t*)src;
                for (uint64_t j = 0; j < n_elems; j++) data[j] = f16_to_f32(h[j]);
            } else if (dt == 2) dequant_q4_0(src, data, n_elems);
            else if (dt == 6) dequant_q5_0(src, data, n_elems);
            else if (dt == 8) dequant_q8_0(src, data, n_elems);
            else if (dt == 12) dequant_q4_k(src, data, n_elems);
            else if (dt == 14) dequant_q6_k(src, data, n_elems);
        }
        char *n = tinfo[i].name;
        /* debug: if (i < 15) printf("[tensor] %s dims=[%lu,%lu]\n", n, (unsigned long)tinfo[i].dims[0], (unsigned long)tinfo[i].dims[1]); */
        if (strcmp(n, "token_embd.weight") == 0) {
            ps->host_tok_emb = data;
            if (ps->host_vocab == 0) ps->host_vocab = (int)tinfo[i].dims[1];
            wired++;
        }
        else if (strcmp(n, "output_norm.weight") == 0) { ps->host_norm = data; wired++; }
        else if (strcmp(n, "output.weight") == 0) { ps->host_output = data; wired++; }
        else {
            int l = -1; sscanf(n, "blk.%d.", &l);
            if (l >= 0 && l < MAX_LAYERS && l < ps->host_n_layers) {
                if (strstr(n, "attn_q.weight")) { ps->host_layers[l].wq = data; wired++; }
                else if (strstr(n, "attn_k.weight")) { ps->host_layers[l].wk = data; wired++; }
                else if (strstr(n, "attn_v.weight")) { ps->host_layers[l].wv = data; wired++; }
                else if (strstr(n, "attn_output.weight")) { ps->host_layers[l].wo = data; wired++; }
                else if (strstr(n, "attn_q.bias")) { ps->host_layers[l].bq = data; wired++; }
                else if (strstr(n, "attn_k.bias")) { ps->host_layers[l].bk = data; wired++; }
                else if (strstr(n, "attn_v.bias")) { ps->host_layers[l].bv = data; wired++; }
                else if (strstr(n, "ffn_gate.weight") && !strstr(n, "ffn_gate_inp") && !strstr(n, "ffn_gate_up")) { ps->host_layers[l].ffn_gate = data; wired++; }
                else if (strstr(n, "ffn_up.weight") && !strstr(n, "gate_up")) {
                    /* Check if fused gate+up: dims[1] > host_hidden means [dim, hidden*2] */
                    if (ps->host_hidden > 0 && (int)tinfo[i].dims[1] > ps->host_hidden * 3 / 2) {
                        ps->host_layers[l].ffn_gate_up = data;
                    } else {
                        ps->host_layers[l].ffn_up = data;
                    }
                    wired++;
                }
                else if (strstr(n, "ffn_down.weight")) { ps->host_layers[l].ffn_down = data; wired++; }
                else if (strstr(n, "ffn_gate_up_proj") || strstr(n, "ffn_gate_up.weight")) { ps->host_layers[l].ffn_gate_up = data; wired++; }
                else if (strstr(n, "attn_norm.weight")) { ps->host_layers[l].attn_norm = data; wired++; }
                else if (strstr(n, "ffn_norm.weight")) { ps->host_layers[l].ffn_norm = data; wired++; }
                else if (l == 0 && strstr(n, "ffn")) { printf("[doe] unwired FFN tensor: %s\n", n); }
            }
        }
    }
    (void)wired;
    free(tinfo); tinfo = NULL;
    /* tied embeddings: if output.weight missing, reuse token_embd.weight */
    if (!ps->host_output && ps->host_tok_emb) {
        ps->host_output = ps->host_tok_emb;
        printf("[doe] output.weight missing — using tied embeddings\n");
    }
    if (!ps->host_tok_emb || !ps->host_output || !ps->host_norm) {
        printf("[doe] host missing essential weights (tok_emb=%d out=%d norm=%d). abandoning.\n",
               ps->host_tok_emb!=NULL, ps->host_output!=NULL, ps->host_norm!=NULL);
        goto bail;
    }
    /* Check for standard FFN (skip MoE hosts for now) */
    int has_ffn = 0;
    for (int l = 0; l < ps->host_n_layers && l < MAX_LAYERS; l++) {
        if (ps->host_layers[l].ffn_gate && ps->host_layers[l].ffn_up && ps->host_layers[l].ffn_down) has_ffn = 1;
        if (ps->host_layers[l].ffn_gate_up && ps->host_layers[l].ffn_down) has_ffn = 1;
    }
    if (!has_ffn) {
        printf("[doe] host has no standard FFN. DOE needs a plain transformer.\n");
        goto bail;
    }
    /* ── Weight profiling — the sonar ── */
    printf("[sonar] profiling host weights...\n");
    /* Clamp: profile.layers has only MAX_LAYERS slots, and both the health
     * average below and compute_fingerprint() iterate over n_layers. */
    ps->profile.n_layers = ps->host_n_layers < MAX_LAYERS ? ps->host_n_layers : MAX_LAYERS;
    for (int l = 0; l < ps->profile.n_layers; l++) {
        if (ps->host_layers[l].ffn_gate)
            profile_weights(ps->host_layers[l].ffn_gate, ps->host_hidden, ps->host_dim, &ps->profile.layers[l]);
        else
            memset(&ps->profile.layers[l], 0, sizeof(LayerProfile));
    }
    float total_h = 0;
    for (int l = 0; l < ps->profile.n_layers; l++) total_h += ps->profile.layers[l].health;
    ps->profile.overall_health = total_h / (ps->profile.n_layers > 0 ? ps->profile.n_layers : 1);
    ps->profile.complexity = (float)ps->host_dim * ps->host_n_layers * ps->host_heads;
    ps->profile.fingerprint = compute_fingerprint(&ps->profile);
    printf("[sonar] host fingerprint: %016llx health=%.2f complexity=%.0f\n",
           (unsigned long long)ps->profile.fingerprint, ps->profile.overall_health, ps->profile.complexity);
    for (int l = 0; l < ps->profile.n_layers; l++) {
        LayerProfile *lp = &ps->profile.layers[l];
        if (lp->l2_norm > 0)
            printf(" L%d: health=%.2f l2=%.2f std=%.4f sparse=%.1f%% dead=%d\n",
                   l, lp->health, lp->l2_norm, lp->std_dev, lp->sparsity*100, lp->dead_neurons);
    }
    /* ── Initialize living LoRA experts per layer ── */
    int initial_experts = ps->host_n_layers <= 8 ? 4 : ps->host_n_layers <= 16 ? 6 : 8;
    if (initial_experts > MAX_EXPERTS) initial_experts = MAX_EXPERTS;
    ps->n_field_layers = ps->host_n_layers;
    if (ps->n_field_layers > MAX_LAYERS) ps->n_field_layers = MAX_LAYERS;
    for (int l = 0; l < ps->n_field_layers; l++) {
        FieldLayer *fl = &ps->field_layers[l];
        fl->host_layer_idx = l;
        fl->n_alive = initial_experts;
        fl->parliament.w_vote = calloc(MAX_EXPERTS * ps->host_dim, sizeof(float));
        if (!fl->parliament.w_vote) goto bail;
        float vote_std = 0.01f;
        for (int i = 0; i < MAX_EXPERTS * ps->host_dim; i++)
            fl->parliament.w_vote[i] = rand_normal() * vote_std;
        fl->parliament.consensus = 0.5f;
        /* Initialize experts with harmonic spacing — health-aware */
        float layer_health = ps->profile.layers[l].health;
        for (int e = 0; e < MAX_EXPERTS; e++) {
            if (e < initial_experts) {
                float freq = 6.2831853f * e / initial_experts;
                init_lora_expert(&fl->experts[e], ps->host_dim, ps->lora_rank, freq);
                if (!fl->experts[e].lora_A || !fl->experts[e].lora_B) goto bail;
                /* Weaker layers get stronger initial LoRA — DOE compensates */
                if (layer_health < 0.5f) {
                    float boost = (0.5f - layer_health) * 2.0f;
                    for (int i = 0; i < ps->host_dim * ps->lora_rank; i++) {
                        fl->experts[e].lora_A[i] *= (1.0f + boost);
                        fl->experts[e].lora_B[i] *= (1.0f + boost);
                    }
                }
            } else {
                memset(&fl->experts[e], 0, sizeof(LoraExpert));
            }
        }
    }
    ps->active = 1;
    /* Build token hash table for O(1) lookup, then GPT-2 BPE scores */
    tok_ht_build(ps);
    build_gpt2_scores(ps);
    printf("[doe] attached to %s (arch=%s dim=%d layers=%d heads=%d kv=%d vocab=%d %.1fMB)\n",
           path, ps->host_arch, ps->host_dim, ps->host_n_layers, ps->host_heads,
           ps->host_kv_heads, ps->host_vocab, (float)ps->mmap_size/(1024*1024));
    printf("[doe] rope_theta=%.0f rms_eps=%.1e bias=%s\n",
           ps->rope_theta, ps->rms_norm_eps,
           ps->host_layers[0].bq ? "yes" : "no");
    if (ps->is_gpt2_bpe) printf("[doe] tokenizer: GPT-2 BPE (%d merges)\n", ps->n_bpe_merges);
    /* Auto-detect nanollama chat style from identity tag or vocab tokens */
    if (ps->chat_style == 0 && (ps->identity_tag[0] ||
        tok_lookup(ps, "<|user_start|>", 14) >= 0)) ps->chat_style = 6;
    { const char *cs[] = {"raw","chatml","inst","zephyr","phi","gemma","nanollama"};
      printf("[doe] chat: %s\n", cs[ps->chat_style < 7 ? ps->chat_style : 0]); }
    printf("[doe] LoRA rank=%d alpha=%.2f experts=%d/layer — parliament is alive.\n",
           ps->lora_rank, ps->lora_alpha, initial_experts);
#undef PC
    return 1;
bail:
    /* Release everything this call allocated and reset the owning fields so
     * nothing is leaked here and nothing is freed twice by index_free(). */
    free(tinfo);
    for (int i = 0; i < ps->n_f16_bufs; i++) free(ps->f16_bufs[i]);
    free(ps->f16_bufs); ps->f16_bufs = NULL; ps->n_f16_bufs = 0;
    if (ps->vocab_tokens) {
        for (int i = 0; i < ps->vocab_size; i++) free(ps->vocab_tokens[i]);
        free(ps->vocab_tokens); ps->vocab_tokens = NULL;
    }
    ps->vocab_size = 0;
    free(ps->vocab_scores); ps->vocab_scores = NULL;
    if (ps->bpe_merges) {
        for (int i = 0; i < ps->n_bpe_merges; i++) free(ps->bpe_merges[i]);
        free(ps->bpe_merges); ps->bpe_merges = NULL;
    }
    ps->n_bpe_merges = 0;
    /* Layers never reached are still all-zero from the initial memset, so
     * freeing their NULL pointers is a no-op. */
    for (int l = 0; l < MAX_LAYERS; l++) {
        FieldLayer *fl = &ps->field_layers[l];
        free(fl->parliament.w_vote); fl->parliament.w_vote = NULL;
        for (int e = 0; e < MAX_EXPERTS; e++) {
            free(fl->experts[e].lora_A); free(fl->experts[e].lora_B);
            fl->experts[e].lora_A = fl->experts[e].lora_B = NULL;
            fl->experts[e].alive = 0;
        }
        fl->n_alive = 0;
    }
    ps->n_field_layers = 0;
    if (ps->mmap_base) { munmap(ps->mmap_base, ps->mmap_size); ps->mmap_base = NULL; }
    printf("[doe] GGUF parse failed.\n");
    return 0;
}
/*
 * index_free — release everything owned by a GGUFIndex and zero the struct.
 *
 * Frees each field layer's vote matrix and the matrices of every expert still
 * marked alive, all dequantised buffers, the tokenizer tables, and finally
 * unmaps the host file.
 */
static void index_free(GGUFIndex *ps) {
    for (int layer = 0; layer < ps->n_field_layers; layer++) {
        FieldLayer *fl = &ps->field_layers[layer];
        free(fl->parliament.w_vote);
        for (int slot = 0; slot < MAX_EXPERTS; slot++) {
            LoraExpert *ex = &fl->experts[slot];
            if (!ex->alive) continue;
            free_lora_expert(ex);
        }
    }

    for (int b = 0; b < ps->n_f16_bufs; b++) free(ps->f16_bufs[b]);
    free(ps->f16_bufs);

    if (ps->vocab_tokens) {
        for (int t = 0; t < ps->vocab_size; t++) free(ps->vocab_tokens[t]);
        free(ps->vocab_tokens);
    }
    free(ps->vocab_scores);

    if (ps->bpe_merges) {
        for (int m = 0; m < ps->n_bpe_merges; m++) free(ps->bpe_merges[m]);
        free(ps->bpe_merges);
    }

    if (ps->mmap_base) munmap(ps->mmap_base, ps->mmap_size);
    memset(ps, 0, sizeof(GGUFIndex));
}
/* ═══════════════════════════════════════════════════════════════════════════════
* PARLIAMENT ELECTION — variable-k over LoRA experts
* ═══════════════════════════════════════════════════════════════════════════════ */
/*
 * parliament_elect — choose which experts act on this input and with what weight.
 *
 *   p         router state; consensus and election_count are updated
 *   experts   MAX_EXPERTS slots; only those with alive != 0 take part
 *   input     activation vector of length dim
 *   dim       length of input and of each w_vote row
 *   hs        harmonic state passed to expert_resonance()
 *   selected  out: slot indices of the chosen experts, best vote first
 *   weights   out: softmax of the chosen experts' votes (sums to 1)
 *
 * Returns k, the number of experts chosen, or 0 when fewer than MIN_EXPERTS
 * (or none) are alive. k shrinks as the smoothed consensus value rises, is at
 * least 2, and never exceeds the number of living experts.
 *
 * Only living, not-yet-chosen experts are ever selected, even when every vote
 * is NaN or -inf.
 */
static int parliament_elect(Parliament *p, LoraExpert *experts, float *input, int dim,
                            HarmonicState *hs, int *selected, float *weights) {
    int n_alive = 0, alive_idx[MAX_EXPERTS];
    for (int e = 0; e < MAX_EXPERTS; e++) if (experts[e].alive) alive_idx[n_alive++] = e;
    if (n_alive < MIN_EXPERTS || n_alive == 0) return 0;

    /* Vote = learned affinity (w_vote row . input) + a small resonance term. */
    float votes[MAX_EXPERTS];
    for (int i = 0; i < n_alive; i++) {
        int e = alive_idx[i];
        float *row = p->w_vote + e * dim;
        float dot = 0;
        for (int j = 0; j < dim; j++) dot += row[j] * input[j];
        float res = expert_resonance(experts[e].frequency, hs);
        votes[e] = dot + 0.1f * res;
    }

    /* Spread of the votes relative to their mean drives the consensus value. */
    float mean_v = 0;
    for (int i = 0; i < n_alive; i++) mean_v += votes[alive_idx[i]];
    mean_v /= n_alive;
    float var_v = 0;
    for (int i = 0; i < n_alive; i++) { float d = votes[alive_idx[i]] - mean_v; var_v += d*d; }
    var_v /= n_alive;
    float consensus = fminf(1.0f, sqrtf(var_v + 1e-8f) / (fabsf(mean_v) + 1.0f));
    p->consensus = 0.9f * p->consensus + 0.1f * consensus;

    int k = (int)(n_alive * (1.0f - p->consensus));
    if (k < 2) k = 2;
    if (k > n_alive) k = n_alive;

    /* Repeated arg-max over the living experts; earlier slots win ties. */
    int used[MAX_EXPERTS] = {0};
    for (int ki = 0; ki < k; ki++) {
        int bi = -1; float bv = 0;
        for (int i = 0; i < n_alive; i++) {
            int e = alive_idx[i];
            if (used[e]) continue;
            /* The first free expert seeds the search, so a dead or already
             * chosen slot can never be picked by default. */
            if (bi < 0 || votes[e] > bv) { bv = votes[e]; bi = e; }
        }
        selected[ki] = bi; weights[ki] = votes[bi]; used[bi] = 1;
    }

    /* Numerically stable softmax over the selected votes. */
    float mx = weights[0];
    for (int i = 1; i < k; i++) if (weights[i] > mx) mx = weights[i];
    float sum = 0;
    for (int i = 0; i < k; i++) { weights[i] = expf(weights[i]-mx); sum += weights[i]; }
    for (int i = 0; i < k; i++) weights[i] /= sum;
    p->election_count++;
    return k;
}
/* ═══════════════════════════════════════════════════════════════════════════════
* NOTORCH — Hebbian plasticity for LoRA experts. from AML core.
* no backprop. synapse strengthens from co-activation.
* signal-gated: prophecy debt drives learning direction.
* ═══════════════════════════════════════════════════════════════════════════════ */
static int notorch_offset = 0; /* rotating window into LoRA rank; shared by every notorch_step() call */
/*
 * notorch_step — one Hebbian-style update of a LoRA pair, no backprop.
 *
 *   A, B     LoRA matrices, both indexed here as [row * rank + component]
 *   out_dim  rows of B that are decayed
 *   in_dim   rows of A that are updated (length of x)
 *   rank     number of LoRA components per row
 *   x        input activation
 *   dy       output-side signal
 *   signal   gate and sign of the step; |signal| < 1e-8 is a no-op
 *
 * Each call touches min(NOTORCH_RANK, rank) consecutive components starting
 * at notorch_offset (wrapping modulo rank), then advances the offset past
 * them. For every touched component r:
 *   u      = sum_i B[i][r] * dy[i]  (i < min(out_dim, in_dim))  + small noise
 *   A[i][r] += lr * x[i] * u        for all i < in_dim
 *   B[i][r] *= F.notorch_decay      for all i < out_dim
 */
static void notorch_step(float *A, float *B, int out_dim, int in_dim, int rank,
                         const float *x, const float *dy, float signal) {
    if (fabsf(signal) < 1e-8f) return;

    const float lr = F.notorch_lr * signal;
    const int width = (NOTORCH_RANK > rank) ? rank : NOTORCH_RANK;
    const int start = notorch_offset % rank;
    const int overlap = (out_dim < in_dim) ? out_dim : in_dim;

    /* Resolve the wrapped component indices once. */
    int comp[NOTORCH_RANK];
    for (int j = 0; j < width; j++) comp[j] = (start + j) % rank;

    /* Project dy through the selected components of B, plus exploration noise. */
    float u[NOTORCH_RANK];
    for (int j = 0; j < width; j++) {
        float acc = 0;
        for (int i = 0; i < overlap; i++) acc += B[i * rank + comp[j]] * dy[i];
        u[j] = acc + rand_normal() * 0.01f;
    }

#ifdef USE_BLAS
    for (int j = 0; j < width; j++)
        cblas_saxpy(in_dim, lr * u[j], x, 1, A + comp[j], rank);
#else
    for (int i = 0; i < in_dim; i++) {
        float *a_row = A + i * rank;
        for (int j = 0; j < width; j++)
            a_row[comp[j]] += lr * x[i] * u[j];
    }
#endif

    /* decay only the components we touched */
    const float decay = F.notorch_decay;
    for (int j = 0; j < width; j++)
        for (int i = 0; i < out_dim; i++)
            B[i * rank + comp[j]] *= decay;

    notorch_offset = (start + width) % rank;
}
/* ═══════════════════════════════════════════════════════════════════════════════
* VITALITY + MITOSIS + APOPTOSIS — LoRA experts live and die
* ═══════════════════════════════════════════════════════════════════════════════ */
/*
 * update_expert_vitality — age every living expert and rebalance its vitality.
 *
 *   fl            layer whose experts are updated
 *   total_tokens  tokens to be shared among the living experts this period
 *
 * An expert that saw more than its fair share (total_tokens / living count)
 * gains vitality, one that saw less loses it; the result is clamped to [0, 1].
 * low_vitality_count counts consecutive updates below 0.1. tokens_seen is
 * reset and fl->n_alive refreshed. Does nothing when no expert is alive.
 */
static void update_expert_vitality(FieldLayer *fl, int total_tokens) {
    int living = 0;
    for (int s = 0; s < MAX_EXPERTS; s++)
        if (fl->experts[s].alive) living++;
    if (living == 0) return;

    const float fair = (float)total_tokens / living;
    for (int s = 0; s < MAX_EXPERTS; s++) {
        LoraExpert *ex = &fl->experts[s];
        if (!ex->alive) continue;

        float ratio;
        if (fair > 0) ratio = (float)ex->tokens_seen / fair;
        else ratio = 1.0f;

        ex->vitality += (ratio - 1.0f) * 0.05f;
        if (ex->vitality < 0) ex->vitality = 0;
        if (ex->vitality > 1) ex->vitality = 1;

        ex->age++;
        if (ex->vitality < 0.1f) ex->low_vitality_count++;
        else ex->low_vitality_count = 0;
        ex->tokens_seen = 0;
    }
    fl->n_alive = living;
}
/*
 * try_mitosis — split one thriving expert into two.
 *
 *   fl    layer to act on
 *   dim   model width
 *   rank  LoRA rank
 *
 * The parent is the first living expert with vitality > 0.8 and age > 20; the
 * child takes the first empty slot. The child copies the parent's matrices
 * with small noise added, gets a frequency offset by pi/(living+1) (wrapped
 * past 2*pi), starts at vitality 0.5, and the parent's vitality is scaled by
 * 0.8. Returns 1 if a split happened, 0 if the layer is full or no expert
 * qualifies.
 */
static int try_mitosis(FieldLayer *fl, int dim, int rank) {
    int living = 0, parent_slot = -1, child_slot = -1;
    for (int s = 0; s < MAX_EXPERTS; s++) {
        const LoraExpert *ex = &fl->experts[s];
        if (ex->alive) {
            living++;
            if (parent_slot < 0 && ex->vitality > 0.8f && ex->age > 20) parent_slot = s;
        } else if (child_slot < 0) {
            child_slot = s;
        }
    }
    if (living >= MAX_EXPERTS) return 0;
    if (parent_slot < 0) return 0;
    if (child_slot < 0) return 0;

    LoraExpert *parent = &fl->experts[parent_slot];
    LoraExpert *child = &fl->experts[child_slot];

    float child_freq = parent->frequency + 3.14159f / (living + 1);
    if (child_freq > 6.2831853f) child_freq -= 6.2831853f;
    init_lora_expert(child, dim, rank, child_freq);

    /* Overwrite the fresh random init with a jittered copy of the parent. */
    const int count = dim * rank;
    for (int i = 0; i < count; i++) child->lora_A[i] = parent->lora_A[i] + rand_normal()*0.01f;
    for (int i = 0; i < count; i++) child->lora_B[i] = parent->lora_B[i] + rand_normal()*0.01f;

    child->vitality = 0.5f;
    parent->vitality *= 0.8f;
    fl->n_alive++;
    return 1;
}
/*
 * try_apoptosis — retire at most one neglected expert.
 *
 * Frees the first living expert whose low_vitality_count has reached 8 and
 * decrements fl->n_alive. Never acts when MIN_EXPERTS or fewer are alive.
 * Returns 1 if an expert was removed, 0 otherwise.
 */
static int try_apoptosis(FieldLayer *fl) {
    int living = 0;
    for (int s = 0; s < MAX_EXPERTS; s++)
        living += fl->experts[s].alive ? 1 : 0;
    if (living <= MIN_EXPERTS) return 0;

    for (int s = 0; s < MAX_EXPERTS; s++) {
        LoraExpert *ex = &fl->experts[s];
        if (ex->alive && ex->low_vitality_count >= 8) {
            free_lora_expert(ex);
            fl->n_alive--;
            return 1;
        }
    }
    return 0;
}
/* ═══════════════════════════════════════════════════════════════════════════════
* CALENDAR DRIFT — 12D temporal self-awareness. from DOE m.c.
* ═══════════════════════════════════════════════════════════════════════════════ */
/* One 12-component sample of system state, stamped with the step at which drift_snapshot() took it. */
typedef struct {
float state[12]; int step;
} DriftSnapshot;
/* Ring buffer of recent snapshots plus smoothed drift statistics derived from consecutive pairs. */
typedef struct {
DriftSnapshot history[DRIFT_SNAPSHOTS];
int head, n_snapshots; /* head: next slot to write; n_snapshots: filled slots, capped at DRIFT_SNAPSHOTS */
float drift, stability, drift_accel; /* smoothed drift; 1/(1+10*drift); change in drift since the last snapshot */
} CalendarDrift;
/* drift_init — reset the tracker: empty history, zero drift statistics. */
static void drift_init(CalendarDrift *cd) {
    memset(cd, 0, sizeof *cd);
}
/*
 * drift_snapshot — record the current 12-component state and update drift.
 *
 *   cd    tracker; the slot at head is overwritten and head advances
 *   loss  current loss, stored as component 2
 *   ps    source of the living-expert total and layer 0's consensus
 *   hs    source of the harmonic confidence
 *
 * When an earlier snapshot exists, the RMS of the per-component relative
 * change against it is blended into cd->drift (0.8 old / 0.2 new);
 * drift_accel and stability follow from that.
 * NOTE(review): the change is normalised by the current value only, so a
 * component that falls to ~0 produces a very large relative step.
 */
static void drift_snapshot(CalendarDrift *cd, float loss, GGUFIndex *ps, HarmonicState *hs) {
    DriftSnapshot *cur = &cd->history[cd->head % DRIFT_SNAPSHOTS];
    float *s = cur->state;
    cur->step = F.step;

    int experts_alive = 0;
    for (int layer = 0; layer < ps->n_field_layers; layer++)
        experts_alive += ps->field_layers[layer].n_alive;

    s[0]  = (float)experts_alive;
    s[1]  = ps->field_layers[0].parliament.consensus;
    s[2]  = loss;
    s[3]  = F.entropy;
    s[4]  = F.resonance;
    s[5]  = F.debt;
    s[6]  = hs->confidence;
    s[7]  = F.effective_temp;
    s[8]  = F.field_health;
    s[9]  = F.spring_energy;
    s[10] = F.summer_energy;
    s[11] = F.schumann_coherence;

    if (cd->n_snapshots > 0) {
        const int prev_slot = (cd->head - 1 + DRIFT_SNAPSHOTS) % DRIFT_SNAPSHOTS;
        const float *before = cd->history[prev_slot].state;

        float sq_sum = 0;
        for (int c = 0; c < 12; c++) {
            float rel = (s[c] - before[c]) / (fabsf(s[c]) + 1e-8f);
            sq_sum += rel * rel;
        }
        const float step_drift = sqrtf(sq_sum / 12.0f);
        const float old_drift = cd->drift;

        cd->drift = 0.8f * cd->drift + 0.2f * step_drift;
        cd->drift_accel = cd->drift - old_drift;
        cd->stability = 1.0f / (1.0f + cd->drift * 10.0f);
    }

    cd->head = (cd->head + 1) % DRIFT_SNAPSHOTS;
    if (cd->n_snapshots < DRIFT_SNAPSHOTS) cd->n_snapshots++;
}
/* ═══════════════════════════════════════════════════════════════════════════════
* META-LEARNING — DOE learns from its own choices.
* ═══════════════════════════════════════════════════════════════════════════════ */
/* One meta-learning record, filled in by meta_record(). */
typedef struct {
    int   step;               /* step value passed by the caller */
    int   n_experts;          /* expert count passed by the caller */
    float consensus;
    float loss;
    float field_health;
    float prophecy_debt_avg;
    float drift;
    float delta_loss;         /* prev_loss - loss, or 0 when prev_loss <= 0 */
} MetaEntry;
/* History of meta-learning records plus the biases learned from them. */
typedef struct {
    MetaEntry history[META_HIST_CAP];  /* oldest first; shifted down by one when full */
    int   n_entries;                   /* number of valid records in history */
    float config_bias[4];              /* start at 0.5, kept inside [0.01, 0.99] by meta_record() */
    float prediction_error;            /* zeroed by meta_init() */
} MetaTrack;
/* Clear a MetaTrack and start all four configuration biases at the neutral 0.5. */
static void meta_init(MetaTrack *mt) {
    memset(mt, 0, sizeof *mt);
    float *bias = mt->config_bias;
    bias[0] = 0.5f;
    bias[1] = 0.5f;
    bias[2] = 0.5f;
    bias[3] = 0.5f;
}
/*
 * Append one record to the meta history and nudge the configuration biases.
 *
 * When the history is full the oldest record is dropped. Once two or more
 * records exist, each bias moves by 0.01 * sig * (feature - 0.5), where sig
 * is +1 if loss fell relative to the previous record and -0.5 otherwise.
 * The features are n_exp / MAX_EXPERTS, consensus, health and debt_avg.
 * Biases are clamped to [0.01, 0.99].
 */
static void meta_record(MetaTrack *mt, int step, int n_exp, float consensus,
                        float loss, float health, float debt_avg, float drift, float prev_loss) {
    if (mt->n_entries >= META_HIST_CAP) {
        /* drop the oldest record to make room at the end */
        memmove(&mt->history[0], &mt->history[1], (META_HIST_CAP-1)*sizeof(MetaEntry));
        mt->n_entries = META_HIST_CAP - 1;
    }

    MetaEntry *rec = &mt->history[mt->n_entries];
    rec->step = step;
    rec->n_experts = n_exp;
    rec->consensus = consensus;
    rec->loss = loss;
    rec->field_health = health;
    rec->prophecy_debt_avg = debt_avg;
    rec->drift = drift;
    rec->delta_loss = prev_loss > 0 ? prev_loss - loss : 0;
    mt->n_entries++;

    if (mt->n_entries < 2) return;

    const MetaEntry *before = &mt->history[mt->n_entries-2];
    float improvement = before->loss - loss;
    float lr_meta = 0.01f;
    float sig = improvement > 0 ? 1.0f : -0.5f;

    const float centred[4] = {
        (float)n_exp/MAX_EXPERTS - 0.5f,
        consensus - 0.5f,
        health - 0.5f,
        debt_avg - 0.5f,
    };
    for (int i = 0; i < 4; i++) {
        mt->config_bias[i] += lr_meta * sig * centred[i];
        if (mt->config_bias[i] < 0.01f) mt->config_bias[i] = 0.01f;
        if (mt->config_bias[i] > 0.99f) mt->config_bias[i] = 0.99f;
    }
}
/* ═══════════════════════════════════════════════════════════════════════════════
* MYCELIUM — LoRA spore forest.
* DOE doesn't save full model GGUFs. it saves LoRA configurations:
* the living experts, their weights, the parliament votes, the field state.
* each spore is a snapshot of how DOE adapted to this host.
* on restart with the same host (fingerprint match), load the best spore.
* ═══════════════════════════════════════════════════════════════════════════════ */
/* Directory (relative to the working directory) that holds the spore files. */
#define MYCELIUM_DIR "doe_mycelium"
/* Registry entry describing one spore file. */
typedef struct {
    char     path[256];          /* file path of the spore */
    uint64_t host_fingerprint;   /* fingerprint of the host the spore belongs to */
    float    fitness;
    int      step;
} LoraSpore;
/* In-memory registry of known spores. */
typedef struct {
    LoraSpore spores[MYCELIUM_MAX];
    int n_spores;    /* number of used entries in spores */
    int best_idx;    /* index of the preferred spore; mycelium_init() sets -1 */
} MyceliumState;
/*
 * Reset the spore registry and make sure the spore directory exists.
 *
 * An already-existing directory is the normal case on restart and is not an
 * error. Any other mkdir() failure is reported, because later spore saves
 * will then fail to open their files.
 */
static void mycelium_init(MyceliumState *ms) {
    memset(ms, 0, sizeof(MyceliumState));
    ms->best_idx = -1;   /* no spore selected yet */
    if (mkdir(MYCELIUM_DIR, 0755) != 0 && errno != EEXIST)
        printf("[mycelium] cannot create %s: %s\n", MYCELIUM_DIR, strerror(errno));
}
/* Write n items of the given size; returns 1 only if every item was written. */
static int mycelium_put(FILE *f, const void *src, size_t size, size_t n) {
    return fwrite(src, size, n, f) == n;
}

/*
 * Save the current LoRA state as a spore file
 * MYCELIUM_DIR/spore_<fingerprint>_s<step>.bin.
 *
 * Layout (native byte order):
 *   header    : fingerprint (8 bytes), step, fitness, n_layers, dim, rank (4 each)
 *   per layer : n_alive, parliament vote weights (MAX_EXPERTS * dim floats),
 *               consensus, then for each of the MAX_EXPERTS slots the alive
 *               flag, followed for live experts only by vitality, frequency,
 *               lora_A (dim * rank floats) and lora_B (rank * dim floats).
 *
 * Every write is checked. If any write or the final fclose() fails, the
 * partial file is removed so that mycelium_load() never picks up a truncated
 * spore.
 */
static void mycelium_save(GGUFIndex *ps, int step, float fitness) {
    char path[256];
    snprintf(path, 256, "%s/spore_%016llx_s%d.bin", MYCELIUM_DIR,
             (unsigned long long)ps->profile.fingerprint, step);
    FILE *f = fopen(path, "wb");
    if (!f) { printf("[mycelium] cannot write %s\n", path); return; }

    /* header: fingerprint, step, fitness, n_layers, dim, rank */
    uint64_t fp = ps->profile.fingerprint;
    int nl = ps->n_field_layers, dim = ps->host_dim, rank = ps->lora_rank;
    size_t n_lora = (size_t)dim * (size_t)rank;
    int ok = mycelium_put(f, &fp, 8, 1)
          && mycelium_put(f, &step, 4, 1)
          && mycelium_put(f, &fitness, 4, 1)
          && mycelium_put(f, &nl, 4, 1)
          && mycelium_put(f, &dim, 4, 1)
          && mycelium_put(f, &rank, 4, 1);

    /* per layer: n_alive, vote weights, consensus, then the expert slots */
    for (int l = 0; ok && l < nl; l++) {
        FieldLayer *fl = &ps->field_layers[l];
        ok = mycelium_put(f, &fl->n_alive, 4, 1)
          && mycelium_put(f, fl->parliament.w_vote, sizeof(float), (size_t)MAX_EXPERTS * (size_t)dim)
          && mycelium_put(f, &fl->parliament.consensus, 4, 1);
        for (int e = 0; ok && e < MAX_EXPERTS; e++) {
            LoraExpert *ex = &fl->experts[e];
            ok = mycelium_put(f, &ex->alive, 4, 1);
            if (ok && ex->alive) {
                ok = mycelium_put(f, &ex->vitality, 4, 1)
                  && mycelium_put(f, &ex->frequency, 4, 1)
                  && mycelium_put(f, ex->lora_A, sizeof(float), n_lora)
                  && mycelium_put(f, ex->lora_B, sizeof(float), n_lora);
            }
        }
    }

    /* fclose() flushes buffered data, so its result matters as well */
    if (fclose(f) != 0) ok = 0;
    if (!ok) {
        remove(path);
        printf("[mycelium] write failed, spore discarded: %s\n", path);
        return;
    }
    printf("[mycelium] spore saved: %s (fitness=%.3f)\n", path, fitness);
}
/* Read n items of the given size; returns 1 only if every item was read. */
static int mycelium_get(FILE *f, void *dst, size_t size, size_t n) {
    return fread(dst, size, n, f) == n;
}

/*
 * Load the newest spore (highest step number in the file name) written for
 * host fingerprint target_fp into ps.
 *
 * The spore directory is scanned with opendir()/readdir() for names that
 * contain "spore_<fingerprint>" followed by "_s<step>". The chosen file is
 * accepted only if its header fingerprint, layer count, dim and rank match
 * ps. Expert slots that are alive in the file are (re)filled, allocating
 * their matrices if the slot was dead. Slots that are dead in the file but
 * alive in ps are released with free_lora_expert().
 *
 * Every read is checked. A truncated or unreadable file makes the function
 * return 0; layers read before the failure have already been overwritten.
 *
 * Returns 1 on success, 0 if no usable spore was found.
 */
static int mycelium_load(GGUFIndex *ps, uint64_t target_fp) {
    char want[32];
    snprintf(want, sizeof(want), "spore_%016llx", (unsigned long long)target_fp);
    size_t want_len = strlen(want);

    /* find the newest (highest step) spore for this fingerprint */
    char best_path[512] = {0};
    int best_step = -1;
    DIR *dir = opendir(MYCELIUM_DIR);
    if (!dir) return 0;
    struct dirent *ent;
    while ((ent = readdir(dir)) != NULL) {
        const char *hit = strstr(ent->d_name, want);
        if (!hit) continue;
        /* the step number follows the fingerprint as "_s<step>" */
        const char *sp = strstr(hit + want_len, "_s");
        if (!sp) continue;
        int s = atoi(sp + 2);
        if (s > best_step) {
            best_step = s;
            snprintf(best_path, sizeof(best_path), "%s/%s", MYCELIUM_DIR, ent->d_name);
        }
    }
    closedir(dir);
    if (best_step < 0) return 0;

    FILE *f = fopen(best_path, "rb");
    if (!f) return 0;

    uint64_t fp = 0;
    int step = 0; float fitness = 0;
    int nl = 0, dim = 0, rank = 0;
    int ok = mycelium_get(f, &fp, 8, 1);
    if (ok && fp != target_fp) { fclose(f); return 0; }
    ok = ok && mycelium_get(f, &step, 4, 1) && mycelium_get(f, &fitness, 4, 1)
            && mycelium_get(f, &nl, 4, 1) && mycelium_get(f, &dim, 4, 1)
            && mycelium_get(f, &rank, 4, 1);
    if (!ok) {
        printf("[mycelium] spore truncated or unreadable: %s\n", best_path);
        fclose(f); return 0;
    }
    if (nl != ps->n_field_layers || dim != ps->host_dim || rank != ps->lora_rank) {
        printf("[mycelium] spore mismatch (layers=%d/%d dim=%d/%d rank=%d/%d)\n",
               nl, ps->n_field_layers, dim, ps->host_dim, rank, ps->lora_rank);
        fclose(f); return 0;
    }

    size_t n_lora = (size_t)dim * (size_t)rank;
    for (int l = 0; ok && l < nl; l++) {
        FieldLayer *fl = &ps->field_layers[l];
        ok = mycelium_get(f, &fl->n_alive, 4, 1)
          && mycelium_get(f, fl->parliament.w_vote, sizeof(float), (size_t)MAX_EXPERTS * (size_t)dim)
          && mycelium_get(f, &fl->parliament.consensus, 4, 1);
        for (int e = 0; ok && e < MAX_EXPERTS; e++) {
            LoraExpert *ex = &fl->experts[e];
            int alive = 0;
            ok = mycelium_get(f, &alive, 4, 1);
            if (!ok) break;
            if (alive) {
                if (!ex->alive) {
                    /* slot was dead: it needs fresh matrices before reading into them */
                    ex->lora_A = calloc(n_lora, sizeof(float));
                    ex->lora_B = calloc(n_lora, sizeof(float));
                    if (!ex->lora_A || !ex->lora_B) {
                        free(ex->lora_A); free(ex->lora_B);
                        ex->lora_A = NULL; ex->lora_B = NULL;
                        printf("[mycelium] out of memory while loading %s\n", best_path);
                        fclose(f); return 0;
                    }
                }
                ex->alive = 1;
                ok = mycelium_get(f, &ex->vitality, 4, 1)
                  && mycelium_get(f, &ex->frequency, 4, 1)
                  && mycelium_get(f, ex->lora_A, sizeof(float), n_lora)
                  && mycelium_get(f, ex->lora_B, sizeof(float), n_lora);
            } else if (ex->alive) {
                free_lora_expert(ex);
            }
        }
    }
    fclose(f);
    if (!ok) {
        printf("[mycelium] spore truncated or unreadable: %s\n", best_path);
        return 0;
    }
    printf("[mycelium] spore loaded: %s (step=%d fitness=%.3f)\n", best_path, step, fitness);
    return 1;
}
/* ═══════════════════════════════════════════════════════════════════════════════
* INDEX FORWARD — run token through host with DOE modulation.
*
* per layer:
* 1. host attention (read-only weights, KV cache)
* 2. parliament election (which LoRA experts vote)
* 3. Delta Voice injection: x += Σ(w_k × α × A_k @ (B_k @ x))
* 4. host FFN (read-only)
* 5. layer_focus scaling on residual
*
* after all layers:
* 6. field modulation on logits
* 7. prophecy debt computation
* 8. NOTORCH Hebbian update on winning experts
*
* the host swims. the field steers. nobody knows who's in charge.
* ═══════════════════════════════════════════════════════════════════════════════ */
/* Scratch buffers and caches for one inference session; sizes as set by alloc_infer(). */
typedef struct {
    float *x;            /* residual stream, host_dim floats */
    float *xb;           /* scratch, host_dim floats */
    float *xb2;          /* scratch, host_dim floats */
    float *q;            /* query vector, host_heads * host_head_dim floats */
    float *k;            /* key vector, host_kv_heads * host_head_dim floats */
    float *v;            /* value vector, host_kv_heads * host_head_dim floats */
    float *att;          /* attention scores, host_heads * max_seq floats */
    float *logits;       /* output logits, host_vocab floats */
    float *hb;           /* FFN scratch, host_hidden floats */
    float *hb2;          /* FFN scratch, 2 * host_hidden floats (room for fused gate_up) */
    float *expert_out;   /* summed LoRA expert output, host_dim floats */
    float *key_cache;    /* host_n_layers * max_seq * kv_dim floats */
    float *value_cache;  /* host_n_layers * max_seq * kv_dim floats */
    float *cos_cache;    /* RoPE cosines, max_seq * (host_head_dim / 2) floats */
    float *sin_cache;    /* RoPE sines, max_seq * (host_head_dim / 2) floats */
    HarmonicState hs;
    int max_seq;         /* number of positions the caches were sized for */
} InferState;
/* Allocate n zeroed floats (at least one); flags *failed when out of memory. */
static float *infer_buf(size_t n, int *failed) {
    float *p = calloc(n ? n : 1, sizeof(float));
    if (!p) *failed = 1;
    return p;
}

/*
 * Allocate every buffer an inference session needs for up to max_seq
 * positions and precompute the RoPE cos/sin tables.
 *
 * Sizes are computed in size_t, so layers * max_seq * kv_dim cannot wrap
 * around int for long contexts. If any allocation fails, everything is
 * released, a message is printed to stderr and an all-zero InferState is
 * returned (x == NULL, max_seq == 0).
 */
static InferState alloc_infer(GGUFIndex *ps, int max_seq) {
    InferState s = {0};
    size_t D    = (size_t)ps->host_dim;
    size_t H    = (size_t)ps->host_hidden;
    size_t hd   = (size_t)ps->host_head_dim;
    size_t kd   = (size_t)ps->host_kv_heads * hd;
    size_t seq  = (size_t)max_seq;
    size_t half = (size_t)(ps->host_head_dim / 2);
    size_t kv_elems = (size_t)ps->host_n_layers * seq * kd;
    int failed = 0;

    s.max_seq = max_seq;
    s.x   = infer_buf(D, &failed);
    s.xb  = infer_buf(D, &failed);
    s.xb2 = infer_buf(D, &failed);
    s.q   = infer_buf((size_t)ps->host_heads * hd, &failed);
    s.k   = infer_buf(kd, &failed);
    s.v   = infer_buf(kd, &failed);
    s.att = infer_buf((size_t)ps->host_heads * seq, &failed);
    s.logits = infer_buf((size_t)ps->host_vocab, &failed);
    s.hb  = infer_buf(H, &failed);
    s.hb2 = infer_buf(H * 2, &failed);            /* *2 for fused gate_up */
    s.expert_out  = infer_buf(D, &failed);
    s.key_cache   = infer_buf(kv_elems, &failed);
    s.value_cache = infer_buf(kv_elems, &failed);
    s.cos_cache   = infer_buf(seq * half, &failed);
    s.sin_cache   = infer_buf(seq * half, &failed);

    if (failed) {
        fprintf(stderr, "[doe] alloc_infer: out of memory (max_seq=%d)\n", max_seq);
        free(s.x); free(s.xb); free(s.xb2);
        free(s.q); free(s.k); free(s.v);
        free(s.att); free(s.logits);
        free(s.hb); free(s.hb2); free(s.expert_out);
        free(s.key_cache); free(s.value_cache);
        free(s.cos_cache); free(s.sin_cache);
        memset(&s, 0, sizeof(s));
        return s;
    }

    /* RoPE tables: the frequency depends only on the pair index, so it is
     * computed once per pair rather than once per (position, pair). */
    float rope_theta = ps->rope_theta;
    for (size_t i = 0; i < half; i++) {
        float freq = 1.0f / powf(rope_theta, (float)(2*(int)i) / (float)ps->host_head_dim);
        for (int p = 0; p < max_seq; p++) {
            float ang = (float)p * freq;
            s.cos_cache[(size_t)p*half+i] = cosf(ang);
            s.sin_cache[(size_t)p*half+i] = sinf(ang);
        }
    }
    return s;
}
/* Release every buffer owned by an InferState and zero the struct. */
static void free_infer(InferState *s) {
    float *owned[] = {
        s->x, s->xb, s->xb2,
        s->q, s->k, s->v,
        s->att, s->logits,
        s->hb, s->hb2, s->expert_out,
        s->key_cache, s->value_cache,
        s->cos_cache, s->sin_cache,
    };
    for (size_t i = 0; i < sizeof(owned) / sizeof(owned[0]); i++)
        free(owned[i]);
    memset(s, 0, sizeof *s);
}
/*
 * Run one token through the host model with LoRA expert modulation and
 * return the logits (s->logits, host_vocab floats).
 *
 *   token - token id; ids outside [0, host_vocab), including negative ones,
 *           are embedded as a zero vector
 *   pos   - position of the token; the caller must keep 0 <= pos < s->max_seq,
 *           since the KV cache and RoPE tables are sized for max_seq positions
 *
 * Per layer (layers without wq are skipped):
 *   1. attention over cached keys/values for positions 0..pos, residual add
 *   2. for layers below n_field_layers: parliament_elect() picks experts and
 *      x += sum_k weight_k * lora_alpha * A_k (B_k x)
 *   3. gated FFN (fused gate_up or separate gate/up), residual add
 * Then the final norm and the output projection.
 *
 * Side effects: writes the KV cache at pos, increments tokens_seen of the
 * selected experts, and overwrites the scratch buffers in s.
 */
static float *doe_forward(GGUFIndex *ps, InferState *s, int token, int pos) {
    int D = ps->host_dim, hd = ps->host_head_dim;
    int kd = ps->host_kv_heads * hd;
    int H = ps->host_hidden;
    int hg = ps->host_heads / ps->host_kv_heads;   /* query heads per KV head */
    float sc = 1.0f / sqrtf((float)hd);

    /* Embedding (negative ids must not index in front of the table) */
    if (token >= 0 && token < ps->host_vocab)
        memcpy(s->x, ps->host_tok_emb + (size_t)token * D, D * sizeof(float));
    else
        memset(s->x, 0, D * sizeof(float));

    for (int l = 0; l < ps->host_n_layers && l < MAX_LAYERS; l++) {
        if (!ps->host_layers[l].wq) continue;
        /* offset of this layer inside the KV caches; size_t avoids int overflow */
        size_t layer_base = (size_t)l * s->max_seq * kd;

        /* ── Host attention ── */
        float *xn = s->xb;
        if (ps->host_layers[l].attn_norm) rmsnorm(xn, s->x, ps->host_layers[l].attn_norm, D, ps->rms_norm_eps);
        else memcpy(xn, s->x, D * sizeof(float));
        matvec(s->q, ps->host_layers[l].wq, xn, ps->host_heads*hd, D);
        matvec(s->k, ps->host_layers[l].wk, xn, kd, D);
        matvec(s->v, ps->host_layers[l].wv, xn, kd, D);
        /* Add attention biases (Qwen2, optional) */
        if (ps->host_layers[l].bq) for (int i = 0; i < ps->host_heads*hd; i++) s->q[i] += ps->host_layers[l].bq[i];
        if (ps->host_layers[l].bk) for (int i = 0; i < kd; i++) s->k[i] += ps->host_layers[l].bk[i];
        if (ps->host_layers[l].bv) for (int i = 0; i < kd; i++) s->v[i] += ps->host_layers[l].bv[i];
        for (int h = 0; h < ps->host_heads; h++) apply_rope(s->q+h*hd, pos, s->cos_cache, s->sin_cache, hd);
        for (int h = 0; h < ps->host_kv_heads; h++) apply_rope(s->k+h*hd, pos, s->cos_cache, s->sin_cache, hd);

        /* store this position's key/value */
        size_t co = layer_base + (size_t)pos * kd;
        memcpy(s->key_cache + co, s->k, kd * sizeof(float));
        memcpy(s->value_cache + co, s->v, kd * sizeof(float));

        float *ao = s->xb2; memset(ao, 0, D * sizeof(float));
        for (int h = 0; h < ps->host_heads; h++) {
            int kvh = h / hg; float *qh = s->q + h*hd;
            float *att = s->att + (size_t)h * s->max_seq;
            for (int t = 0; t <= pos; t++) {
                const float *kt = s->key_cache + layer_base + (size_t)t * kd + kvh*hd;
                float dot = 0;
                for (int d = 0; d < hd; d++) dot += qh[d] * kt[d];
                att[t] = dot * sc;
            }
            softmax_n(att, pos+1);
            float *oh = ao + h*hd;
            for (int t = 0; t <= pos; t++) {
                float a = att[t];
                const float *vt = s->value_cache + layer_base + (size_t)t * kd + kvh*hd;
                for (int d = 0; d < hd; d++) oh[d] += a * vt[d];
            }
        }
        matvec(s->xb, ps->host_layers[l].wo, ao, D, D);
        for (int i = 0; i < D; i++) s->x[i] += s->xb[i];

        /* ── Parliament election + LoRA injection (after attention, before FFN) ── */
        if (l < ps->n_field_layers) {
            FieldLayer *fl = &ps->field_layers[l];
            int selected[MAX_EXPERTS]; float weights[MAX_EXPERTS];
            int k = parliament_elect(&fl->parliament, fl->experts, s->x, D, &s->hs, selected, weights);
            memset(s->expert_out, 0, D * sizeof(float));
            for (int ki = 0; ki < k; ki++) {
                LoraExpert *ex = &fl->experts[selected[ki]];
                ex->tokens_seen++;
                /* Delta Voice: out += α × A @ (B @ x) */
                /* NOTE(review): tmp holds LORA_RANK floats; assumes ps->lora_rank <= LORA_RANK — confirm */
                float tmp[LORA_RANK]; memset(tmp, 0, sizeof(tmp));
                for (int r = 0; r < ps->lora_rank; r++)
                    for (int j = 0; j < D; j++)
                        tmp[r] += ex->lora_B[r * D + j] * s->x[j];
                /* accumulate row by row; no D-sized temporary on the stack */
                for (int i = 0; i < D; i++) {
                    float acc = 0.0f;
                    for (int r = 0; r < ps->lora_rank; r++)
                        acc += ex->lora_A[i * ps->lora_rank + r] * tmp[r];
                    s->expert_out[i] += weights[ki] * ps->lora_alpha * acc;
                }
            }
            for (int i = 0; i < D; i++) s->x[i] += s->expert_out[i];
        }

        /* ── Host FFN (SwiGLU) ── */
        {
            float *fn = s->xb;
            if (ps->host_layers[l].ffn_norm) rmsnorm(fn, s->x, ps->host_layers[l].ffn_norm, D, ps->rms_norm_eps);
            else memcpy(fn, s->x, D * sizeof(float));
            if (ps->host_layers[l].ffn_gate_up && ps->host_layers[l].ffn_down) {
                /* Fused gate_up: [hidden*2, dim] → split into gate [0..H) and up [H..2H) */
                matvec(s->hb2, ps->host_layers[l].ffn_gate_up, fn, H * 2, D);
                for (int i = 0; i < H; i++) s->hb[i] = silu_f(s->hb2[i]) * s->hb2[H + i];
                matvec(s->xb, ps->host_layers[l].ffn_down, s->hb, D, H);
                for (int i = 0; i < D; i++) s->x[i] += s->xb[i];
            } else if (ps->host_layers[l].ffn_gate && ps->host_layers[l].ffn_up && ps->host_layers[l].ffn_down) {
                /* Standard separate gate + up */
                matvec(s->hb, ps->host_layers[l].ffn_gate, fn, H, D);
                matvec(s->hb2, ps->host_layers[l].ffn_up, fn, H, D);
                for (int i = 0; i < H; i++) s->hb[i] = silu_f(s->hb[i]) * s->hb2[i];
                matvec(s->xb, ps->host_layers[l].ffn_down, s->hb, D, H);
                for (int i = 0; i < D; i++) s->x[i] += s->xb[i];
            }
        }
    }

    /* Final norm + LM head */
    rmsnorm(s->x, s->x, ps->host_norm, D, ps->rms_norm_eps);
    matvec(s->logits, ps->host_output, s->x, ps->host_vocab, D);
    return s->logits;
}
/* ═══════════════════════════════════════════════════════════════════════════════
* SAMPLING + CHAT
* ═══════════════════════════════════════════════════════════════════════════════ */
/*
 * Pick a token id from V logits.
 *
 *   temp <= 0 : greedy, returns the index of the largest logit (logits untouched)
 *   temp  > 0 : logits are divided by temp; if 0 < top_k < V everything below
 *               the top_k-th largest value is masked to -1e30; then
 *               softmax_n() is applied and an index is drawn by walking the
 *               cumulative sum against rand_uniform().
 *
 * The logits array is modified in place on the temp > 0 path. If the scratch
 * buffer for the top-k threshold cannot be allocated, the top-k filter is
 * skipped and sampling proceeds over the full distribution.
 */
static int sample(float *logits, int V, float temp, int top_k) {
    if (temp <= 0) {
        int b = 0;
        for (int i = 1; i < V; i++) if (logits[i] > logits[b]) b = i;
        return b;
    }
    for (int i = 0; i < V; i++) logits[i] /= temp;
    if (top_k > 0 && top_k < V) {
        float *s = malloc((size_t)V * sizeof(float));
        if (s) {
            memcpy(s, logits, (size_t)V * sizeof(float));
            /* partial selection sort: after i rounds s[0..i) holds the i largest values */
            for (int i = 0; i < top_k; i++) {
                int b = i;
                for (int j = i+1; j < V; j++) if (s[j] > s[b]) b = j;
                float t = s[i]; s[i] = s[b]; s[b] = t;
            }
            float th = s[top_k-1];
            free(s);
            for (int i = 0; i < V; i++) if (logits[i] < th) logits[i] = -1e30f;
        }
    }
    softmax_n(logits, V);
    float r = rand_uniform(), cum = 0;
    for (int i = 0; i < V; i++) { cum += logits[i]; if (cum >= r) return i; }
    return V - 1;   /* rounding left the cumulative sum below r */
}
/*
 * GPT-2 byte_decoder: map a code point produced by the GPT-2 byte encoder
 * back to the original byte.
 *
 * Bytes 33..126, 161..172 and 174..255 map to themselves; the remaining
 * bytes were assigned code points 256, 257, ... in ascending byte order.
 * Returns the byte (0..255), or -1 if the code point is not part of the
 * mapping. The lookup table is built on the first call.
 */
static int gpt2_rune_to_byte(int rune) {
    static int rtable[512];     /* rune -> byte, -1 where unmapped */
    static int table_built = 0;

    if (!table_built) {
        int next_rune = 256;    /* next code point handed to a non-identity byte */
        for (int i = 0; i < 512; i++) rtable[i] = -1;
        for (int b = 0; b < 256; b++) {
            int identity = (b >= 33 && b <= 126)
                        || (b >= 161 && b <= 172)
                        || (b >= 174 && b <= 255);
            if (identity) rtable[b] = b;
            else          rtable[next_rune++] = b;
        }
        table_built = 1;
    }
    if (rune < 0 || rune >= 512) return -1;
    return rtable[rune];
}
/*
 * Parse one UTF-8 code point at *p, return it and advance *p by the bytes
 * consumed.
 *
 * A multi-byte lead is honoured only when every expected continuation byte
 * is present (10xxxxxx). Otherwise the lead byte is returned as-is and a
 * single byte is consumed. Because the NUL terminator is not a continuation
 * byte, a truncated sequence at the end of a string is never read or skipped
 * past, so callers looping on `while (*p)` stay inside the string.
 */
static int utf8_decode_cp(const char **p) {
    const unsigned char *s = (const unsigned char *)*p;
    int cp = s[0];      /* fallback: the raw byte */
    int len = 1;

    if ((s[0] & 0xE0) == 0xC0) {
        if ((s[1] & 0xC0) == 0x80) {
            cp = (s[0] & 0x1F) << 6 | (s[1] & 0x3F);
            len = 2;
        }
    } else if ((s[0] & 0xF0) == 0xE0) {
        /* && short-circuits, so s[2] is only read when s[1] is a real continuation */
        if ((s[1] & 0xC0) == 0x80 && (s[2] & 0xC0) == 0x80) {
            cp = (s[0] & 0x0F) << 12 | (s[1] & 0x3F) << 6 | (s[2] & 0x3F);
            len = 3;
        }
    } else if ((s[0] & 0xF8) == 0xF0) {
        if ((s[1] & 0xC0) == 0x80 && (s[2] & 0xC0) == 0x80 && (s[3] & 0xC0) == 0x80) {
            cp = (s[0] & 0x07) << 18 | (s[1] & 0x3F) << 12 | (s[2] & 0x3F) << 6 | (s[3] & 0x3F);
            len = 4;
        }
    }
    *p += len;
    return cp;
}
/*
 * Decode a token to text on stdout, using the GGUF vocabulary when present.
 *
 *   GPT-2 BPE vocab : every code point of the token string is mapped back to
 *                     its byte through gpt2_rune_to_byte(); code points
 *                     outside that mapping are emitted as UTF-8. The staging
 *                     buffer is flushed whenever it fills, so long tokens
 *                     are printed in full.
 *   other vocab     : U+2581 (E2 96 81) becomes a space, "<0xAB>" becomes
 *                     the byte 0xAB (only if >= 32, '\n' or '\t'), anything
 *                     else is copied.
 *   no vocab entry  : token ids below 256 are printed as a raw character
 *                     (same printable filter).
 */
static void token_decode_print(GGUFIndex *ps, int token) {
    if (ps->vocab_tokens && token >= 0 && token < ps->vocab_size && ps->vocab_tokens[token]) {
        const char *s = ps->vocab_tokens[token];
        if (ps->is_gpt2_bpe) {
            /* GPT-2 byte-level BPE: full byte_decoder — each codepoint maps to one byte */
            unsigned char buf[256];
            int blen = 0;
            const char *p = s;
            while (*p) {
                /* one code point needs at most 4 bytes; flush before running out of room */
                if (blen > (int)sizeof(buf) - 4) { fwrite(buf, 1, blen, stdout); blen = 0; }
                int cp = utf8_decode_cp(&p);
                int b = gpt2_rune_to_byte(cp);
                if (b >= 0) buf[blen++] = (unsigned char)b;
                else {
                    /* Not a GPT-2 mapped byte — emit codepoint as UTF-8 */
                    if (cp < 0x80) { buf[blen++] = (unsigned char)cp; }
                    else if (cp < 0x800) { buf[blen++] = 0xC0|(cp>>6); buf[blen++] = 0x80|(cp&0x3F); }
                    else if (cp < 0x10000) { buf[blen++] = 0xE0|(cp>>12); buf[blen++] = 0x80|((cp>>6)&0x3F); buf[blen++] = 0x80|(cp&0x3F); }
                    else { buf[blen++] = 0xF0|(cp>>18); buf[blen++] = 0x80|((cp>>12)&0x3F); buf[blen++] = 0x80|((cp>>6)&0x3F); buf[blen++] = 0x80|(cp&0x3F); }
                }
            }
            fwrite(buf, 1, blen, stdout);
        } else {
            /* Handle sentencepiece ▁ (U+2581, 3 bytes: E2 96 81) → space */
            while (*s) {
                if ((unsigned char)s[0] == 0xE2 && (unsigned char)s[1] == 0x96 && (unsigned char)s[2] == 0x81) {
                    fputc(' ', stdout);
                    s += 3;
                } else if (!strncmp(s, "<0x", 3) && s[3] && s[4] && s[5] == '>') {
                    /* sentencepiece hex byte: <0xAB>. s[3] and s[4] are checked first so
                     * s[5] is never read beyond the end of a short string such as "<0x". */
                    unsigned int b = 0;
                    sscanf(s + 3, "%02X", &b);
                    if (b >= 32 || b == '\n' || b == '\t') fputc((char)b, stdout);
                    s += 6;
                } else {
                    fputc(*s, stdout);
                    s++;
                }
            }
        }
    } else if (token >= 0 && token < 256) {
        char c = (char)token;
        if (c >= 32 || c == '\n' || c == '\t') fputc(c, stdout);
    }
}
/*
 * Decode a token into buf instead of stdout — for HTTP serve mode.
 *
 * Applies the same decoding rules as token_decode_print(): GPT-2 byte
 * decoder with UTF-8 fallback (now including 4-byte code points),
 * sentencepiece U+2581 -> space and "<0xAB>" -> byte, raw byte for ids below
 * 256 without a vocab entry. Output is truncated to fit; buf is always
 * NUL-terminated when bufsz > 0.
 *
 * Returns the number of bytes written, not counting the terminator
 * (0 when bufsz <= 0, in which case buf is not touched).
 */
static int token_decode_buf(GGUFIndex *ps, int token, char *buf, int bufsz) {
    int pos = 0;
    if (bufsz <= 0) return 0;
    if (ps->vocab_tokens && token >= 0 && token < ps->vocab_size && ps->vocab_tokens[token]) {
        const char *s = ps->vocab_tokens[token];
        if (ps->is_gpt2_bpe) {
            const char *p = s;
            /* pos < bufsz - 4 leaves room for one 4-byte code point plus the NUL */
            while (*p && pos < bufsz - 4) {
                int cp = utf8_decode_cp(&p);
                int b = gpt2_rune_to_byte(cp);
                if (b >= 0) buf[pos++] = (char)(unsigned char)b;
                else {
                    if (cp < 0x80) { buf[pos++] = (char)cp; }
                    else if (cp < 0x800) { buf[pos++] = (char)(0xC0|(cp>>6)); buf[pos++] = (char)(0x80|(cp&0x3F)); }
                    else if (cp < 0x10000) { buf[pos++] = (char)(0xE0|(cp>>12)); buf[pos++] = (char)(0x80|((cp>>6)&0x3F)); buf[pos++] = (char)(0x80|(cp&0x3F)); }
                    else { buf[pos++] = (char)(0xF0|(cp>>18)); buf[pos++] = (char)(0x80|((cp>>12)&0x3F)); buf[pos++] = (char)(0x80|((cp>>6)&0x3F)); buf[pos++] = (char)(0x80|(cp&0x3F)); }
                }
            }
        } else {
            while (*s && pos < bufsz - 1) {
                if ((unsigned char)s[0]==0xE2 && (unsigned char)s[1]==0x96 && (unsigned char)s[2]==0x81) {
                    buf[pos++]=' '; s+=3;
                } else if (!strncmp(s,"<0x",3) && s[3] && s[4] && s[5]=='>') {
                    /* s[3]/s[4] are checked first so s[5] is never read past the terminator */
                    unsigned int b=0; sscanf(s+3,"%02X",&b);
                    if (b>=32||b=='\n'||b=='\t') buf[pos++]=(char)b;
                    s+=6;
                } else { buf[pos++]=*s; s++; }
            }
        }
    } else if (token >= 0 && token < 256) {
        char c = (char)token;
        if ((c>=32||c=='\n'||c=='\t') && pos < bufsz-1) buf[pos++]=c;
    }
    buf[pos] = '\0';
    return pos;
}
/* ── BPE Tokenizer — SentencePiece style, score-based merge ── */
static int tok_lookup(GGUFIndex *ps, const char *s, int len); /* forward decl */
/*
 * Build GPT-2 BPE scores from the merge list (called after index_load if needed).
 *
 * Does nothing unless the vocab is GPT-2 BPE, merges and tokens are present
 * and no scores exist yet. Every token starts at -1e9. For each merge
 * "left right" the concatenated string is looked up in the vocabulary and,
 * if found, scored n_bpe_merges - merge_index, so earlier merges rank
 * higher. Merges whose combined length exceeds 128 bytes are skipped.
 * Finally add_space_prefix is cleared.
 *
 * If the score table cannot be allocated, the function reports it and leaves
 * vocab_scores NULL (bpe_merge() then performs no merges).
 */
static void build_gpt2_scores(GGUFIndex *ps) {
    if (!ps->is_gpt2_bpe || !ps->bpe_merges || ps->n_bpe_merges == 0 || ps->vocab_scores || !ps->vocab_tokens) return;
    ps->vocab_scores = calloc(ps->vocab_size, sizeof(float));
    if (!ps->vocab_scores) {
        printf("[doe] GPT-2 BPE: out of memory for %d merge scores\n", ps->vocab_size);
        return;
    }
    for (int i = 0; i < ps->vocab_size; i++) ps->vocab_scores[i] = -1e9f;
    int built = 0;
    for (int m = 0; m < ps->n_bpe_merges; m++) {
        const char *merge = ps->bpe_merges[m];
        const char *sp = strchr(merge, ' ');
        if (!sp) continue;
        int la = (int)(sp - merge), lb = (int)strlen(sp + 1);
        if (la + lb > 128) continue;
        char merged[130];
        memcpy(merged, merge, la);
        memcpy(merged + la, sp + 1, lb);
        int mid = tok_lookup(ps, merged, la + lb);
        if (mid >= 0) { ps->vocab_scores[mid] = (float)(ps->n_bpe_merges - m); built++; }
    }
    printf("[doe] GPT-2 BPE: built %d merge scores from %d merges\n", built, ps->n_bpe_merges);
    ps->add_space_prefix = 0;
}
/* 32-bit FNV-1a hash over the first len bytes of s (len <= 0 yields the offset basis). */
static uint32_t tok_hash(const char *s, int len) {
    const uint32_t fnv_offset_basis = 2166136261u;
    const uint32_t fnv_prime = 16777619u;
    const uint8_t *byte = (const uint8_t *)s;
    uint32_t h = fnv_offset_basis;
    int left = len;
    while (left-- > 0) {
        h ^= *byte++;       /* xor first ... */
        h *= fnv_prime;     /* ... then multiply: the "1a" order */
    }
    return h;
}
/*
 * Build the open-addressing hash table used by tok_lookup().
 *
 * Capacity is the smallest power of two >= 3 * vocab_size (about 33% load),
 * slots hold token ids, -1 marks an empty slot, and collisions are resolved
 * by linear probing. If the table cannot be allocated, tok_ht_ids stays NULL
 * and tok_lookup() falls back to its linear scan.
 */
static void tok_ht_build(GGUFIndex *ps) {
    if (!ps->vocab_tokens || ps->vocab_size == 0) return;
    int cap = 1;
    while (cap < ps->vocab_size * 3) cap <<= 1; /* ~33% load factor */
    int *slots = malloc((size_t)cap * sizeof(int));
    if (!slots) {
        ps->tok_ht_ids = NULL;
        ps->tok_ht_cap = 0;
        return;
    }
    ps->tok_ht_ids = slots;
    ps->tok_ht_cap = cap;
    for (int i = 0; i < cap; i++) slots[i] = -1;
    int mask = cap - 1;
    for (int i = 0; i < ps->vocab_size; i++) {
        if (!ps->vocab_tokens[i]) continue;
        int slen = (int)strlen(ps->vocab_tokens[i]);
        uint32_t idx = tok_hash(ps->vocab_tokens[i], slen) & mask;
        while (slots[idx] != -1) idx = (idx + 1) & mask;
        slots[idx] = i;
    }
}
/*
 * Find the id of the token whose string equals the first len bytes of s.
 * Returns -1 if there is no such token, no vocabulary is loaded, or len is
 * negative. O(1) on average when the hash table exists; otherwise a linear
 * scan over the vocabulary.
 */
static int tok_lookup(GGUFIndex *ps, const char *s, int len) {
    /* some callers probe for special tokens without checking that a vocab exists */
    if (!ps->vocab_tokens || len < 0) return -1;
    if (!ps->tok_ht_ids) {
        /* fallback linear scan */
        for (int i = 0; i < ps->vocab_size; i++) {
            if (ps->vocab_tokens[i] && strlen(ps->vocab_tokens[i]) == (size_t)len
                && memcmp(ps->vocab_tokens[i], s, len) == 0)
                return i;
        }
        return -1;
    }
    int mask = ps->tok_ht_cap - 1;
    uint32_t idx = tok_hash(s, len) & mask;
    /* linear probing: an empty slot (-1) ends the search */
    while (ps->tok_ht_ids[idx] != -1) {
        int id = ps->tok_ht_ids[idx];
        const char *t = ps->vocab_tokens[id];
        if (t && strlen(t) == (size_t)len && memcmp(t, s, len) == 0) return id;
        idx = (idx + 1) & mask;
    }
    return -1;
}
/*
 * Score-based BPE merge on an array of token ids, in place.
 *
 * Each round looks at every adjacent pair, concatenates the two token
 * strings and looks the result up in the vocabulary. The pair whose merged
 * token has the highest score (the leftmost one on ties) is replaced by that
 * token. Rounds repeat until no adjacent pair forms a known token. Pairs
 * with a missing string or a combined length above 128 bytes are skipped.
 *
 * Returns the new number of ids; returns n unchanged when no scores exist.
 */
static int bpe_merge(GGUFIndex *ps, int *ids, int n) {
    if (!ps->vocab_scores) return n;

    for (;;) {
        if (n <= 1) break;

        float top_score = -1e30f;
        int at = -1;        /* index of the left element of the winning pair */
        int fused = -1;     /* id of the merged token */

        for (int i = 0; i + 1 < n; i++) {
            const char *left = ps->vocab_tokens[ids[i]];
            const char *right = ps->vocab_tokens[ids[i+1]];
            if (!left || !right) continue;
            int ll = strlen(left), lr = strlen(right);
            if (ll + lr > 128) continue;

            /* Concatenate token strings (length is passed explicitly, no NUL needed) */
            char joined[130];
            memcpy(joined, left, ll);
            memcpy(joined + ll, right, lr);

            int cand = tok_lookup(ps, joined, ll + lr);
            if (cand < 0) continue;
            if (!(ps->vocab_scores[cand] > top_score)) continue;
            top_score = ps->vocab_scores[cand];
            at = i;
            fused = cand;
        }
        if (at < 0) break;

        ids[at] = fused;
        /* close the gap left by ids[at+1] */
        memmove(&ids[at + 1], &ids[at + 2], (size_t)(n - at - 2) * sizeof(int));
        n--;
    }
    return n;
}
/*
 * GPT-2 byte-to-unicode table: map a byte to the code point the GPT-2 byte
 * encoder uses for it.
 *
 * Bytes 33..126, 161..172 and 174..255 map to themselves; every other byte
 * gets 256, 257, ... in ascending byte order. The argument is reduced to its
 * low 8 bits before the lookup. The table is built on the first call.
 */
static int gpt2_byte_to_rune(int b) {
    static int table[256];
    static int table_built = 0;

    if (!table_built) {
        int next_rune = 256;
        for (int i = 0; i < 256; i++) {
            int identity = (i >= 33 && i <= 126)
                        || (i >= 161 && i <= 172)
                        || (i >= 174 && i <= 255);
            table[i] = identity ? i : next_rune++;
        }
        table_built = 1;
    }
    return table[(unsigned char)b];
}
/*
 * Encode a Unicode code point as UTF-8 into out and return the number of
 * bytes written (1..4). No terminator is written; out must have room for
 * 4 bytes. Code points >= 0x10000 use the 4-byte form.
 */
static int rune_to_utf8(int r, char *out) {
    if (r < 0x80) { out[0] = (char)r; return 1; }
    if (r < 0x800) {
        out[0] = (char)(0xC0 | (r >> 6));
        out[1] = (char)(0x80 | (r & 0x3F));
        return 2;
    }
    if (r < 0x10000) {
        out[0] = (char)(0xE0 | (r >> 12));
        out[1] = (char)(0x80 | ((r >> 6) & 0x3F));
        out[2] = (char)(0x80 | (r & 0x3F));
        return 3;
    }
    out[0] = (char)(0xF0 | ((r >> 18) & 0x07));
    out[1] = (char)(0x80 | ((r >> 12) & 0x3F));
    out[2] = (char)(0x80 | ((r >> 6) & 0x3F));
    out[3] = (char)(0x80 | (r & 0x3F));
    return 4;
}
/*
 * Try to match a special token at offset i of text (tlen bytes long).
 *
 * Candidates are tried in table order. A candidate counts only if it matches
 * the text exactly at i AND exists in the vocabulary. On success the token
 * id is returned and *consumed is set to the matched length. On failure -1
 * is returned and *consumed is left untouched.
 */
static int try_special_token(GGUFIndex *ps, const char *text, int tlen, int i, int *consumed) {
    static const char *specials[] = {
        "<|im_start|>", "<|im_end|>", "<|endoftext|>", "<|end|>",
        "<start_of_turn>", "<end_of_turn>", "<|user|>", "<|assistant|>",
        "[INST]", "[/INST]", "<s>", "</s>",
        "<|user_start|>", "<|user_end|>", "<|assistant_start|>", "<|assistant_end|>",
        "<|bos|>", "<|eot_id|>", NULL
    };

    /* every special token starts with '<' or '[': cheap early exit */
    char lead = text[i];
    if (!(lead == '<' || lead == '[')) return -1;

    for (const char **cand = specials; *cand; cand++) {
        int slen = (int)strlen(*cand);
        if (i + slen > tlen) continue;
        if (memcmp(text + i, *cand, slen) != 0) continue;
        int id = tok_lookup(ps, *cand, slen);
        if (id < 0) continue;
        *consumed = slen;
        return id;
    }
    return -1;
}
/*
 * Tokenize text into at most max_tokens ids and return the count.
 *
 *   no vocabulary : one id per input byte
 *   GPT-2 BPE     : special tokens are matched first; every other byte is
 *                   mapped through the GPT-2 byte encoder and looked up
 *                   (unknown -> id 0)
 *   SentencePiece : the text is split at special tokens; within a segment
 *                   spaces become U+2581 (with an optional leading U+2581 at
 *                   the very start of the text), then each UTF-8 character is
 *                   looked up, falling back to per-byte "<0xXX>" tokens
 *                   (unknown -> id 0)
 * The initial ids are then merged in place by bpe_merge().
 *
 * Ids are written straight into tokens. Every append is guarded by
 * n < max_tokens, so the buffer cannot overflow even when the byte fallback
 * produces several ids per input character.
 */
static int tokenize_input(GGUFIndex *ps, const char *text, int *tokens, int max_tokens) {
    if (!ps->vocab_tokens) {
        int n = 0, len = strlen(text);
        for (int i = 0; i < len && n < max_tokens; i++) tokens[n++] = (unsigned char)text[i];
        return n;
    }
    int tlen = strlen(text);
    int n = 0;
    if (ps->is_gpt2_bpe) {
        /* GPT-2: check special tokens first, then byte-level BPE */
        for (int i = 0; i < tlen && n < max_tokens; ) {
            int consumed = 0;
            int sid = try_special_token(ps, text, tlen, i, &consumed);
            if (sid >= 0) { tokens[n++] = sid; i += consumed; continue; }
            int r = gpt2_byte_to_rune((unsigned char)text[i]);
            char u8[4]; int u8len = rune_to_utf8(r, u8);
            int id = tok_lookup(ps, u8, u8len);
            tokens[n++] = (id >= 0) ? id : 0;
            i++;
        }
    } else {
        /* SentencePiece: split on special tokens first, then ▁-encode segments */
        int i = 0;
        while (i < tlen && n < max_tokens) {
            /* Check special tokens at raw text level */
            int consumed = 0;
            int sid = try_special_token(ps, text, tlen, i, &consumed);
            if (sid >= 0) { tokens[n++] = sid; i += consumed; continue; }
            /* Find next special token boundary (or end) */
            int seg_end = i + 1;
            while (seg_end < tlen) {
                int c2 = 0;
                if (try_special_token(ps, text, tlen, seg_end, &c2) >= 0) break;
                seg_end++;
            }
            /* Encode segment [i, seg_end) with SentencePiece ▁ */
            int slen = seg_end - i;
            /* worst case: every byte is a space (3 bytes each) + prefix (3) + NUL */
            char *sp = malloc((size_t)slen * 3 + 4);
            if (!sp) break;     /* out of memory: keep what has been tokenized so far */
            int sp_len = 0;
            if (ps->add_space_prefix && i == 0 && text[i] != ' ') {
                sp[sp_len++] = 0xE2; sp[sp_len++] = 0x96; sp[sp_len++] = 0x81;
            }
            for (int j = i; j < seg_end; j++) {
                if (text[j] == ' ') {
                    sp[sp_len++] = 0xE2; sp[sp_len++] = 0x96; sp[sp_len++] = 0x81;
                } else {
                    sp[sp_len++] = text[j];
                }
            }
            sp[sp_len] = '\0';
            int k = 0;
            while (k < sp_len && n < max_tokens) {
                /* length of the UTF-8 character starting at k, judged by its lead byte */
                int clen = 1;
                unsigned char c = (unsigned char)sp[k];
                if (c >= 0xC0 && c < 0xE0) clen = 2;
                else if (c >= 0xE0 && c < 0xF0) clen = 3;
                else if (c >= 0xF0) clen = 4;
                if (k + clen > sp_len) clen = 1;
                int id = tok_lookup(ps, sp + k, clen);
                if (id >= 0) { tokens[n++] = id; k += clen; }
                else {
                    /* unknown character: fall back to one byte token at a time */
                    char hex[7]; snprintf(hex, 7, "<0x%02X>", (unsigned char)sp[k]);
                    id = tok_lookup(ps, hex, 6);
                    tokens[n++] = (id >= 0) ? id : 0; k++;
                }
            }
            free(sp);
            i = seg_end;
        }
    }
    return bpe_merge(ps, tokens, n);
}
/*
 * Interactive chat loop on stdin/stdout.
 *
 * Per input line:
 *   - "quit"/"exit" leave the loop, "status" prints field/drift/expert state
 *   - the KV cache is cleared (each message is answered without history)
 *   - the message is wrapped in the chat template selected by
 *     ps->chat_style, provided the template's key token exists in the vocab
 *   - all prompt tokens except the last are run through doe_forward(); the
 *     last one is fed by the first generation step, so every prompt token is
 *     processed exactly once
 *   - up to 200 tokens are generated; each step applies the field to the
 *     logits, samples with top-k 40, stops on EOS or a template end token,
 *     accumulates prophecy debt, runs notorch_step() on every live expert
 *     that has seen tokens, and every 10 steps updates vitality and tries
 *     mitosis/apoptosis per layer
 *   - a meta record is appended once the reply is finished
 */
static void chat(GGUFIndex *ps) {
    int max_seq = 512;
    InferState is = alloc_infer(ps, max_seq);
    CalendarDrift cd; drift_init(&cd);
    MetaTrack meta; meta_init(&meta);
    char input[1024];
    printf("\n[doe] the parliament is in session. type your message (Ctrl+C to dissipate):\n");
    printf("[doe] host: %s (%s, %dM params)\n\n",
           ps->host_path, ps->host_arch,
           (int)((long long)ps->host_vocab * ps->host_dim * 2 / 1000000)); /* rough estimate */
    float debt_sum = 0; int debt_count = 0;
    while (1) {
        printf("> "); fflush(stdout);
        if (!fgets(input, sizeof(input), stdin)) break;
        int len = strlen(input);
        while (len > 0 && (input[len-1]=='\n' || input[len-1]=='\r')) input[--len] = '\0';
        if (!len) continue;
        if (strcmp(input,"quit")==0 || strcmp(input,"exit")==0) break;
        if (strcmp(input,"status")==0) {
            printf("[field] step=%d debt=%.3f entropy=%.3f resonance=%.3f emergence=%.3f\n",
                   F.step, F.debt, F.entropy, F.resonance, F.emergence);
            printf("[field] season=%s health=%.3f temp=%.3f velocity=%s\n",
                   (const char*[]){"spring","summer","autumn","winter"}[F.season],
                   F.field_health, F.effective_temp,
                   (const char*[]){"nomove","walk","run","backward"}[F.velocity_mode]);
            printf("[drift] d=%.3f stability=%.3f accel=%.4f snapshots=%d\n",
                   cd.drift, cd.stability, cd.drift_accel, cd.n_snapshots);
            int te = 0;
            for (int l = 0; l < ps->n_field_layers; l++) te += ps->field_layers[l].n_alive;
            printf("[experts] alive=%d consensus=%.2f elections=%d\n",
                   te, ps->field_layers[0].parliament.consensus,
                   ps->field_layers[0].parliament.election_count);
            if (debt_count > 0)
                printf("[prophecy] avg_debt=%.4f total_debt=%.4f\n", debt_sum/debt_count, F.debt);
            continue;
        }

        /* Reset KV cache (size computed in size_t to avoid int overflow) */
        int kd = ps->host_kv_heads * ps->host_head_dim;
        size_t kv_bytes = (size_t)ps->host_n_layers * max_seq * kd * sizeof(float);
        memset(is.key_cache, 0, kv_bytes);
        memset(is.value_cache, 0, kv_bytes);

        /* Wrap input in chat template (auto-detected from GGUF chat_template) */
        char wrapped[2048];
        /* Only use chat template if the key special tokens exist in vocab */
        int use_template = 0;
        switch (ps->chat_style) {
            case 1: /* ChatML */
                if (tok_lookup(ps, "<|im_start|>", 12) >= 0) {
                    snprintf(wrapped, sizeof(wrapped),
                             "<|im_start|>user\n%s<|im_end|>\n<|im_start|>assistant\n", input);
                    use_template = 1;
                }
                break;
            case 2: /* [INST] */
                if (tok_lookup(ps, "[INST]", 6) >= 0) {
                    snprintf(wrapped, sizeof(wrapped), "[INST] %s [/INST]", input);
                    use_template = 1;
                }
                break;
            case 3: /* Zephyr */
                if (tok_lookup(ps, "<|user|>", 8) >= 0) {
                    snprintf(wrapped, sizeof(wrapped),
                             "<|user|>\n%s\n<|assistant|>\n", input);
                    use_template = 1;
                }
                break;
            case 4: /* Phi */
                if (tok_lookup(ps, "<|end|>", 7) >= 0) {
                    snprintf(wrapped, sizeof(wrapped),
                             "<|user|>\n%s<|end|>\n<|assistant|>\n", input);
                    use_template = 1;
                }
                break;
            case 5: /* Gemma */
                if (tok_lookup(ps, "<start_of_turn>", 15) >= 0) {
                    snprintf(wrapped, sizeof(wrapped),
                             "<start_of_turn>user\n%s<end_of_turn>\n<start_of_turn>model\n", input);
                    use_template = 1;
                }
                break;
            case 6: /* nanollama — <|user_start|>...<|user_end|><|assistant_start|> */
                snprintf(wrapped, sizeof(wrapped),
                         "<|user_start|>%s<|user_end|><|assistant_start|>", input);
                use_template = 1;
                break;
        }
        if (!use_template) snprintf(wrapped, sizeof(wrapped), "%s", input);

        /* Tokenize wrapped input */
        int input_tokens[512];
        int n_input = 0;
        if (ps->bos_id >= 0) input_tokens[n_input++] = ps->bos_id;
        n_input += tokenize_input(ps, wrapped, input_tokens + n_input, 512 - n_input);
        /* nothing to condition on: avoids reading input_tokens[-1] below */
        if (n_input <= 0) continue;

        /* Prefill every prompt token except the last; the last one is fed by the
         * first generation step, so it is not processed at two positions. */
        int pos = 0;
        for (int i = 0; i < n_input - 1 && pos < max_seq - 1; i++, pos++)
            doe_forward(ps, &is, input_tokens[i], pos);
        int prev = input_tokens[n_input - 1];
        printf("  ");
        int total_births = 0, total_deaths = 0;
        for (int i = 0; i < 200 && pos < max_seq; i++, pos++) {
            float *lg = doe_forward(ps, &is, prev, pos);
            /* Field modulation on logits */
            field_step(1.0f);
            apply_field_to_logits(lg, ps->host_vocab);
            int next = sample(lg, ps->host_vocab, F.effective_temp, 40);
            /* Stop on EOS or chat-template end tokens */
            if (next == ps->eos_id) break;
            if (ps->vocab_tokens && next >= 0 && next < ps->vocab_size && ps->vocab_tokens[next]) {
                const char *ts = ps->vocab_tokens[next];
                if (strcmp(ts, "<|im_end|>") == 0 || strcmp(ts, "<|end|>") == 0 ||
                    strcmp(ts, "<|endoftext|>") == 0 || strcmp(ts, "<end_of_turn>") == 0 ||
                    strcmp(ts, "<|user|>") == 0 || strcmp(ts, "<|assistant_end|>") == 0 ||
                    strcmp(ts, "<|eot_id|>") == 0)
                    break;
            }
            /* Prophecy debt — retroactive conscience */
            float pd = compute_prophecy_debt(lg, next, ps->host_vocab);
            F.debt += pd;
            debt_sum += pd; debt_count++;
            /* NOTORCH Hebbian update — debt drives learning */
            float learn_signal = pd > 0.3f ? -pd : (1.0f - pd) * 0.1f;
            for (int l = 0; l < ps->n_field_layers; l++) {
                FieldLayer *fl = &ps->field_layers[l];
                for (int e = 0; e < MAX_EXPERTS; e++) {
                    if (!fl->experts[e].alive || fl->experts[e].tokens_seen == 0) continue;
                    notorch_step(fl->experts[e].lora_A, fl->experts[e].lora_B,
                                 ps->host_dim, ps->host_dim, ps->lora_rank,
                                 is.x, is.xb, learn_signal);
                }
            }
            /* Vitality + mitosis + apoptosis */
            if (i % 10 == 0) {
                /* Harmonic decomposition */
                float lh[16]; int lhl = 0;
                for (int j = 0; j < 16 && j < i; j++) lh[lhl++] = F.entropy;
                if (lhl > 2) harmonic_decompose(&is.hs, lh, lhl);
                for (int l = 0; l < ps->n_field_layers; l++) {
                    update_expert_vitality(&ps->field_layers[l], 10);
                    if (try_mitosis(&ps->field_layers[l], ps->host_dim, ps->lora_rank)) total_births++;
                    if (try_apoptosis(&ps->field_layers[l])) total_deaths++;
                }
            }
            /* Drift snapshot */
            if (i % DRIFT_INTERVAL == 0 && i > 0)
                drift_snapshot(&cd, F.debt, ps, &is.hs);
            token_decode_print(ps, next);
            fflush(stdout);
            prev = next;
        }
        printf("\n");
        /* Meta record */
        int te = 0;
        for (int l = 0; l < ps->n_field_layers; l++) te += ps->field_layers[l].n_alive;
        meta_record(&meta, F.step, te, ps->field_layers[0].parliament.consensus,
                    F.debt, F.field_health, debt_count > 0 ? debt_sum/debt_count : 0,
                    cd.drift, F.debt);
        if (total_births > 0 || total_deaths > 0)
            printf("  [life] births=%d deaths=%d\n", total_births, total_deaths);
        printf("\n");
    }
    free_infer(&is);
}
/* ═══════════════════════════════════════════════════════════════════════════════
* HTTP SERVE MODE — minimal HTTP server for doe_ui.html and doe.html
* ═══════════════════════════════════════════════════════════════════════════════ */
/* TCP port for HTTP serve mode. main() assigns it from the --serve option and
 * enters serve_loop() only when the value is > 0; otherwise it runs chat(). */
static int g_serve_port = 0; /* 0 = disabled */
/* JSON-escape the NUL-terminated string `src` into `buf`.
 *
 *   src    text to escape (NULL is treated as the empty string)
 *   buf    destination, always NUL-terminated when bufsz > 0
 *   bufsz  capacity of buf in bytes, including the terminator
 *
 * Quote, backslash and the common whitespace controls use their two-character
 * escapes; every other byte below 0x20 is written as \u00XX, because JSON
 * forbids raw control characters inside strings. Bytes >= 0x20 (including
 * UTF-8 continuation bytes) are copied unchanged.
 *
 * Output is truncated at an escape boundary: an escape sequence is either
 * written completely or not at all.
 *
 * Returns bytes written (not counting NUL). */
static int json_escape(const char *src, char *buf, int bufsz) {
    static const char hex[] = "0123456789abcdef";
    int p = 0;

    if (!buf || bufsz <= 0) return 0;   /* nowhere to put even the terminator */
    if (!src) { buf[0] = '\0'; return 0; }

    for (; *src; src++) {
        /* unsigned so bytes >= 0x80 are not mistaken for control characters */
        unsigned char c = (unsigned char)*src;
        char shortcut = 0;

        switch (c) {
        case '"':  shortcut = '"';  break;
        case '\\': shortcut = '\\'; break;
        case '\n': shortcut = 'n';  break;
        case '\r': shortcut = 'r';  break;
        case '\t': shortcut = 't';  break;
        case '\b': shortcut = 'b';  break;
        case '\f': shortcut = 'f';  break;
        default: break;
        }

        if (shortcut) {
            if (p + 2 >= bufsz) break;          /* need 2 bytes + NUL */
            buf[p++] = '\\';
            buf[p++] = shortcut;
        } else if (c < 0x20) {
            if (p + 6 >= bufsz) break;          /* need 6 bytes + NUL */
            buf[p++] = '\\'; buf[p++] = 'u';
            buf[p++] = '0';  buf[p++] = '0';
            buf[p++] = hex[c >> 4];
            buf[p++] = hex[c & 0x0F];
        } else {
            if (p + 1 >= bufsz) break;          /* need 1 byte + NUL */
            buf[p++] = (char)c;
        }
    }
    buf[p] = '\0';
    return p;
}
/* Read one HTTP request from `fd` into `buf` (capacity `bufsz`).
 *
 * Reads until the header terminator ("\r\n\r\n") has been seen and the number
 * of body bytes announced by Content-Length has arrived, the buffer is full,
 * or the peer closes / an error occurs. `buf` is always NUL-terminated.
 *
 * Content-Length is only honoured when it appears inside the header section,
 * and is clamped to [0, bufsz] so a hostile value can neither overflow the
 * completion arithmetic nor make the loop wait for more than fits in buf.
 *
 * Returns the total number of bytes stored (0 if nothing was read). */
static int http_read_request(int fd, char *buf, int bufsz) {
    int total = 0;
    int header_end = -1;        /* offset of the first body byte; -1 until headers are complete */
    long content_length = 0;    /* no Content-Length header means no body is expected */

    if (!buf || bufsz <= 0) return 0;
    buf[0] = '\0';

    while (total < bufsz - 1) {
        ssize_t n = read(fd, buf + total, (size_t)(bufsz - 1 - total));
        if (n < 0 && errno == EINTR) continue;   /* interrupted by a signal: retry */
        if (n <= 0) break;                       /* EOF or hard error */
        total += (int)n;
        buf[total] = '\0';

        /* Find end of headers */
        if (header_end < 0) {
            char *hdr_end = strstr(buf, "\r\n\r\n");
            if (hdr_end) {
                header_end = (int)(hdr_end - buf) + 4;
                /* strcasestr returns the first match, so a match at or past
                 * hdr_end means the text only occurs in the body: ignore it. */
                char *cl = strcasestr(buf, "content-length:");
                if (cl && cl < hdr_end) {
                    content_length = strtol(cl + 15, NULL, 10);
                    if (content_length < 0) content_length = 0;
                    if (content_length > bufsz) content_length = bufsz;
                }
            }
        }
        if (header_end >= 0 && total >= header_end + content_length) break;
    }
    return total;
}
/* Write all `len` bytes of `data` to `fd`, looping over partial writes.
 *
 * A write interrupted by a signal (EINTR) is retried; any other failure or a
 * zero-byte write abandons the send silently, since callers treat this as
 * best-effort delivery to a client that may already have disconnected. */
static void http_send(int fd, const char *data, int len) {
    int sent = 0;

    if (!data) return;
    while (sent < len) {
        ssize_t n = write(fd, data + sent, (size_t)(len - sent));
        if (n < 0 && errno == EINTR) continue;   /* not a real failure: try again */
        if (n <= 0) break;
        sent += (int)n;
    }
}
/* Send an HTTP/1.1 response header block on `fd`.
 *
 *   status          numeric status; 200 -> "OK", 404 -> "Not Found",
 *                   anything else is labelled "Bad Request"
 *   content_type    value for the Content-Type header
 *   content_length  body size in bytes, or a negative value for a streaming
 *                   (SSE) response, which omits Content-Length and asks the
 *                   client to keep the connection open and not cache
 *
 * Permissive CORS headers are always included. */
static void http_send_header(int fd, int status, const char *content_type, int content_length) {
    char hdr[512];
    const char *status_text;
    int hlen;

    switch (status) {
    case 200: status_text = "OK";          break;
    case 404: status_text = "Not Found";   break;
    default:  status_text = "Bad Request"; break;
    }

    if (content_length >= 0) {
        hlen = snprintf(hdr, sizeof(hdr),
                        "HTTP/1.1 %d %s\r\n"
                        "Content-Type: %s\r\n"
                        "Content-Length: %d\r\n"
                        "Access-Control-Allow-Origin: *\r\n"
                        "Access-Control-Allow-Headers: Content-Type\r\n"
                        "Connection: close\r\n\r\n",
                        status, status_text, content_type, content_length);
    } else {
        /* Streaming (SSE) — no content-length */
        hlen = snprintf(hdr, sizeof(hdr),
                        "HTTP/1.1 %d %s\r\n"
                        "Content-Type: %s\r\n"
                        "Cache-Control: no-cache\r\n"
                        "Access-Control-Allow-Origin: *\r\n"
                        "Access-Control-Allow-Headers: Content-Type\r\n"
                        "Connection: keep-alive\r\n\r\n",
                        status, status_text, content_type);
    }

    /* snprintf returns the length it WOULD have written; on truncation that
     * exceeds the buffer, so clamp before using it as a byte count. */
    if (hlen < 0) return;
    if (hlen >= (int)sizeof(hdr)) hlen = (int)sizeof(hdr) - 1;
    http_send(fd, hdr, hlen);
}
/* Serve a static file (doe_ui.html, doe.html) as text/html with status 200.
 *
 * The whole file is read into memory and sent in one response.
 *
 * Returns 1 if the response was sent, 0 if the file could not be opened,
 * sized, or fully read (nothing is written to `fd` in that case, so the
 * caller can still send its own error response). */
static int http_serve_file(int fd, const char *filepath) {
    FILE *f = fopen(filepath, "rb");
    if (!f) return 0;

    long sz = -1;
    if (fseek(f, 0, SEEK_END) == 0) sz = ftell(f);
    /* ftell reports -1 on failure; the header API carries the length as int,
     * so anything above 2^31-1 cannot be announced correctly either. */
    if (sz < 0 || sz > 0x7fffffffL || fseek(f, 0, SEEK_SET) != 0) {
        fclose(f);
        return 0;
    }

    /* malloc(0) may legally return NULL; ask for at least one byte so an empty
     * file is served as an empty 200 instead of being reported as missing. */
    char *data = malloc(sz > 0 ? (size_t)sz : 1);
    if (!data) { fclose(f); return 0; }

    size_t got = fread(data, 1, (size_t)sz, f);
    fclose(f);
    if (got != (size_t)sz) {    /* short read: do not send uninitialised bytes */
        free(data);
        return 0;
    }

    http_send_header(fd, 200, "text/html; charset=utf-8", (int)sz);
    http_send(fd, data, (int)sz);
    free(data);
    return 1;
}
/* Parse exactly four hex digits at `s` into *cp. Returns 1 on success, 0 otherwise. */
static int json_hex4(const char *s, unsigned *cp) {
    unsigned v = 0;
    for (int k = 0; k < 4; k++) {
        char c = s[k];
        unsigned d;
        if (c >= '0' && c <= '9')      d = (unsigned)(c - '0');
        else if (c >= 'a' && c <= 'f') d = (unsigned)(c - 'a') + 10u;
        else if (c >= 'A' && c <= 'F') d = (unsigned)(c - 'A') + 10u;
        else return 0;  /* also rejects NUL, so the scan never runs past the string */
        v = (v << 4) | d;
    }
    *cp = v;
    return 1;
}

/* Encode code point `cp` as UTF-8 into `dst` if it fits in `room` bytes.
 * Returns the number of bytes written (0 when it does not fit or cp is 0). */
static int json_put_utf8(unsigned cp, char *dst, int room) {
    if (cp == 0) return 0;                              /* cannot live in a C string */
    if (cp >= 0xD800 && cp <= 0xDFFF) cp = 0xFFFD;      /* lone surrogate -> U+FFFD */
    if (cp < 0x80) {
        if (room < 1) return 0;
        dst[0] = (char)cp;
        return 1;
    }
    if (cp < 0x800) {
        if (room < 2) return 0;
        dst[0] = (char)(0xC0 | (cp >> 6));
        dst[1] = (char)(0x80 | (cp & 0x3F));
        return 2;
    }
    if (cp < 0x10000) {
        if (room < 3) return 0;
        dst[0] = (char)(0xE0 | (cp >> 12));
        dst[1] = (char)(0x80 | ((cp >> 6) & 0x3F));
        dst[2] = (char)(0x80 | (cp & 0x3F));
        return 3;
    }
    if (room < 4) return 0;
    dst[0] = (char)(0xF0 | (cp >> 18));
    dst[1] = (char)(0x80 | ((cp >> 12) & 0x3F));
    dst[2] = (char)(0x80 | ((cp >> 6) & 0x3F));
    dst[3] = (char)(0x80 | (cp & 0x3F));
    return 4;
}

/* Extract the string value of `key` from `json` into `out`. Simple parser:
 * it finds the first occurrence of "key" (quoted) anywhere in the text, then
 * the next ':' and expects a string literal after optional whitespace.
 *
 * JSON escapes are decoded: \n \t \r \b \f become the control character,
 * \uXXXX (including surrogate pairs) becomes UTF-8, and any other escaped
 * character (\" \\ \/) is copied as itself. \u0000 is dropped because the
 * result is a C string.
 *
 *   out / outsz  destination and its capacity; always NUL-terminated when
 *                outsz > 0, truncated if the value does not fit
 *
 * Returns the number of bytes stored in `out`. 0 means either "not found /
 * not a string" or an empty string value. */
static int json_get_string(const char *json, const char *key, char *out, int outsz) {
    if (!out || outsz <= 0) return 0;
    out[0] = '\0';
    if (!json || !key) return 0;

    char needle[64];
    snprintf(needle, sizeof(needle), "\"%s\"", key);
    const char *p = strstr(json, needle);
    if (!p) return 0;
    p = strchr(p + strlen(needle), ':');
    if (!p) return 0;
    /* skip the colon and any whitespace (pretty-printed JSON may break the line here) */
    while (*p == ':' || *p == ' ' || *p == '\t' || *p == '\n' || *p == '\r') p++;
    if (*p != '"') return 0;
    p++;

    int i = 0;
    while (*p && *p != '"' && i < outsz - 1) {
        if (*p != '\\' || !p[1]) {      /* ordinary byte, or a dangling backslash at end of input */
            out[i++] = *p++;
            continue;
        }
        char esc = p[1];
        p += 2;
        switch (esc) {
        case 'n': out[i++] = '\n'; break;
        case 't': out[i++] = '\t'; break;
        case 'r': out[i++] = '\r'; break;
        case 'b': out[i++] = '\b'; break;
        case 'f': out[i++] = '\f'; break;
        case 'u': {
            unsigned cp;
            if (!json_hex4(p, &cp)) {   /* malformed \u: keep the letter, as a lenient parser would */
                out[i++] = 'u';
                break;
            }
            p += 4;
            if (cp >= 0xD800 && cp <= 0xDBFF && p[0] == '\\' && p[1] == 'u') {
                unsigned lo;
                if (json_hex4(p + 2, &lo) && lo >= 0xDC00 && lo <= 0xDFFF) {
                    cp = 0x10000u + ((cp - 0xD800u) << 10) + (lo - 0xDC00u);
                    p += 6;
                }
            }
            int w = json_put_utf8(cp, out + i, outsz - 1 - i);
            if (w == 0 && cp != 0) goto done;   /* no room for the whole sequence: truncate here */
            i += w;
            break;
        }
        default: out[i++] = esc; break;         /* \" \\ \/ and unknown escapes */
        }
    }
done:
    out[i] = '\0';
    return i;
}
/* Pull the content of the last user turn out of a chat/completions body.
 *
 * Walks every "role" key in `body`; a key counts as a user turn when the
 * token "user" (quoted) begins less than 30 bytes after it. The "content"
 * string is then looked up starting from the last such key.
 *
 * Returns what json_get_string returns for that lookup, or 0 when the body
 * has no user turn. */
static int json_get_last_user_message(const char *body, char *out, int outsz) {
    static const char role_key[] = "\"role\"";
    const char *anchor = NULL;

    for (const char *cur = strstr(body, role_key);
         cur != NULL;
         cur = strstr(cur + 1, role_key)) {
        const char *user_tok = strstr(cur, "\"user\"");
        if (user_tok != NULL && (user_tok - cur) < 30)
            anchor = cur;   /* keep overwriting: the final hit is the latest turn */
    }

    if (anchor == NULL)
        return 0;
    return json_get_string(anchor, "content", out, outsz);
}
/* Look up a numeric value for `key` in `json`.
 *
 * Locates the first quoted occurrence of the key, then the next ':' and
 * converts whatever follows with atof(). Returns `def` when the key or the
 * colon is missing; a non-numeric value therefore yields 0, not `def`. */
static float json_get_float(const char *json, const char *key, float def) {
    char quoted[64];
    snprintf(quoted, sizeof(quoted), "\"%s\"", key);

    const char *hit = strstr(json, quoted);
    const char *colon = (hit != NULL) ? strchr(hit + strlen(quoted), ':') : NULL;
    if (colon == NULL)
        return def;

    double parsed = atof(colon + 1);
    return (float)parsed;
}
/* Run inference for one user message and stream the tokens as SSE events.
 *
 *   fd           connected client socket; the response header must already
 *                have been sent by the caller
 *   ps           loaded index/model state
 *   user_msg     raw user text; wrapped in the chat template selected by
 *                ps->chat_style before tokenisation
 *   temperature  sampling temperature; values <= 0.01 fall back to
 *                F.effective_temp
 *   max_tokens   upper bound on generated tokens (also bounded by the
 *                512-position context allocated here)
 *
 * Each token is sent as `data: {"token":"..."}` followed by a blank line, and
 * the stream always ends with `data: {"done":true}` plus the blank line that
 * SSE requires before an event is dispatched. Generation stops early on the
 * EOS id, on any of the known chat-template end tokens, or when a write to
 * the client fails. */
static void http_stream_inference(int fd, GGUFIndex *ps, const char *user_msg, float temperature, int max_tokens) {
    /* Sent via sizeof so the length can never drift from the literal again
     * (a hard-coded 20 used to drop the final '\n' of this 21-byte event). */
    static const char done_evt[] = "data: {\"done\":true}\n\n";
    int max_seq = 512;
    InferState is = alloc_infer(ps, max_seq);

    if (!user_msg) user_msg = "";

    /* Reset KV cache */
    int kd = ps->host_kv_heads * ps->host_head_dim;
    /* NOTE(review): the trailing 4 is presumably sizeof(float) per cache entry — confirm against alloc_infer */
    memset(is.key_cache, 0, (size_t)ps->host_n_layers * max_seq * kd * 4);
    memset(is.value_cache, 0, (size_t)ps->host_n_layers * max_seq * kd * 4);

    /* Wrap input in chat template. The buffer leaves headroom beyond a
     * 2048-byte message so the template suffix is not cut off by snprintf. */
    char wrapped[2048 + 128];
    switch (ps->chat_style) {
    case 1: snprintf(wrapped, sizeof(wrapped), "<|im_start|>user\n%s<|im_end|>\n<|im_start|>assistant\n", user_msg); break;
    case 2: snprintf(wrapped, sizeof(wrapped), "[INST] %s [/INST]", user_msg); break;
    case 3: snprintf(wrapped, sizeof(wrapped), "<|user|>\n%s\n<|assistant|>\n", user_msg); break;
    case 4: snprintf(wrapped, sizeof(wrapped), "<|user|>\n%s<|end|>\n<|assistant|>\n", user_msg); break;
    case 5: snprintf(wrapped, sizeof(wrapped), "<start_of_turn>user\n%s<end_of_turn>\n<start_of_turn>model\n", user_msg); break;
    case 6: snprintf(wrapped, sizeof(wrapped), "<|user_start|>%s<|user_end|><|assistant_start|>", user_msg); break;
    default: snprintf(wrapped, sizeof(wrapped), "%s", user_msg); break;
    }

    /* Tokenize */
    int input_tokens[512];
    int n_input = 0;
    if (ps->bos_id >= 0) input_tokens[n_input++] = ps->bos_id;
    n_input += tokenize_input(ps, wrapped, input_tokens + n_input, 512 - n_input);

    /* Nothing to condition on: input_tokens[n_input - 1] below would read out
     * of bounds, so finish the stream cleanly instead. */
    if (n_input <= 0) {
        http_send(fd, done_evt, (int)sizeof(done_evt) - 1);
        free_infer(&is);
        return;
    }

    /* Prefill */
    int pos = 0;
    for (int i = 0; i < n_input && pos < max_seq - 1; i++, pos++)
        doe_forward(ps, &is, input_tokens[i], pos);
    /* NOTE(review): the last prompt token is fed to doe_forward again as the
     * first generation step — kept as-is; confirm this is intended. */
    int prev = input_tokens[n_input - 1];

    /* Generate tokens, stream as SSE */
    for (int i = 0; i < max_tokens && pos < max_seq; i++, pos++) {
        float *lg = doe_forward(ps, &is, prev, pos);
        field_step(1.0f);
        apply_field_to_logits(lg, ps->host_vocab);
        float temp = temperature > 0.01f ? temperature : F.effective_temp;
        int next = sample(lg, ps->host_vocab, temp, 40);

        /* Stop on EOS */
        if (next == ps->eos_id) break;
        if (ps->vocab_tokens && next >= 0 && next < ps->vocab_size && ps->vocab_tokens[next]) {
            const char *ts = ps->vocab_tokens[next];
            if (strcmp(ts, "<|im_end|>") == 0 || strcmp(ts, "<|end|>") == 0 ||
                strcmp(ts, "<|endoftext|>") == 0 || strcmp(ts, "<end_of_turn>") == 0 ||
                strcmp(ts, "<|user|>") == 0 || strcmp(ts, "<|assistant_end|>") == 0 ||
                strcmp(ts, "<|eot_id|>") == 0) break;
        }

        /* Prophecy debt + Hebbian update */
        float pd = compute_prophecy_debt(lg, next, ps->host_vocab);
        F.debt += pd;
        float learn_signal = pd > 0.3f ? -pd : (1.0f - pd) * 0.1f;
        for (int l = 0; l < ps->n_field_layers; l++) {
            FieldLayer *fl = &ps->field_layers[l];
            for (int e = 0; e < MAX_EXPERTS; e++) {
                /* only experts that are alive and have already seen tokens are updated */
                if (!fl->experts[e].alive || fl->experts[e].tokens_seen == 0) continue;
                notorch_step(fl->experts[e].lora_A, fl->experts[e].lora_B,
                             ps->host_dim, ps->host_dim, ps->lora_rank,
                             is.x, is.xb, learn_signal);
            }
        }

        /* Decode token to buffer */
        char tokbuf[256], escaped[512];
        token_decode_buf(ps, next, tokbuf, sizeof(tokbuf));
        json_escape(tokbuf, escaped, sizeof(escaped));

        /* Send SSE event */
        char sse[1024];
        int slen = snprintf(sse, sizeof(sse), "data: {\"token\":\"%s\"}\n\n", escaped);
        int wr = (int)write(fd, sse, slen);
        if (wr <= 0) break; /* client disconnected */
        prev = next;
    }

    /* Send done event */
    http_send(fd, done_evt, (int)sizeof(done_evt) - 1);
    free_infer(&is);
}
/* Send a complete text/plain response consisting of `msg` with the given status. */
static void serve_reply_text(int fd, int status, const char *msg) {
    int len = (int)strlen(msg);
    http_send_header(fd, status, "text/plain", len);
    http_send(fd, msg, len);
}

/* Main HTTP serve loop: listen on g_serve_port (all interfaces) and handle
 * one connection at a time, forever.
 *
 *   ps       loaded index/model state used for /health and inference
 *   exe_dir  directory prefix (with trailing slash) where doe_ui.html and
 *            doe.html are looked up
 *
 * Routes:
 *   OPTIONS *                          CORS preflight (204)
 *   GET  / , /index.html               doe_ui.html
 *   GET  /visual                       doe.html
 *   GET  /health                       JSON status
 *   POST /chat/completions,
 *        /v1/chat/completions          SSE token stream
 * Anything else gets a 400 response. Returns only if the listening socket
 * cannot be set up. */
static void serve_loop(GGUFIndex *ps, const char *exe_dir) {
    signal(SIGPIPE, SIG_IGN); /* ignore broken pipes */

    int server_fd = socket(AF_INET, SOCK_STREAM, 0);
    if (server_fd < 0) { perror("[serve] socket"); return; }
    int opt = 1;
    setsockopt(server_fd, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt));

    struct sockaddr_in addr = {0};
    addr.sin_family = AF_INET;
    addr.sin_addr.s_addr = INADDR_ANY;
    addr.sin_port = htons(g_serve_port);
    if (bind(server_fd, (struct sockaddr*)&addr, sizeof(addr)) < 0) {
        perror("[serve] bind"); close(server_fd); return;
    }
    if (listen(server_fd, 8) < 0) {
        perror("[serve] listen"); close(server_fd); return;
    }

    /* Resolve HTML file paths relative to executable */
    char ui_path[512], vis_path[512];
    snprintf(ui_path, sizeof(ui_path), "%sdoe_ui.html", exe_dir);
    snprintf(vis_path, sizeof(vis_path), "%sdoe.html", exe_dir);

    printf("[serve] parliament listening on http://0.0.0.0:%d\n", g_serve_port);
    printf("[serve] / → chat UI\n");
    printf("[serve] /visual → parliament terminal\n");
    printf("[serve] /health → status\n");
    printf("[serve] POST /chat/completions → inference stream\n\n");

    while (1) {
        struct sockaddr_in client_addr;
        socklen_t client_len = sizeof(client_addr);
        int client = accept(server_fd, (struct sockaddr*)&client_addr, &client_len);
        if (client < 0) continue;

        char req[8192];
        int reqlen = http_read_request(client, req, sizeof(req));
        if (reqlen <= 0) { close(client); continue; }

        /* Parse method and path */
        char method[8] = "", path[256] = "";
        sscanf(req, "%7s %255s", method, path);

        /* Handle CORS preflight */
        if (strcmp(method, "OPTIONS") == 0) {
            const char *cors = "HTTP/1.1 204 No Content\r\n"
                               "Access-Control-Allow-Origin: *\r\n"
                               "Access-Control-Allow-Methods: GET, POST, OPTIONS\r\n"
                               "Access-Control-Allow-Headers: Content-Type\r\n"
                               "Content-Length: 0\r\n"
                               "Connection: close\r\n\r\n";
            http_send(client, cors, (int)strlen(cors));
            close(client);
            continue;
        }

        if (strcmp(method, "GET") == 0) {
            if (strcmp(path, "/") == 0 || strcmp(path, "/index.html") == 0) {
                if (!http_serve_file(client, ui_path))
                    serve_reply_text(client, 404, "doe_ui.html not found");
            } else if (strcmp(path, "/visual") == 0) {
                if (!http_serve_file(client, vis_path))
                    serve_reply_text(client, 404, "doe.html not found");
            } else if (strcmp(path, "/health") == 0) {
                /* The model path and arch come from the filesystem / GGUF and
                 * may contain quotes or backslashes: escape them so the
                 * response stays valid JSON. */
                char esc_path[1024], esc_arch[128];
                json_escape(ps->host_path, esc_path, sizeof(esc_path));
                json_escape(ps->host_arch, esc_arch, sizeof(esc_arch));
                char body[2048];
                int blen = snprintf(body, sizeof(body),
                    "{\"status\":\"ok\",\"model\":\"%s\",\"arch\":\"%s\","
                    "\"params\":\"%dM\",\"vocab\":%d,\"layers\":%d,"
                    "\"experts\":%d,\"debt\":%.4f,\"health\":%.4f}",
                    esc_path, esc_arch,
                    /* widen before multiplying: vocab * dim * 2 can exceed INT_MAX */
                    (int)((long long)ps->host_vocab * ps->host_dim * 2 / 1000000),
                    ps->host_vocab, ps->host_n_layers,
                    ps->n_field_layers > 0 ? ps->field_layers[0].n_alive : 0,
                    F.debt, F.field_health);
                /* snprintf reports the untruncated length; never send past the buffer */
                if (blen < 0) blen = 0;
                if (blen >= (int)sizeof(body)) blen = (int)sizeof(body) - 1;
                http_send_header(client, 200, "application/json", blen);
                http_send(client, body, blen);
            } else {
                serve_reply_text(client, 404, "not found");
            }
        } else if (strcmp(method, "POST") == 0 &&
                   (strcmp(path, "/chat/completions") == 0 || strcmp(path, "/v1/chat/completions") == 0)) {
            /* Find body after \r\n\r\n */
            char *body = strstr(req, "\r\n\r\n");
            if (!body) { close(client); continue; }
            body += 4;

            char user_msg[2048] = "";
            json_get_last_user_message(body, user_msg, sizeof(user_msg));
            if (user_msg[0] == '\0') {
                const char *err = "{\"error\":\"no user message\"}";
                http_send_header(client, 400, "application/json", (int)strlen(err));
                http_send(client, err, (int)strlen(err));
            } else {
                /* temperature 0 means "not specified"; the streamer then uses the field's own value */
                float temp = json_get_float(body, "temperature", 0.0f);
                int max_tok = (int)json_get_float(body, "max_tokens", 256.0f);
                if (max_tok < 1) max_tok = 256;
                if (max_tok > 512) max_tok = 512;
                printf("[serve] inference: \"%.*s\" temp=%.2f max=%d\n",
                       (int)(strlen(user_msg) > 60 ? 60 : strlen(user_msg)), user_msg, temp, max_tok);
                http_send_header(client, 200, "text/event-stream", -1);
                http_stream_inference(client, ps, user_msg, temp, max_tok);
            }
        } else {
            serve_reply_text(client, 400, "method not allowed");
        }
        close(client);
    }
}
/* ═══════════════════════════════════════════════════════════════════════════════
* MAIN — the field manifests.
* ═══════════════════════════════════════════════════════════════════════════════ */
/* Program entry point.
 *
 * Order of operations:
 *   1. parse command-line options (--model, --threads, --serve, --help; the
 *      field overrides are applied after field_init())
 *   2. pick the matvec thread count: --threads if given, otherwise the number
 *      of online CPUs; clamped to 1..32 either way
 *   3. field_init(), then apply --prophecy / --destiny / --lora-alpha
 *   4. scan the environment and search well-known directories for
 *      doe_identity*.gguf (largest file wins) and a gamma file
 *   5. choose the host GGUF: --model, else the identity file, else a GGUF
 *      discovered by env_scan()
 *   6. index the GGUF, load gamma bytes if found, resume mycelium state
 *   7. run the HTTP server (--serve PORT) or the interactive chat
 *   8. save a spore and free the index
 *
 * Returns 0 on success or after --help, 1 if no GGUF is found or indexing fails. */
int main(int argc, char **argv) {
    setbuf(stdout, NULL);
    printf("\n doe.c — Democracy of Experts\n");
    printf(" θ = ε + γ + αδ — the parliament awakens.\n\n");

    char gguf_path[256] = "";
    int threads_from_cli = 0;   /* set when --threads was given, so the auto-detect below does not clobber it */
    for (int i = 1; i < argc; i++) {
        if (strcmp(argv[i], "--model") == 0 && i+1 < argc) snprintf(gguf_path, 256, "%s", argv[++i]);
        else if (strcmp(argv[i], "--threads") == 0 && i+1 < argc) {
            g_n_threads = atoi(argv[++i]);
            if (g_n_threads < 1) g_n_threads = 1;
            threads_from_cli = 1;
        }
        else if (strcmp(argv[i], "--prophecy") == 0 && i+1 < argc) { i++; /* value is applied after field_init */ }
        else if (strcmp(argv[i], "--destiny") == 0 && i+1 < argc) { i++; /* value is applied after field_init */ }
        else if (strcmp(argv[i], "--serve") == 0 && i+1 < argc) { g_serve_port = atoi(argv[++i]); }
        else if (strcmp(argv[i], "--help") == 0 || strcmp(argv[i], "-h") == 0) {
            printf("doe.c — DOE: inference architecture over any GGUF\n\n");
            printf(" --model PATH GGUF to index (or auto-detect)\n");
            printf(" --serve PORT start HTTP server for UI (doe_ui.html, doe.html)\n");
            printf(" --threads N CPU threads for matvec (default: all cores)\n");
            printf(" --prophecy N prediction horizon (default: 7)\n");
            printf(" --destiny F destiny bias strength (default: 0.35)\n");
            printf(" --lora-rank N LoRA rank (default: 16)\n");
            printf(" --lora-alpha F LoRA injection strength (default: 0.1)\n\n");
            printf(" BLAS: cc doe.c -O3 -lm -lpthread -DUSE_BLAS -DACCELERATE -framework Accelerate -o doe\n");
            printf(" GPU: cc doe.c -O3 -lm -lpthread -DUSE_CUBLAS -lcublas -lcudart -o doe\n");
            return 0;
        }
    }

    /* ── Thread count for matvec ── */
    /* Auto-detect only when the user did not choose: previously this ran
     * unconditionally and silently discarded --threads. */
    if (!threads_from_cli) g_n_threads = (int)sysconf(_SC_NPROCESSORS_ONLN);
    if (g_n_threads < 1) g_n_threads = 1;
    if (g_n_threads > 32) g_n_threads = 32;

    /* ── Field awakens ── */
    field_init();

    /* Parse field overrides */
    for (int i = 1; i < argc; i++) {
        if (strcmp(argv[i], "--prophecy") == 0 && i+1 < argc) F.prophecy = atoi(argv[++i]);
        else if (strcmp(argv[i], "--destiny") == 0 && i+1 < argc) F.destiny = atof(argv[++i]);
        /* NOTE(review): no code visible here forwards --lora-rank to index_load — confirm it is actually honoured */
        else if (strcmp(argv[i], "--lora-rank") == 0 && i+1 < argc) { i++; /* handled in index_load */ }
        else if (strcmp(argv[i], "--lora-alpha") == 0 && i+1 < argc) F.lora_alpha = atof(argv[++i]);
    }

    /* ── Environment scan ── */
    Environment env;
    env_scan(&env, __FILE__);

    /* ── PHASE 1: Search for DOE identity + gamma FIRST ── */
    char identity_path[256] = "";
    char gamma_path[256] = "";
    int weightless = 1;
    {
        static const char *wdirs[] = { "weights/", "doe_w/", "./", "../weights/", NULL };
        struct stat st;

        /* Search for doe_identity*.gguf (any variant: _micro, _mini, _q8, etc.).
         * Directories are tried in order; within the first directory that has
         * a match, the largest matching file is chosen. */
        for (int d = 0; wdirs[d] && identity_path[0] == '\0'; d++) {
            DIR *dir = opendir(wdirs[d]);
            if (!dir) continue;
            struct dirent *ent;
            int64_t best_size = 0;
            while ((ent = readdir(dir)) != NULL) {
                if (strncmp(ent->d_name, "doe_identity", 12) != 0) continue;
                int nlen = (int)strlen(ent->d_name);
                if (nlen < 5 || strcmp(ent->d_name + nlen - 5, ".gguf") != 0) continue;
                char tmp[256];
                snprintf(tmp, 256, "%s%s", wdirs[d], ent->d_name);
                if (stat(tmp, &st) == 0 && st.st_size > best_size) {
                    snprintf(identity_path, 256, "%s", tmp);
                    best_size = st.st_size;
                }
            }
            closedir(dir);
            if (identity_path[0] != '\0') {
                stat(identity_path, &st);
                printf("[identity] found: %s (%.1fMB)\n", identity_path, (float)st.st_size/(1024*1024));
                weightless = 0;
            }
        }

        /* Search for a gamma file: the first non-empty entry whose name starts
         * with "doe_gamma" or "gamma_" (the extension is not checked). */
        for (int d = 0; wdirs[d] && gamma_path[0] == '\0'; d++) {
            DIR *dir = opendir(wdirs[d]);
            if (!dir) continue;
            struct dirent *ent;
            while ((ent = readdir(dir)) != NULL) {
                if (strncmp(ent->d_name, "doe_gamma", 9) == 0 ||
                    strncmp(ent->d_name, "gamma_", 6) == 0) {
                    char tmp[256];
                    snprintf(tmp, 256, "%s%s", wdirs[d], ent->d_name);
                    if (stat(tmp, &st) == 0 && st.st_size > 0) {
                        snprintf(gamma_path, 256, "%s", tmp);
                        printf("[gamma] found: %s (%.1fMB)\n", tmp, (float)st.st_size/(1024*1024));
                        break;
                    }
                }
            }
            closedir(dir);
        }

        if (weightless)
            printf("[identity] no doe_identity.gguf — weightless mode.\n");
        if (gamma_path[0] == '\0')
            printf("[gamma] no doe_gamma.bin — parliament self-organizes.\n");
    }

    /* ── PHASE 2: Find host GGUF (external knowledge substrate) ── */
    if (gguf_path[0] == '\0') {
        if (identity_path[0] != '\0') {
            snprintf(gguf_path, 256, "%s", identity_path);
            printf("[host] using identity as host model.\n");
        } else {
            /* Fall back to the GGUFs discovered by env_scan, skipping
             * mycelium spores and gamma files. */
            int identity_idx = -1, external_idx = -1;
            for (int i = 0; i < env.n_ggufs; i++) {
                if (strstr(env.ggufs[i].path, "mycelium/")) continue;
                if (strstr(env.ggufs[i].path, "doe_gamma")) continue;
                /* Identity is recognised by file name here; the metadata check happens after index_load */
                if (strstr(env.ggufs[i].path, "doe_identity")) {
                    identity_idx = i; continue;
                }
                if (external_idx < 0) external_idx = i;
            }
            /* Identity GGUF by name takes priority */
            if (identity_idx >= 0) {
                snprintf(gguf_path, 256, "%s", env.ggufs[identity_idx].path);
                printf("[host] found identity GGUF: %s\n", gguf_path);
                weightless = 0;
            } else if (external_idx >= 0) {
                snprintf(gguf_path, 256, "%s", env.ggufs[external_idx].path);
                printf("[host] indexing external: %s (%.1fMB)\n", gguf_path, (float)env.ggufs[external_idx].file_size/(1024*1024));
            } else {
                fprintf(stderr, "[error] no GGUF found. use --model PATH or place a .gguf nearby.\n");
                return 1;
            }
        }
    }

    /* ── Index GGUF ── */
    GGUFIndex idx;
    if (!index_load(&idx, gguf_path)) {
        fprintf(stderr, "[error] failed to index %s\n", gguf_path);
        return 1;
    }
    idx.weightless = weightless;
    /* If GGUF has doe.identity metadata — it's ours regardless of filename */
    if (idx.identity_tag[0] != '\0') {
        idx.weightless = 0;
        printf("[identity] verified via metadata: \"%s\"\n", idx.identity_tag);
    }

    /* ── Load gamma if found ── */
    if (gamma_path[0] != '\0') {
        FILE *gf = fopen(gamma_path, "rb");
        if (gf) {
            long gsz = -1;
            if (fseek(gf, 0, SEEK_END) == 0) gsz = ftell(gf);
            idx.gamma_data = NULL;
            idx.gamma_size = 0;
            /* Only allocate for a sane size, and only read into a buffer that
             * was actually obtained (ftell can fail, malloc can return NULL). */
            if (gsz > 0 && fseek(gf, 0, SEEK_SET) == 0)
                idx.gamma_data = malloc((size_t)gsz);
            if (idx.gamma_data && fread(idx.gamma_data, 1, (size_t)gsz, gf) == (size_t)gsz) {
                idx.gamma_size = (int)gsz;
                printf("[gamma] loaded %ld bytes — personality active.\n", gsz);
            } else {
                free(idx.gamma_data);   /* free(NULL) is a no-op */
                idx.gamma_data = NULL;
                idx.gamma_size = 0;
            }
            fclose(gf);
        }
    }

    /* ── Mycelium — check for existing LoRA spores ── */
    MyceliumState mycelium;
    mycelium_init(&mycelium);
    if (mycelium_load(&idx, idx.profile.fingerprint))
        printf("[mycelium] resumed adaptation for this index.\n");

    /* ── Chat or Serve ── */
    if (g_serve_port > 0) {
        /* Resolve directory of the executable for HTML files:
         * everything in argv[0] up to and including the last '/', or "./". */
        char exe_dir[512] = "./";
        {
            char *slash = strrchr(argv[0], '/');
            if (slash) {
                int dlen = (int)(slash - argv[0]) + 1;
                if (dlen < 500) {
                    memcpy(exe_dir, argv[0], dlen);
                    exe_dir[dlen] = '\0';
                }
            }
        }
        serve_loop(&idx, exe_dir);
    } else {
        chat(&idx);
    }

    /* ── Save spore on exit ── */
    mycelium_save(&idx, F.step, F.field_health);

    /* ── Cleanup ── */
    index_free(&idx);
    printf("[doe] the parliament adjourns. θ persists.\n");
    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment