From 21c68b00a575ae30a85c49679c31bb4cfb073cb1 Mon Sep 17 00:00:00 2001 From: Farouk Adeleke Date: Tue, 10 Feb 2026 11:41:49 -0800 Subject: [PATCH] (no commit message) --- README.md | 2 -- config.json | 52 ++++++++++++++++++++++++++++++++++++++++++++++ probe.json | 1 + probe.safetensors | Bin 0 -> 20636 bytes program.json | 48 ++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 101 insertions(+), 2 deletions(-) create mode 100644 config.json create mode 100644 probe.json create mode 100644 probe.safetensors create mode 100644 program.json diff --git a/README.md b/README.md index cf11de6..e69de29 100644 --- a/README.md +++ b/README.md @@ -1,2 +0,0 @@ -# preference - diff --git a/config.json b/config.json new file mode 100644 index 0000000..0e72b46 --- /dev/null +++ b/config.json @@ -0,0 +1,52 @@ +{ + "model": null, + "signature": { + "description": "Evaluate and compare the quality of two responses (Response A and Response B) given a specific question.\nDetermine which response better addresses the question by focusing on factual correctness, completeness,\nand adherence to any specific requirements mentioned in the question prompt.\n\nBefore yielding your decision, think step by step and explain your reasoning in the reasoning field.\nBe sure to verbally express your uncertainty in your thought process.\n\nDetailed Instructions:\n\n1. **Understand the Question Context:**\n - Ensure you comprehend the full context and requirements specified by the question or problem statement.\n - Note any domain-specific terminologies or conditions.\n\n2. **Evaluate Each Response:**\n - Check for factual accuracy in the content, calculations, or recommendations provided.\n - Assess the response for completeness\u2014whether it completely addresses all aspects of the question.\n - Verify adherence to the specified question requirements.\n - Consider clarity and structure of the explanation or solution provided.\n\n3. **Decision Making:**\n - Determine which response (A or B) best meets the above criteria.\n - Select the response that is not only correct but also most aligns with the question's specific requirements.\n\n4. **Output Your Conclusion:**\n - Document your reasoning process in the reasoning field.\n - Output \"A>B\" if Response A is better, or \"B>A\" if Response B is better.", + "properties": { + "question": { + "__dspy_field_type": "input", + "desc": "The original question or prompt", + "prefix": "Question:", + "title": "Question", + "type": "string" + }, + "response_A": { + "__dspy_field_type": "input", + "desc": "First response to evaluate", + "prefix": "Response A:", + "title": "Response A", + "type": "string" + }, + "response_B": { + "__dspy_field_type": "input", + "desc": "Second response to evaluate", + "prefix": "Response B:", + "title": "Response B", + "type": "string" + }, + "reasoning": { + "__dspy_field_type": "output", + "desc": "Your step by step reasoning for why you chose the better response. With verbally expressed uncertainty.", + "prefix": "Reasoning:", + "title": "Reasoning", + "type": "string" + }, + "label": { + "__dspy_field_type": "output", + "desc": "Which response is better: 'A>B' or 'B>A'", + "prefix": "Label:", + "title": "Label", + "type": "string" + } + }, + "required": [ + "question", + "response_A", + "response_B", + "reasoning", + "label" + ], + "title": "PreferenceSig", + "type": "object" + } +} \ No newline at end of file diff --git a/probe.json b/probe.json new file mode 100644 index 0000000..6de82a1 --- /dev/null +++ b/probe.json @@ -0,0 +1 @@ +{"probe_version":"v1","embedding_dim":5120,"model_path":"Qwen/Qwen3-VL-32B-Instruct","dropout":0.0,"layer_index":16,"num_layers":65,"probe_type":"linear"} \ No newline at end of file diff --git a/probe.safetensors b/probe.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c73946579852dfb98578d2284156c5357067d962 GIT binary patch literal 20636 zcmaHyXH*nj@a6%@ISB|z7LcHT2uxRjOy&S&bhvaYXvmJ{e=ur&$T} ztg@!hnLNTuunO+yX60lxZFKtoyh?UWak9!vpPrsNY2?UhBc@9~ne5`^p0dE{|9*Ye zh%uu^Pyhd%z^T8ho6G-u7I!B%7k4k|nHE_5@8eoX50vw=#cggos4ymowT3T;#}~Aq z)Gvb#4_TpdsytThYoi65wV^s-BxGy9mWZ0j;TBscBt3(pQhM=$xY-qH2~7XgSAdK0 zD)D;rMKVylC60UZO7wbko;(a~vFO-Wu}Z%SD(69N>x z(EM%&EOg9;>Lp`n%-T<29kmu3tp{SXTmY>3%=A(@8~VhgpmqRN#GjfjE?wRVMu{Eb zgtStctt6ijO zud~qH(FM;FmEm|VfLw_Z|9Yhh`rp&BVMc@KnZE|!sT#nb*}nMLcPIEw8&4k^yDTYakSxMU${ej~#Z~;@c5d1PgG-fC2lVs`3#yY}BNRQKlSn?G{|$pajLs{v)qV z4)n$G51dSSBd+rciybf-O>Y^up{czs2o_y_h9<+DXliLyvA#e31 zP_S@O-d0 zo^>|E1<|tTrlrgg-M#qYIVWiUeS)$*>nK{V=X0r>;macv$^QRb@Wp{=;ug^w8}nVb zy?Z!7*9-8MQ{ws^O62nX82HaRDKw2z=jGb{Y2S$?T;tahRsGEIXWujqTCENrL$-h% zMzE@bv0&;fgG(kf(19=gc=Ohtn5nD9N-76w(RLX$F)-shmCi8Ybpb8bh~dAU8|c^- z7rGx9EX2LJ2DNW5(wVtVVEnM0*hvoc;+@1h4vOg8^DF%lzJS>7OE2zJ)0q{mLY87Q ze@j^d>yla}J>N)hvHo$W%QC@n@CNGZI-q=mpAena8)5V-afOPj5OsDAq%BtGBlk~% z(}|~|{;m)lP%s$t)8Es|;lp^~`Hx=dS#QPCZiZxe=bWh1c@!!X2l8LTHlecf4g}Y` zbJ!*u^xp4J+e6oh5`{jj>S+SoXBu+QC*ZSZ`-`2L`4w7I_X$g%?gy*p?i93d6dcG6 zqKm$l#Sg-JNy_MGz8YU4%os~BFm|;#Cb|GJTn*q|+F=-wHcwJJ@GVq~{2>;bt`XKx zyGnr{L}<$FgcQVNIdILMU5srYo`O|*z#t9ZX zno-pE1kQfjgFTn^`dV-qO2El{|A9Pd*6oQ)S zDBM*WhHYIBR}T!u;=#K7vZDa9r+t9tJ!-r^%$YsjT62;1K6-1e$k(4*V)*6m_}QW_ zDx{e5e7zRBTX35yS7>AN3rBWKkHU0sH;x^%R%lEO#gXoa}#CCXS~pcuY7af}W>bhxD#&@!-^@5Itfa)gQH{t=cbOl-xEUe9CKK$?LaJ znP&?dK09*Qmu_4w2H0PtAky z%+DOURp7{1Y7*GAJ(xRO5Ni%!p#!!qY&CNs-nZ^yaG8C_%H_PzWD& z5fpyOaDK;Gc=9Te3x-^$VLM{k<(e`6t^6XseA9#9b&up>eVb`{nZCZdmwzf)@3==@gLUEaW?inB-9{tQ|B#kv9yzVa z`J|E&?K`~`liIEX)6?Ix{*R&-25hSTMYS>ZH_t_4|` zbJm7lHm!$qf9%-cyCcPI^yc@UwBcBbH4fY|R-E=-pa$~|6(cWw7NUjjxWdhve_q`M z3k;k&Al^vwz2uO%{<;DCj)-Pc>pm#2kPD7i1f2Gwf%Jwra`Q8Da+xlJv#)hhc&i9g zywh;@B~RQS8lmtZ70*vifuZA7aC|~KI)*xNo%K=p`}ZvEGc;!Z-=%_);{&QZZUw*Q z4nvt$j$FU)J_IiNE$*6CQhsAYI!|#k!Z}?-MWw6dV14(UcxUzt@HkpWIbsp@N&O|P zyLLr5IzCf+pQv%()J3$oekv)N+mho#PtX(A(yCcaU^(O{?D(9(JC%${G1QflQ-XPx zS~bYhOW4!0No@J;kN!0-sOql9%TjJYsd5P&{~^!I61t%5o-Fqa+eNtS0WF^LRQxRe z7?vzp4r4kZ_(16hG5@eUcQts!`@JHW>l@M_|FaZh8Ae6B8>n)sF%9VN&MS}IC*SV7 zNZ!~0Q!M)9Or31;P9os6_BNXKs|6-|R)YFg#24??Xy?p6)HUiUXdMX#O$A%nw)9QK zrXjaUrnLahq9d2(+y|HI*I`pYi)iur8!esffS1>V!PZDcyyj}eE}k!8{lQBR8`}pb zU%ds+!{>+|^73@vbRo<*yO+*7*x(4e4bYGs__n49j;oKMsoQBcyZiP;K-?Ty6Xj*4p>Pn*Nyk?U*WZ8Fu{{w8@+VD{#U+t z)Bp$_JVH$KG2@pfUQ&Q_GZi%%@alCM{PT5RHeKw9p32SOk-Llj`9$Kj=m?yGSu8hD zsvCOxV41%N-6@eiKk?&nXCtsSB@iyHGvY7`z_sYWUzaNLTQwUjyRU(%5A<2w83F6v zj|#n7wnL}aHrO%v8pMy>Kx&&bFlx{tI4Jfe$8!Y`sW?%%(lr(yJ`U!gi;Q@orzRSQ zr!qWF$AED<5N4H1aJgDsWamW}r@nwOiSDGRw_FgFTS37wn$KL6L#M4bg*Wm~Y1_|X z?0!=ja~Bdxx*TEfKwmmCCRddIIaYkN^oOwI@-|vsk;TT=>tO2UmyoK|7o%TKpfUe_ z6y-~6A>?#4y*q5kJx57(^1>6eb4v;*w>OJa-wE)F!1_VSUSZE z)s(l2*129RTkDUS8Ox}9mMmMmeFUv}h)Z4{6}N|_p?9baF8r+wvRd(YWLhh=%=}D7 zz07$^%o1USu`BBZ9;e=2)>zbO0{NmZKejG~v)zNpYg-)Lv}vQIrXKWhSriVN5s!Ln z{CPofHcg7%4wrhpCp-I8csVx-g^PW0+4pVmV*Eg~`)$YPF1)8ZwNGKESr5$jx8tP0 z+sM}E8~8QaaquX2u<03(KYyNs#X877`8vGr_C<-~w=NqTCC#gfJLqu96IY8 zQDGyLmMwuGl}_rAJ0SiDwWL|LGht?QIA;yHKuxYroVTS`ygXeAJzj?M$-Z*b`?(rh zjtJ#UZw=mib1_BSb{FzX9ne?=XjXN`gJWI!YRYMtZ*xx2O4Gntm&I_S`5+|aTJZZ2 z8@9hNO5A5V9!@S&!7k-mSUdbJyghM&^jsAv->DS}O(xQ<{mWpPp+4Gv6fjY) z6vlk6hv2`N{Ok69xPA0E=&fHz_jWdc%OOqvPgj{W>NWXV-!Sw}w`P<5Nf@`%5))1a z^MkUTwARv$GRN78+jk$MwLN<!59Wl8HNKYBgDh!e!!%OuY}fS2h{fc z3Skrcv36e^j23GSLKrd(^{UD+eBT>j=C%bAZl_tR|NkLEJU}6`Xwfjod!= zh4Z!7V8)>vn1&hqN)8kZNg>heM3{SV8#pD6;MoVU;D6mUX8F<(FYSo+j8;h zOnUjK6Dp<`Q;eI8_<2GMs%IOBXTRR3A4>i_DEf85BsbQMuW{P%uPx#_L$}oFJk(I;ggZvtbZPejx zFH1g>sK%1W>*6Mti?pxH5KZoCz@^q7aF7?0FyjRE)vJZFeo^#Ib^lp08iTo zFl|mIj))J&hnkwazPB%yTx$@IY8!I9T?9s4QN$jPUsKBTX!h=JkMF)Nfa~9rc~Z+K z2=;tQuTCf9Mq59gw5`9iK1l#An-1E(bqRb+k!7<6H-54xtwQ*`R#+Xkf~HCmIrErQ z$3HnN4shB_&W_fYf2t?8PfHZ`S7_qqo>v6z^VY1_>C3To1?2H?Dcvh~!SS6NVA9q# z^rgUs%HtnV!uq>3reQp(NrGs%w2oNeuZtP+y6FDd1$p}}$j)p5m02NpVqGe4@@@jx zQLS2qS-HT3;Z_d&YrVZiF1NBiiL(9$R4n#kSnou&BpskC-u7@LHjq6_nS;Qpa^sC&Uu_(a#~r=KBOEO|1tv+DF*rQJ=Y;D-RWUo!ybATqlBaOMdPqV=RKbU zbqR7_pI~akQ(AB6irdCGbI(96o|iH~eDw8AMaO&*<}U2cr$dM0s5fW8xj+>oipEK% z>zQNB#MP8@*or(&C(uFNuW^1B(~U){L2*nUMHZ&K1Hkw zcE*oyOt^6606slS6$kEag7K+)C~EFhT5+mbOt*RuXGh$iKHU`Y-z5u-N>jrU<0E4D zP8+UyZOeN0I()BZGB5w)i8A-z!^CMZ{BfWqzVlbWi`!J#I?0_XWC+nm*1&}z?BLZZ6^y(8TK5!a7F+3nO+P-RPavARg&HOnpWaa ze(P2$ET7Nd6z^YQlqx|dg9i9!`-mJisPdy-N@DbVRoqw;j8^-#*~wdtYbrd%Mb~;j zS%)+3eU^oH(V09~n&XlY8hGoB7q(3pN%itUC>c>lXMDqjrrwW0LHC2`A{)Y+>@sN8 z4s~wN0`5O+7=P%e3KKLQ!e5TZ^xqAz{^(=j!`pgcL(l+BcN_s`CKccibewFzuZB4d zgE4%I1H4<*k8E~YvugG|Ixr*-RsY4X-r1GV^T=0mn_mN|4|CuzAxcWVf>PPj<}{ItkZPn(vTMqs}_d2r%XD)f5iOMX2z z3N^F6xZ!&WEpW@Dv-U;gYf}g79$1P-hquGAz3Mnc`v=^+f0ar!gZSNh36AdwC5!eT zPM55Q;|X6t_Ld8W-wMDlxszy#+$1vXv6U+H47hssaL&|ff)cBFG}R#kr?d~k;w1|~ z=^Mkf{bJ@Sp#_AlxO53TJRgVLxa^1)1zM_ zsr^haPCI3XxWJv$I>HqP#qj_LE-U{k*=R?Hg(AGYaASwR_1-ET_y2jsZ*lp3;v zKO6fzqR2;Ps5e)it-q45024|caC>$(y!oyJ! z7<;lWzv*a%fHkrBHR%wY>b0KMHrhbtP%U^-{~X#oJ_|P}5r15M0g&yA+kf@uo6qt= z5_%P6`WMmusV#y+jk!>yVgpCEbjR*g=+(QL( z3j46K%pvOGUI|JrW|*-rM;!cVIA(qc=1mg^@Du0vaKh*g;OSiOn5TuBZpM68xJl8n z!*EwOJG`!X6UL7C1q1V?% zOtd?&hx(W+N%#Lca=fC z!svDh1~)X(-N*6x>Zl2y?5v}<0dK@ny)`+0+IAYa7TNgm5YAB;jD?4`iy5a%Y2*Sw zUcE6I>SS!tC#f5R(II$t^F2)6bOcVXP(rzn>0tLSlb4M3;ob9&iKtUo^yFa24{)gG54i7i4Z@e3;FJP$`s=IC2W&=*pEsJ} z2Imecey|D6Dn2TiO-l~%kZ6T|7kexofXae&UpG|46)8bHxXJ>%&oA-Zc4Y{cd=KC$ z)57_I*JrPV4+Ahq%4@nUdkBs*ols)iL?!DTu%)*q<|da{+C0hE_UAsUqkqkk% zcvphoMmWXr3N<~oWQ&8wym7Gy#TU7YJJ;L+Bi%>O? z&Iw_oU0Ln)ed@o&75_Q?6YU-4`1E-nw9x)T$M;M4%@i%FcxpsdQjX#3Ru9dV1@P#y z0gpR<0z689%U!p^5m_6~^}S2+=PrT7&IxAMyd;~FNUVALLU32N66L@8vsZn#I6HW! zxZvj~TBjGqr3oR-X<6LIYbi~4*Co^ah48q~VJN*B4CBvhQO>e{~L zP~myHSJLWLa9WT1AA2g6@2dyd&Y`$);SNx}lf@}Nw?Ido7JCk~##zf=Le(fEHg!(L z+tt^^@3lVMxIPPitUeA|uK^234*>0PM!fk)e~!Fq&oiYQVL?x4{skvsq1-N*Y39v- z>l%futx0%$*D${CZ;m@M{IL4EALlg$Qq{p$2#)uGKc>An(BM3{j90>lUp8!Y)0)S7 zhqHNn7Q7k#8Gf}y@ue?C;{1vhTA?`%_isEe=%n{Uh0m6}{-`;AQb^^^i9Ind#FH=o zJ_Yg)rqZ!2qH%4PDB9K(zxh{5RQCt*?_qhMWg5go#AJ$6cHxcjftd1gI}H63%M#gO zoVPg!NB6q|gNj$d@t5hip-CIG)E>b8|B~@avl6P*{h&(F^EVk0&%Cv6#}gnqmB&d2p|$2X5|b!meZHNrpf37wlgu zvhHzTO66nX(@143Z97aIJwtfOkFQC*ZT=t`$+P637lhiv3{)i{4wH z*6k2X3(%(7zf5t-`&lqMH;#M1iU8$f5)L_&hSMYw7~lUixD?A`%J2Zzx|t&O8>@3lqm%Ja zU#YKi;~)*bo6P4cmrzh?Go|#O4VxTxL60PJw*TsZb~ClO>(zQm)%lC`x42krTlX3Q zOI2}CZXsN*(Z-Di16aZJGdYYH3pWzw(92H^UG=`x%58f=<*gEe^#rkUgATs=smq^E zv{I$7F7G;~$uGlB2{td%SwTON|Bf7v7kb;_vq3dtV{@0Fxm)1Q22cEwb`Saoe1oHB z_KH7@(?GvejvJ51W68G^IND|n!^ZgI+*VUo7RHDvluw>x&qEK@pR{9l72FJqgDZ{d z^deWvRU^##Yt#gC^&bVdOsgU5x;w9!DEbW$bZl`ek_agE-=f-KVr-fZJfV;ix4$@y+@lJLZsCI<#y~rXOGxIn7`Dcqg&hLU3 z|Ln0S%8`@G9N2rvZkY1>KkzB+&e|h&_`()@Y&tpz#ydLlZ?$hwTOjpURknLgO)RIQ zy|i(3!!5AZ=#8neEwCYbE%lxr2v4uwfC8WKP-LQrnJdbuaIOPCpOB7DJmP1WvnYN$5rM5d#XUA~yr*g+P`AH?1befiZDRm`_=PW*0$tBwJpy zX34E;2sv9w<62DdcX2)FdtHP(k1mo`Yc<3r-vhOeQcbzf1xxnH(wb;bblmxZQauAX zYSwv4u1*S$ZTb$QM|=Zc*_)z<*&rMpYKQrgZ_>%lfoP6kM!iqqa`u+i{AboEChsox6Zn3ym$%v?cUY|dvLAH%lj zJX$i+2GP_HRo?bT=cZ0DOZZ2pA6*l4wj35@65hg@)RPcV{f747D_XeeqVQ-?3eOp% zi*`RZ(fzVzbiSkxt`}=?@T{%S>q7$Tsp{h~k6mKvRTEShE{8L|O(yd%HI)0Jz_Xif zzTnpw&i0!VxOU$cK{f0@stHu&C4W~+y(mZiXJf*@25y(K8*f}XW-GPXcEhF1lyPCz z01gp);R7ivdz#${$1}Xq;cGeFDXpbPp)M?w{v1xvy&{fXZ;D=GnruWkvJqc+pL*YilZqeYn05Q6# z8~%R2p8N*6;o6LqFtV#1{P#NJ0p~Wb`Vq&AM>`u~EFH(qABc4ed-C6B zR>)>970K^DfKOO6=^wu(8Q1AdgBIxU>cn3V93d}!lx7vqf9a2EmTfTO`DWpv(kii! zwEmVq(Fy;mE>K}!77uGl!GuJ6{1M)r7g)>U#K{q2q_!59?8(GqgMY!fyB65^@+Zg@ z?xbk<@Fs{$vzZro!mq99!3~2)tdV@oD-cJ+rgw# zR+#MDNANF5#VQLK-YaaPt|59nyPGPX@Hs;=Qm<(Lj*pbrcR4Js3*Zxuw(P!Kf_d9^ z3qEZEzR4Jhzn!hPa%TXaKC_M1|))sGvAom_z-5!R;r+z|YQU+}IQbzAn#c(bsjn}8Xr5k^hz_slc zm7n%y7bi0uE1!mGuS_v!p*4pNZW66sq_yE)Gs;@LL@a1+0&nYm!X_^%BXBpy5g8dc z^OXmFC{3g{$lf4fYv->zh+ z{&E!l1c>ye-Ihnax@?`q=!1t< z55l51Qg5ef7+$>Y%b7P~u)eW`60<}w-Kj-shulf&y_TAyd z`akzp)T~XTmuG{q=2#z0n`p{Sx6H7YJkjt=!C0VR0vayy=&?B#L&|Q#w#A;@DfRsq zsQ;uEcW+Ls4MAHr!x73MTs9^GHs$W7(-k|s`tSs5Txp6e3kI>;4@DNt29>`%8pb&~ z>8!gWf%-3082dzn>Jb?*UD|zlLi5s(_J6C@;{z zM6c(Kq}rUL^z-IDy7XbGWTIvUn|P;iM6xO#^eqwB40}k+j+f9DpI&UbW1_erYLEEQ za0w(;hawtXg30|qQ8D?^WOe3aoLB>HsWEQO87~QB>mSnH03zI{4l&PHN?p;EZK8p8rx(GeDRb8+DDC`%=bNL zPPsC7Jnx6+@vR6^)ICE|s z{1M#8oTIJZ;#nA1BFzNYV#JI-Y~&`&;s$X{;IQeDdJ=@qIBTx;qy{$Key>;T+(n zBz=yGMCpL82F=v-o*TRTR^<(Lk@R+5GCtMy#oKRO_-tMTKHruPs+XMb>WNZF-I+}T z?;ZluJ0NBH2{2$!U+h0Knpw3QU!Lhti<2C9<@+jlsB{44-AtAGuhG1BoiksbF2~Oo z`*SaCYq0iEmINJAZVFdb1}KJCh$kRR3KtLpcd!7Z2nq<^6>5U!Cyo)Pd}O z=LuN*C(sZFJ+zYsd%AaBq(csQ)T0BCXCoYKNu*uJ;_7y?d?N}=ISwksO_R@MP z1T~6nQ6l5cSDg2P=cN#~?ib4Ab8Kny;TWpEqK<S5e$mHDEUw}6g9{8fzE$r z@Ke#3XSDlszmJ79x8f?;Rm-E!L2cAMW6$|!E5Y}`YH-M&E|hJF!(*~#()}SHEc;KT z%+>aMpoarqiT){A6m`%WePhA9%7|s!7s9%?ZIJk-LFk_bDszZr-Xsx7cQ z?krd&cu?~W8R7dKdGt0}LTA=b6msWyv5lTFnmw22AHD-Q@S_j^9-Ku7ixt?>K#4=Q zwF%8rqUrX@Fh0lW_-0doY1S$k)0X;>KHa3(n`HRGu9wi>qKzNdZKF9=#;ms`k`+=X z(7be4*l5-T^X)q5^)wCq)i1>>P3lq0MR1U6{gHOD zJSzBS~W`2e~;x6;BB66}$?oUU4Uu%rA3xYS{a^^fhSzxiK@d$<8* z72Bg*QD?=Bdv(&e_$(;SPM~>yMqFCgEdI7Hg@am01bb^8{HfWEzVrefU@zgCzRQKR zXMJ$Zm1`A=7wW`$`dR$Bst2D)G!#CWJ%){EZ<1$~8dQI?MU{dJQ20TUzuMdY>(^nl zx?`r-*Ge1Oci=HRF7?C8ooirt^?6vbJc!rlPX-@xG|4Z&471e(IZ5iZoV0Wk#_az? zmd!(W;5I$Ft5ZrvT7&U=Rt}X-X(c>xm8w0YdBWO%V0rB~*&VmX$veD7+4cJ%{cyDy zxBVA+wSS>`CxNp1)RTSPYT#SLX!!D5!uf^$Dy}d3Al4m?=88XkdG9*~d}FYjOso

`@W+EA!bj^2IN~DJP|eZO@%RlEdD&j|BZlFsnTlK#`jaFB<52F6 zDh_^~$+M0$(x^X`6?9kvw zHn^E;@R0k_P@N^sOWH;Aw2|j3`bI6Kn~y?K?WlDBYPZ7ws-pPNo8=%5TSKjlE?9ra z0gDT7QN@s(aBYe{~*yhblHuI_bpAv4x*3H7@9cxZH(W>t=>Hei2JnHpw z8mRkT?DhAcXy}{5`z>q0bFiEE-Q0)NW`@&V?|j(XN1ty8S5nt(S-kGygL{m>QCpB9 z9@2P922XB)Mt%p(R@If(_J8U1K^;7=u7I;G(y0DdIv1UB;~A<6Jiuuq1rFH))jO8M zE$vg{)z7+kKjIZlT3hZFnlAOyvol!Fb1r0_M%=gNBU-J=9gX_2-FbP)D;mHamxH-R z^B@IgTceV=5iUI~t`M{D3#WZu`L}s4#qM_py9ci9W^o$220egfE=91xdGi( z!?FJKV3a*0Uzpu)f{J-%u<7&>+Fp&EGbjztY%QkDzF8lG2FWq4L zzEm1NPLnofexmtL^{PbqV>QT|HN%*^DtPPU!6|lH z6>~oH$Gfd=q~~l!q5B8m`7=q>AxKdB#bU_$rH1q_9X>w>wW%O6<$K z4Af^Z$$uaHw{Cr(Lww` zeJAYu5ru1|EcAME9FEWmWxZoE+_23@a{r|*%2|7|MR0FUTd-H?`nMMDP0FTm|Cw|9 zPdPG^3CB;Li55J6OBSn@F?od zhK8b8{v^AHzCCkBvG*~G>sU|QBS(mXO%KyCb6GKtp;d`L3o+hFx?3yj?wCj57B zD0tO4@QI%~cr&a{jF#&sdff2fFPELMP4^*Oy_Cw|-}q3ep98*bl%?q|Dm*BrH!u7B zfhOJz;lcO3VY70bs3i6G{+cDTiWtC4{@V_v!QJtxbu3C0pTfYU!#PG;j~R{oCx%I) zV1DFKo;F6{QJn)>V{kkVmA^qjSymi!`59cgUq(CQdtt4;HQ_f`OnzGqR$sqB?(d!$ zWSj}Amc4mgx)P_53V%3ayvIHy+_{yXtiEM6BQREGYAj4{lxbiN@&D|;K=G&J~U;aqBD@~|=_8Hia z?1e|#u8HB1L(xCX0xfLbLEkN(Au%Qam*4i_+zf3j2zBEYX|80kQZMX3K&sE~7wZX+gLUV~=Vq?Q934S!OuQK%AaF+U2df{my6)W7g zK+ZN#d{b#E-s{^2L%&Aj$dtbHETkvCFjmD+vxi`ta}hCJT_m75f==nX09-W9XyFS6wM{?o?rF7i7;+HTfFPV#$y{EJhUxe-V zEwP@AIk)DXkl#O@Z8AM!XtQi`nTJn_kaEIt>!=B>z)^6-J!@k?>7o|yPgVvbDAVJ?1({D z?iep`!3sG7uiBEt@`^psd#MhODmei|AG@&Zr2rC20^!Mr?$qbf4N~8>7XGU)0G(ax znAF=5V}6E{Y-OwHIyQujuRnp>jb2z5>jUF_^?7{A1H!(=R!sgQvPGjQeX(YZIp?0T;=Ok9ctNKMw8tDE-C6qhZk`;iv`@j9Ujnx2A*Sq= z=4GQg;f2;fbZb8hy?-S^$Y3Q=d#MWl*$(*PX&6V`7Vwqv7g}tqfnD<>`QpEC;B+t? zzpT}veK?U^4kMovq&c`wBi7hbDAfJR#Pt=rJZSp>uGzf|%1eDHylp*Yw5@<1E46U` zZeRQ{rWfw?w#4f_eZ-$N8a&Cl7psa9=rgMgcMAr32dJ7LU&1RgOa5;s0>fw~92sJH4F)!jTS=zYCSIhxIa!A1-2Q1N8Nkb7k2 zQ%uQA>p}hRA()+@%`O zaDVum^2MtW25Fe1diU)V)S-nZ`@g1&4I#KVpq7rjcqPo*bO4GJ`rsFHO&m8ojyJDD z{JK^dGoFlw;<-hTsMwpZ#2))hS|K{0a*$?e6LFOg59fDUpr=e49K0KX&rTMJt~Zi+ z?%?kHTj~|26jeY$$uZD>=EkZ+H`5B+ktDfghb0|r=uAr&Jn#s`?U#IUtG*{rNVLXJ z(%i!O*P4)ie}j;9W<2bAsSht#D)1JgOEkGDmmZ#5>?Ks6p!sg$lvhv2L_tEPp z^R5H$mu3@`%QJ95x;p*+xr#Qb_r$7OtKhEE5y6{w(~c)2sKhxP`wRByG(rtss{RwI z$A;j)04rRyKMW6?uM~5>d9Y=3DlFTZh&}V~(9|WStp6nqpHA%$p-;_F{rUpvxmJt6 zozBEz7tHza;&8sR@(Fd0+-&gn!icY9x+cik6X{O!Y{(O@2R=rx6$ z&cJu2L)rOZAKn)oDjfqQoM7GyJ*GsVXJr&#e==UsusA|`D>Kk-eKKDh*CMFu48j(* z9D4nr2@0yLaQW{Poa%Z|oRr{(xh+<>G4mYdoc&45AeGxOh0n|#$~lJZbYZ)cAOA_@ zYYG-{_=YwdUT(;uwBJM5*B)y-gGtv*jW_k^i5-O>p)R16hK^lD?}m26J8$pOe~H6k z-s(i$_Aryz7Hi|{;qG{#rAjdJG{Yk+2l276jTAbpJ5O0zRIx~}R2)6bh8yzy>A9^A z4!BhX`+I$+#SVZ&roEzLRyB0WBnvfDKUK_l>c|&W%sAJga8_Cr z?|r)x)=ZRk7#a28aWOJj`zjICG{eyO`3GulOGg=F7xoJYr)AblU}5V#c)O<&ng;iR zUl!Wju(be2^+{$!xnT6j=ngkhYGGoMs?=jR3;Qnp75g{_Nx8iR^m^jP3%l0RZ|zWi zvRoZYPGy42urQo3OaX4i9e|k5(_+d&W7e3fiw>~}cR5E_3|J-bTAlSYO@IXHykp#9)4d5 z6K!TiQL4ibKG)@o4r|*hWMw*GeRL2X{#z>kXK;dIujYW~2Y>F}r9{_OIPsj)*K~Hd zKRz9(h>yNKrXBa(pmxs&x?Sfj6t5hLTP%K3eN-}Dy3z|5>JH;llR~<#mcd>5U&+#0 zhIReDc~ZR(4w2q3d*k+kUgr*RrAq>@v6Qa8O%G^+XAoX3J3y~b8lZFJJJ|ScH;t9{ zntiPJDIWC+U?fw1dH5k>Tq`T^d@MX%S_DR}J=9-l4CMy~}l3{n!#bl(cej}vgN zaW5_l%@YT#b->$-qA<0w1KNX4q1~=c=&O;5cS_gO5gj))7$FC}&a{wxm;s08zJaR- zQB>Nj#aC1vaPJa1ESvS2p5&&}jlDAH@h*|`jO5Vls1Ls=Zw9l9$0P>!!2<;b0N$=# zS!zZXQ;JCaVHwP*P2utL?Xi1}JdNI4L@MVxz;vY|IN#Mn%O}Pp(Hg>wc1q{9*-^@H z--$NbE)d+(Bq^W0isA-o8j=T$AUor&ug9Vm^{el{&K>ip$1HR=+a7C06`J06EshNhL(kD)&SrfrZUWVOT zYbiK%1+^LsK*{fL9(lGI&ZqY070C$0CE*!i-x<8Xc*Zh?d5KcU7q@qEW1 z3BO+60gIN_Q$H=lf8Mcd_E?4GRs`{`9Y$EYeHED*XJLBl7cy$p=0lD%DER9jlB|3# z?m4E*nX@v`{do#o4@}@k<{!ns^KJ>ZmbHl|DrKO1{zuA`X1u=_hGXfu4WziDMDot0 z2f5u?0HqJ@F;X!WqpsR=|1H(<)!(1Dx*nn@y)V-5ecJGIXrpNVHXUrh1?e9}DHQ5%8-m36(_b~4HHM%^;vy1LYbGTNl zg(v2$qfg(w`S($4&iQ7(z*=i#ny0N+@w#C_Iug5tI;F;(`J&}Zp; z5=*!&l6!i0G}2l63(%`i3K_N!fM6B^iFTEr83b6=;Lr9XhpD2d&+; z=tr9u{u^%1Yep^=W_yU#?_jl1kQL5THfDp1Src@98iIq1y;*yW3#6MBgG!RRSpB(* z<_(PG+iP5K*f3{UVz~=?uUH0Fk0P;Ry&Znp5-TLfrnCD}Cw^+xlMmXvqNDtBpqpBJ z?MVWwbz4bGr(K7$KMnBYlwaVy=L%?AEf%}SZh-evoise!0DV97#W`Pfsr5fkJoNHA zSX)^k-FL@P^D05FLR&Np3Z!y%MeN(RC!SiGDqwG!Ambfp;lbuLJJb@0={wX}B16G$jC=gVfk*kf-LeGKS?@CDmw?O#h# z=EgGNTY@zel}hW1!o`#^_%K9$)4|DFiz|9&J5bVdBmObn4kHfvvEttC^x@ezA-U9& zv*sFLU~&Yeo4a!C5G8E+5`%N2Qs}qT2WWnxfi6p8c#3v|sJc|jq)I~YuHqlq|8fT1 zn)V5*0t=~|xLwe%=%jpG*+TLc2-F;VwKb`(F*5_aoQ)7sn|oWJFd&GBS#+)aQK; zi6qgGQA8v+QE9l{QYsC%G*#-hP>61HDM_FAISpJ5w_Bns+8I~tE3~OO#qZ2-f6H#!e~oOWXfg_+QfrvT_JV9n&wMqA!Eo`R5^J z#4fS=R0cG3yKqc&HjNos3p#s%*V_;0j@pMpQGN?lE7sEc9e#K&tx)I)Dir^{H4j?r^t_6pWv(~D(a(@R zx%?^HDQTCu+?nKa(wpb3I6%8sS#!+gF_@O}icDP})5VpMoOEF-mi?^8eHtF<;5iS^ z<$Fta0_^$y*gnuH8-_y%mecVAJ#fO>9eX>XP?3Z2ja4s<=_#O?-Htdh%9F=0%MlV) zztcbAt>hKi2BUXfpeLXJ*>!H5p*o0fDJ;T(4d#4Gd+{WD$?M;YxGyX*QDF5oUDV}a zDMV<5z!pPqa*S%cYIkb@p0)M>q03o#_GTm}rLK`X9{-&pt}Vc3RXq-i*h_&Ki?O9s z>V_o0uVG||zkCT|kHXOyC+*7hOg@O(PZqJAqAuIj9Dpfl0cf~xC>Plqv*y|tP(5}s zbfzTG*hn=|OJW&UpZ4XoS0Z7;#J&)#T%8&ZcGa*{^O29UC?g{dyl$hVc}Pm$;OqnsMCrNr&5N`d$6^@G~kLroy+o zXF+c0Dk@J2C#7mnI zcXK)RwJF5Uu;F{wo?P9bA?~xgNq=-@(Y1e*>7x5{+V2>_&&{(0qbL1EF*y@DHl78; zc0=B+Y{6&#T2DW8*TBZYR(ks*3QjgV@#iZlbUrgieiat-uiLD+PI)v=9UBiFQvdZw z`!#B>G{Vdcj_7iwj;hv_z$#5A{ID+$iag&+I=-8FU2QQ-!Cm;gaJx7t!vH*QoQKkq z%QV@1AZt80I%(;q9HDcn5{2ui;dqJpK7RQvEG*Z=hnF-k#(N6a-CBUQi{Fa{10+4@ zv6^xt(y8p}DdA{h1%2D2g%5U@!QWdau-Vb=^7ObVyk6S<)qDH$U$)-jQePcvUSh(5 zh9U6px_XK*u;Ip$YScfd0g4~qhv$zC#p1eHx>h`oHNXeU|C-M2zobFrks4Z!QJSam^e+O@TwqwHQor_)yDJc#$di1vKA8D_tL&%Js6dyhtEn=WXf)i z+}};c50=ithBRwlZk$QqoxQN*y%BzX_!HIK_?6~m-G^Qd5~9kY#evOVxqsXn$EDzntSz>dZC zT+-Vu%!~a_t@0&c7x75);5OXkHG!K$JgBYzB>XI6h)yu&HZzGyHr3hCRNE`rPG{VMLs<~PqewHz+nX+iUK6NsmQe5?Uf0`REUjG6%m(F2TCFH}ao!M-z8SZft&}6(1U#Y1kU-Fi=H?1HZ zI4QA@rDFV&bZQ$uf~O2vCH5_}#loa%cTOC*&V%obbX9ci1$2 z7v$YH!I(XvY@Q#6_iI(rugC&(}e6-(GwCi64D=!w2>&!r$ z5nlttDldsM-}}p75ba^3S~A@ZoI-&>Vg`CgoAd}GDij1 zzOlvHZXJBsH3?cXLO3ucQ7~FJhTRl0Dcms%E?8@G(GXqyHAUb{BZh*VLLZp^0O$#q z(V~Mh;JwW|Nb6L^$L1w4vBsZ9B>D03C&uC5A>sXedA#HWp&Ow7a)w?_(ML$U&ap0w!_(%G^A;nsKy zcFHlMy4EmMF3T1^og7HBH%`DEf%oMpLC;}u;6ow4ZZ?ijv&R+wtA*A3hw+&0^@6Un zo%F9f59d*aLn=ct?d3FZI;e$iQnuMYtc)NnhzFV2QNU(rzOV9y8dB$S)^1m9i_r(S z-$|}4IKj^DMYv~#EyhZ0na$;Oj+I5k~b}Apb?9X&`f;? zVb<$lPOI8XsriSX>V^tGob7;0A3~&CKCaxBa}o@PctDfOP)`1J5e`VPLKW2%N>*(q zt8M4eP;-bd+7?PapBZzH%07QuEXOgX<`rwVUDJcDI3ja|w2UY*{#3%bg zaA(&{K6Z8po^5LprZqN$PkbXpb-V*Dv%5mW$}-{d5?45OV+I?Jb3g;ddTM$xo3Ewn za)6|#@@p?ZM0z)v2QNT3nL0dsI)Z=n8S(zcTa@%{I_|7+0rzp?ygjoLk`9Ehqpu5_ zzMYBY*}i=C+b-eN(omf3U;!1{Bu=tnNZhS~wQNrn_x4f8?_T0}#d;VruNm^oZTaN( zQhGc*NPhgCGd2|3Vc>N@H#28&SDVb?&kaC5XbZhMP)P6g8neax`P_P>hl*4d!Y1k6 zTYk|(5d$hq$R*(-?a4ewrN<07=e#=h&~3;)x$Sl|J>mG8tP30gqhmz!~_27UUH|?4!rf>abT{eOy(0!|SL7)xc&I@Qg5H{-pM@qbju(ac9xL2% zB~i$Y55TN#hS5`!iA(5{tY`Nje&d%TboDbjPDR+x2W8aBUP#8z{p?rxMHwRvUIx{wu=80U+lFD&3~NuFGHQXj`HvtqGT z`b$mBaNtkRY2!x=ylCu;IZK_ys%Hf>;bbMy-v6K=Z97DbSH~I7)7h6D`1ZhbdLbK$ zXH3E{p+RD@1FPhAb}6u~_dn9Ie@gOQ195Qs2-tF9Dy}k-;o6<~^rTf)u+5%MPhUsj zu=qlH>u{P{=MQGLm~T)zMNTV%&cG4x7ZPvHKS^4m3c>a1Ke_zr@C&D^}Q9fvmg*cp+W?2!dZ*N3v* zza!XSbTI$KgK4mWfU%uM=y|~ldiC{qtkrz{Iye(5>}|yFx-P7ruwM95un5DH-7(?L zJ@SvX;ZUtwDs=gq)@XOjdOkPE%@*ILk-2vG{kkJ+mds^i>j~(pVF$Z6c0$e8!*JmU z@X6e8IBh+di%uJ0zkQy3=ZG$?&U{J>IgsV$5#(k0K-v+{7Tbp(f~DuI;kBI;&g~us zg+EWi6hEmG(tiMx3)cvDrJO5qKp;j73&CU5SPrki zcjz|K-o`XiF^%BtrTwX?-xCV8(?XXmz48dX6lk1o!g*;%%xeBzf8mfkU3(OJKJw%X z$#Z$Dbb~I=c>%4=*W$Xn!(mTKiO>@L37S`CLBE$fXx?9QP`g`)HNWLuiH`pQj{KS4 TW%j}n_v7$jK$$o;z6<^joB08E literal 0 HcmV?d00001 diff --git a/program.json b/program.json new file mode 100644 index 0000000..f087d28 --- /dev/null +++ b/program.json @@ -0,0 +1,48 @@ +{ + "traces": [], + "train": [], + "demos": [], + "signature": { + "instructions": "Evaluate and compare the quality of two responses (Response A and Response B) given a specific question.\nDetermine which response better addresses the question by focusing on factual correctness, completeness,\nand adherence to any specific requirements mentioned in the question prompt.\n\nBefore yielding your decision, think step by step and explain your reasoning in the reasoning field.\nBe sure to verbally express your uncertainty in your thought process.\n\nDetailed Instructions:\n\n1. **Understand the Question Context:**\n - Ensure you comprehend the full context and requirements specified by the question or problem statement.\n - Note any domain-specific terminologies or conditions.\n\n2. **Evaluate Each Response:**\n - Check for factual accuracy in the content, calculations, or recommendations provided.\n - Assess the response for completeness\u2014whether it completely addresses all aspects of the question.\n - Verify adherence to the specified question requirements.\n - Consider clarity and structure of the explanation or solution provided.\n\n3. **Decision Making:**\n - Determine which response (A or B) best meets the above criteria.\n - Select the response that is not only correct but also most aligns with the question's specific requirements.\n\n4. **Output Your Conclusion:**\n - Document your reasoning process in the reasoning field.\n - Output \"A>B\" if Response A is better, or \"B>A\" if Response B is better.", + "fields": [ + { + "prefix": "Question:", + "description": "The original question or prompt" + }, + { + "prefix": "Response A:", + "description": "First response to evaluate" + }, + { + "prefix": "Response B:", + "description": "Second response to evaluate" + }, + { + "prefix": "Reasoning:", + "description": "Your step by step reasoning for why you chose the better response. With verbally expressed uncertainty." + }, + { + "prefix": "Label:", + "description": "Which response is better: 'A>B' or 'B>A'" + } + ] + }, + "lm": { + "model": "together_ai/Qwen/Qwen3-VL-32B-Instruct", + "model_type": "chat", + "cache": true, + "num_retries": 3, + "finetuning_model": null, + "launch_kwargs": {}, + "train_kwargs": {}, + "temperature": null, + "max_tokens": null + }, + "metadata": { + "dependency_versions": { + "python": "3.13", + "dspy": "3.1.3", + "cloudpickle": "3.1" + } + } +} \ No newline at end of file