feat: монорепо миграция, Discover/SearxNG улучшения
- Миграция на монорепозиторий (apps/frontend, apps/chat-service, etc.)
- Discover: проверка SearxNG, понятное empty state при ненастроенном поиске
- searxng.ts: валидация URL, проверка JSON-ответа, авто-добавление http://
- docker/searxng-config: настройки для JSON API SearxNG

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
17
apps/shared-utils/package.json
Normal file
17
apps/shared-utils/package.json
Normal file
@@ -0,0 +1,17 @@
|
||||
{
|
||||
"name": "@gooseek/shared-utils",
|
||||
"version": "1.0.0",
|
||||
"dependencies": {
|
||||
"@gooseek/shared-types": "*"
|
||||
},
|
||||
"main": "./src/index.ts",
|
||||
"types": "./src/index.ts",
|
||||
"exports": {
|
||||
".": "./src/index.ts",
|
||||
"./serverUtils": "./src/serverUtils.ts",
|
||||
"./splitText": "./src/splitText.ts",
|
||||
"./computeSimilarity": "./src/computeSimilarity.ts",
|
||||
"./files": "./src/files.ts",
|
||||
"./formatHistory": "./src/formatHistory.ts"
|
||||
}
|
||||
}
|
||||
22
apps/shared-utils/src/computeSimilarity.ts
Normal file
22
apps/shared-utils/src/computeSimilarity.ts
Normal file
@@ -0,0 +1,22 @@
|
||||
/**
 * Cosine similarity of two equal-length numeric vectors.
 *
 * @param x - First vector.
 * @param y - Second vector; must have the same number of entries as `x`.
 * @returns The dot product divided by the product of both Euclidean norms,
 *   or `0` when either vector has zero magnitude (including empty vectors).
 * @throws Error when the two vectors differ in length.
 */
const computeSimilarity = (x: number[], y: number[]): number => {
  if (x.length !== y.length) {
    throw new Error('Vectors must be of the same length');
  }

  let dot = 0;
  let sumSquaresX = 0;
  let sumSquaresY = 0;

  for (const [idx, xi] of x.entries()) {
    const yi = y[idx];
    dot += xi * yi;
    sumSquaresX += xi * xi;
    sumSquaresY += yi * yi;
  }

  // A zero-magnitude vector has no direction; report "no similarity"
  // instead of dividing by zero.
  const hasZeroVector = sumSquaresX === 0 || sumSquaresY === 0;

  return hasZeroVector
    ? 0
    : dot / (Math.sqrt(sumSquaresX) * Math.sqrt(sumSquaresY));
};
|
||||
|
||||
export default computeSimilarity;
|
||||
17
apps/shared-utils/src/files.ts
Normal file
17
apps/shared-utils/src/files.ts
Normal file
@@ -0,0 +1,17 @@
|
||||
import path from 'path';
|
||||
import fs from 'fs';
|
||||
|
||||
/**
 * Looks up the display name of an uploaded file from its extracted-JSON sidecar.
 *
 * Synchronously reads `<cwd>/uploads/<fileId>-extracted.json`, parses it as
 * JSON and returns its `title` field together with the id that was passed in.
 *
 * @param fileId - Identifier used as the file-name prefix of the sidecar file.
 * @returns An object holding the parsed `title` (as `name`) and `fileId`.
 * @throws If the file cannot be read or does not contain valid JSON.
 */
export const getFileDetails = (fileId: string) => {
  const uploadsDir = path.join(process.cwd(), './uploads');
  const extractedPath = path.join(uploadsDir, fileId + '-extracted.json');

  const rawJson = fs.readFileSync(extractedPath, 'utf8');
  const { title } = JSON.parse(rawJson);

  return { name: title, fileId };
};
|
||||
12
apps/shared-utils/src/formatHistory.ts
Normal file
12
apps/shared-utils/src/formatHistory.ts
Normal file
@@ -0,0 +1,12 @@
|
||||
import type { ChatTurnMessage } from '../../shared-types/src/types';
|
||||
|
||||
/**
 * Renders a chat history as plain text, one turn per line.
 *
 * Turns whose `role` is `'assistant'` are prefixed with `AI: `; every other
 * role is prefixed with `User: `. Lines are joined with a single newline.
 *
 * @param history - Chat turns in the order they should appear.
 * @returns The formatted transcript (an empty string for an empty history).
 */
const formatChatHistoryAsString = (history: ChatTurnMessage[]) => {
  const lines: string[] = [];

  for (const turn of history) {
    const speaker = turn.role === 'assistant' ? 'AI' : 'User';
    lines.push(`${speaker}: ${turn.content}`);
  }

  return lines.join('\n');
};
|
||||
|
||||
export default formatChatHistoryAsString;
|
||||
6
apps/shared-utils/src/index.ts
Normal file
6
apps/shared-utils/src/index.ts
Normal file
@@ -0,0 +1,6 @@
|
||||
export { hashObj } from './serverUtils';
|
||||
export { splitText } from './splitText';
|
||||
export { default as computeSimilarity } from './computeSimilarity';
|
||||
export { getFileDetails } from './files';
|
||||
export { cn, formatTimeDifference } from './utils';
|
||||
export { default as formatHistory } from './formatHistory';
|
||||
7
apps/shared-utils/src/serverUtils.ts
Normal file
7
apps/shared-utils/src/serverUtils.ts
Normal file
@@ -0,0 +1,7 @@
|
||||
import crypto from 'crypto';
|
||||
|
||||
/**
 * Computes a SHA-256 hex digest of an object that does not depend on the
 * insertion order of its keys.
 *
 * The object is serialized with `JSON.stringify`, with the keys of every
 * plain object (at any depth) emitted in sorted order. Arrays keep their
 * element order. Values are otherwise serialized by the normal JSON rules
 * (`undefined` properties are omitted, `toJSON` is honoured, and so on).
 *
 * Previously an array of the top-level keys was passed as the replacer. That
 * array acts as an allow-list at every nesting level, so nested properties
 * whose names were not also top-level keys were dropped and different
 * objects could share a hash. Flat objects hash exactly as before; only
 * objects with nested data (which used to be hashed lossily) get new digests.
 *
 * @param obj - The object to hash.
 * @returns Lower-case hexadecimal SHA-256 digest of the canonical JSON.
 */
export const hashObj = (obj: { [key: string]: any }) => {
  const json = JSON.stringify(obj, (_key, value: unknown) => {
    // Only plain objects need re-ordering; primitives and arrays pass through.
    if (value === null || typeof value !== 'object' || Array.isArray(value)) {
      return value;
    }

    const source = value as Record<string, unknown>;
    const ordered: Record<string, unknown> = {};
    // Note: integer-like keys are always enumerated first by the engine, which
    // is still deterministic and therefore fine for hashing.
    for (const key of Object.keys(source).sort()) {
      ordered[key] = source[key];
    }
    return ordered;
  });

  return crypto.createHash('sha256').update(json).digest('hex');
};
|
||||
74
apps/shared-utils/src/splitText.ts
Normal file
74
apps/shared-utils/src/splitText.ts
Normal file
@@ -0,0 +1,74 @@
|
||||
import { getEncoding } from 'js-tiktoken';
|
||||
|
||||
// Zero-width lookbehind: matches the position right after ". ", a newline, "! ", "? ", "; ",
// ":" plus whitespace, digits followed by "." and whitespace, "- " or "* ". Because the match
// itself is empty, String.prototype.split keeps each delimiter at the end of the preceding piece.
const splitRegex = /(?<=\. |\n|! |\? |; |:\s|\d+\.\s|- |\* )/g;
|
||||
|
||||
// js-tiktoken encoder for the "cl100k_base" encoding, created once at module load
// and reused by getTokenCount for every call.
const enc = getEncoding('cl100k_base');
|
||||
|
||||
/**
 * Counts the tokens in `text` using the module-level encoder.
 *
 * If encoding throws for any reason, falls back to a rough estimate of one
 * token per four characters (rounded up) so callers always get a number.
 *
 * @param text - Text to measure.
 * @returns Number of tokens, or the character-based estimate on failure.
 */
const getTokenCount = (text: string): number => {
  let tokenCount: number;

  try {
    const tokens = enc.encode(text);
    tokenCount = tokens.length;
  } catch {
    // Best-effort fallback: approximate instead of failing the whole split.
    tokenCount = Math.ceil(text.length / 4);
  }

  return tokenCount;
};
|
||||
|
||||
/**
 * Splits text into chunks of at most `maxTokens` tokens, each prefixed with
 * up to `overlapTokens` tokens of context taken from the preceding segments.
 *
 * The text is first cut into segments at `splitRegex` boundaries. Whole
 * segments are then packed greedily into chunks; segments are never cut in
 * the middle. The overlap prefix is added on top of the chunk budget, so a
 * returned string can hold up to `maxTokens + overlapTokens` tokens.
 *
 * A single segment that is larger than `maxTokens` is emitted as its own
 * (oversized) chunk. Previously such a segment made no progress and the
 * function looped forever, pushing empty strings.
 *
 * @param text - Text to split.
 * @param maxTokens - Token budget for the body of each chunk.
 * @param overlapTokens - Token budget for the context copied from before the chunk.
 * @returns The chunks in document order; an empty array for empty input.
 */
export const splitText = (
  text: string,
  maxTokens = 512,
  overlapTokens = 64,
): string[] => {
  const segments = text.split(splitRegex).filter(Boolean);

  if (segments.length === 0) {
    return [];
  }

  const segmentTokenCounts = segments.map((segment) => getTokenCount(segment));

  const result: string[] = [];

  let chunkStart = 0;

  while (chunkStart < segments.length) {
    let chunkEnd = chunkStart;
    let currentTokenCount = 0;

    // Greedily take whole segments while they fit into the budget.
    while (chunkEnd < segments.length && currentTokenCount < maxTokens) {
      if (currentTokenCount + segmentTokenCounts[chunkEnd] > maxTokens) {
        break;
      }

      currentTokenCount += segmentTokenCounts[chunkEnd];
      chunkEnd++;
    }

    // Guarantee forward progress: if not even the first segment fits
    // (oversized segment or non-positive budget), emit it on its own
    // rather than spinning on the same index forever.
    if (chunkEnd === chunkStart) {
      chunkEnd = chunkStart + 1;
    }

    // Walk backwards from the segment just before the chunk, collecting
    // whole segments until the overlap budget would be exceeded.
    let overlapBeforeStart = chunkStart - 1;
    let overlapBeforeTokenCount = 0;

    while (overlapBeforeStart >= 0 && overlapBeforeTokenCount < overlapTokens) {
      if (
        overlapBeforeTokenCount + segmentTokenCounts[overlapBeforeStart] >
        overlapTokens
      ) {
        break;
      }

      overlapBeforeTokenCount += segmentTokenCounts[overlapBeforeStart];
      overlapBeforeStart--;
    }

    // overlapBeforeStart now points one before the first overlapping segment.
    const overlapStartIndex = Math.max(0, overlapBeforeStart + 1);

    const overlapBeforeContent = segments
      .slice(overlapStartIndex, chunkStart)
      .join('');

    const chunkContent = segments.slice(chunkStart, chunkEnd).join('');

    result.push(overlapBeforeContent + chunkContent);

    chunkStart = chunkEnd;
  }

  return result;
};
|
||||
27
apps/shared-utils/src/utils.ts
Normal file
27
apps/shared-utils/src/utils.ts
Normal file
@@ -0,0 +1,27 @@
|
||||
import clsx, { ClassValue } from 'clsx';
|
||||
import { twMerge } from 'tailwind-merge';
|
||||
|
||||
/**
 * Builds a class string from the given class values with `clsx`, then passes
 * the result through `twMerge`.
 *
 * @param classes - Any values accepted by `clsx`.
 * @returns The string returned by `twMerge`.
 */
export const cn = (...classes: ClassValue[]) => {
  const combined = clsx(...classes);
  return twMerge(combined);
};
|
||||
|
||||
/**
 * Describes the absolute time between two dates in the largest fitting unit.
 *
 * The difference is truncated to whole seconds and reported as seconds,
 * minutes, hours, days or years (a year being 365 days), rounded down and
 * pluralised with a trailing "s" unless the count is exactly 1. The order of
 * the arguments does not matter.
 *
 * @param date1 - One end of the interval, as a `Date` or a date string.
 * @param date2 - The other end of the interval, as a `Date` or a date string.
 * @returns Text such as `"1 minute"` or `"3 days"`. Unparseable dates yield
 *   `"NaN years"`.
 */
export const formatTimeDifference = (
  date1: Date | string,
  date2: Date | string,
): string => {
  const firstMs = new Date(date1).getTime();
  const secondMs = new Date(date2).getTime();
  const elapsedSeconds = Math.floor(Math.abs(secondMs - firstMs) / 1000);

  // Each unit applies while the elapsed time is below `below` seconds.
  const units = [
    { below: 60, size: 1, label: 'second' },
    { below: 3600, size: 60, label: 'minute' },
    { below: 86400, size: 3600, label: 'hour' },
    { below: 31536000, size: 86400, label: 'day' },
  ];

  // Anything that matches no unit (a year or more, or NaN) is reported in years.
  const unit = units.find(({ below }) => elapsedSeconds < below) ?? {
    size: 31536000,
    label: 'year',
  };

  const count = Math.floor(elapsedSeconds / unit.size);
  const suffix = count !== 1 ? 's' : '';

  return `${count} ${unit.label}${suffix}`;
};
|
||||
Reference in New Issue
Block a user