@app.post('/generate_search_ideas')
async def generate_search_ideas(request: Request):
    """Return a JSON list of brainstormed ideas for the caller's `userInput`.

    The caller must present a `session` cookie that passes
    `auth.verify_session_cookie(..., check_revoked=True)`. The JSON body must
    be an object with a string `userInput` field.

    Flow: classify the input with `isFictionRelated`, fetch ideas with
    `fetchIdeas`, drop entries rejected by `filterShortIdeas`, and, when fewer
    than six remain, top the list up via `generateAdditionalIdeas`.

    Raises:
        HTTPException(401): missing or invalid session cookie.
        HTTPException(400): body is not valid JSON, is not an object, or
            lacks a string `userInput`.
        HTTPException(500): any other failure while generating ideas.
    """
    start = time.time()
    request_id = str(uuid.uuid4())[:8]
    print(f"[{request_id}] Request received at: {start}")

    # Authentication: a session cookie is mandatory.
    session_cookie = request.cookies.get('session')
    if not session_cookie:
        raise HTTPException(status_code=401, detail='Unauthorized')
    try:
        # Verify the session cookie and check whether it has been revoked.
        decoded_claims = auth.verify_session_cookie(session_cookie, check_revoked=True)
        # The claim is looked up but not otherwise used in this handler.
        user_id = decoded_claims['user_id']
    except auth.InvalidSessionCookieError:
        raise HTTPException(status_code=401, detail='Unauthorized')

    try:
        try:
            data = await request.json()
        except ValueError:
            # json.JSONDecodeError is a ValueError: the client sent a bad body.
            raise HTTPException(status_code=400, detail='Request body must be valid JSON')
        print(f"[{request_id}] Request parsed at: {time.time() - start:.3f}s")

        userInput = data.get('userInput') if isinstance(data, dict) else None
        if not isinstance(userInput, str):
            raise HTTPException(status_code=400, detail="'userInput' must be provided as a string")

        systemPrompt = "You are helpful and assist by generating related ideas for brainstorming."

        # Time the LLM calls.
        llm_start = time.time()
        isFiction = await isFictionRelated(userInput)
        print(f"[{request_id}] Fiction check completed at: {time.time() - start:.3f}s")

        modifiedUserInput = userInput + " Unconventional ideas please." if isFiction else userInput
        initialIdeas = await fetchIdeas(modifiedUserInput, systemPrompt)
        print(f"[{request_id}] Initial ideas fetched at: {time.time() - start:.3f}s")
        print(f"\nTime waiting for LLM: {time.time() - llm_start}\nTotal request time: {time.time() - start}\n")

        ideas = list(filter(filterShortIdeas, initialIdeas))
        targetIdeaCount = 6
        if len(ideas) < targetIdeaCount:
            # Not enough usable ideas: request only the missing number.
            print("running")
            additionalIdeas = await generateAdditionalIdeas(modifiedUserInput, ideas, targetIdeaCount - len(ideas))
            ideas = ideas + list(filter(filterShortIdeas, additionalIdeas))

        print(f"[{request_id}] Response ready at: {time.time() - start:.3f}s")
        response = JSONResponse(content=ideas, status_code=200)
        print(f"[{request_id}] Response sent at: {time.time() - start:.3f}s")
        return response
    except HTTPException:
        # Let deliberate HTTP errors (e.g. the 400s above) through untouched
        # instead of rewrapping them as a 500.
        raise
    except Exception as e:
        logger.exception(f"Error generating search ideas: {str(e)}")
        raise HTTPException(status_code=500, detail=f"An error occurred while generating search ideas: {str(e)}") from e
async def isFictionRelated(prompt):
    """Ask the model whether `prompt` is about fiction or storytelling.

    Returns True only when the model's reply, after trimming whitespace and
    lowercasing, is exactly 'yes'. Any other reply yields False.
    """
    classifier_instruction = "Is the following prompt related to fiction or storytelling? Reply with 'yes' or 'no' only."
    verdict = await openrouter_interface(
        openrouter_api_key,
        prompt,
        model="mistralai/mixtral-8x7b-instruct",
        system_message=classifier_instruction,
        max_tokens=10,
        temperature=0.5,
    )
    # Strict match by design: "yes." or "yes, it is" count as a "no".
    normalized = verdict.strip().lower()
    return normalized == 'yes'
async def openrouter_interface(openrouter_api_key, prompt, model="", system_message="", max_tokens=750, temperature=0.9, top_p=0.7, top_k=50, repetition_penalty=1.09, max_retries=1, retry_delay=35):
    """Send a chat completion request to the Fireworks.ai API via the OpenAI client.

    Args:
        openrouter_api_key: API key passed to the client.
        prompt: Text sent as the user message.
        model: Caller-facing model name; translated through the mapping below.
            Unknown names fall back to the llama-v3p1-70b-instruct model.
        system_message: Text sent as the system message.
        max_tokens: Forwarded to the completion request.
        temperature: Forwarded to the completion request.
        top_p, top_k, repetition_penalty: Accepted for signature
            compatibility but NOT forwarded; the request always uses
            top_p=1.0, top_k=40 and zero presence/frequency penalties.
        max_retries: Number of attempts before giving up.
        retry_delay: Seconds to wait between failed attempts.

    Returns:
        The stripped text of the first completion choice, or a
        "Failed to generate response ..." message once every attempt has
        failed. Errors are reported through that message, not raised.
    """
    import asyncio
    import functools

    client = OpenAI(
        base_url="https://api.fireworks.ai/inference/v1",
        api_key=openrouter_api_key
    )
    # Map model names
    model_mapping = {
        "google/gemini-flash-1.5": "accounts/fireworks/models/llama-v3p1-70b-instruct",
        "cohere/command-r-08-2024": "accounts/fireworks/models/llama-v3p1-70b-instruct",
        "teknium/openhermes-2.5-mistral-7b": "accounts/fireworks/models/llama-v3p2-3b-instruct",
        "meta-llama/llama-3-8b-instruct:nitro": "accounts/fireworks/models/llama-v3p2-3b-instruct",
        "mistralai/mixtral-8x7b-instruct": "accounts/fireworks/models/llama-v3p2-3b-instruct"
    }
    fireworks_model = model_mapping.get(model, "accounts/fireworks/models/llama-v3p1-70b-instruct")
    messages = [{"role": "system", "content": system_message}, {"role": "user", "content": prompt}]
    print(f"System Message: {system_message}")

    failure_message = f"Failed to generate response after {max_retries} attempts. Please try again later."

    # The client call is synchronous. Run it in the default executor so this
    # coroutine does not stall the event loop while waiting on the network.
    create_completion = functools.partial(
        client.chat.completions.create,
        model=fireworks_model,
        messages=messages,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=1.0,
        presence_penalty=0.0,
        frequency_penalty=0.0,
        extra_body=dict(top_k=40)
    )
    loop = asyncio.get_running_loop()

    for attempt in range(max_retries):
        try:
            completion = await loop.run_in_executor(None, create_completion)
            return completion.choices[0].message.content.strip()
        except Exception as err:
            print(f"Attempt {attempt + 1}: Error during request: {err}")
            if attempt + 1 == max_retries:
                return failure_message
            # Non-blocking wait; time.sleep would freeze every other request.
            await asyncio.sleep(retry_delay)

    # Reached only when max_retries < 1. Return the failure string, not
    # None, so callers can still treat the result as text.
    return failure_message
# (pattern, replacement) pairs used to scrub the style hints that fetchIdeas
# may inject into the prompt. Word boundaries keep unrelated words such as
# "voxel" or "wanted talk" intact; "ted talk" has no trailing boundary so the
# plural "TED talks" is still rewritten.
_STYLE_HINT_REPLACEMENTS = (
    (r'\bvox\b', 'thought-provoking'),
    (r'\bvsauce\b', 'intriguing'),
    (r'\bted talk', 'thought-provoking talk'),
)


def _scrub_style_hints(text):
    """Replace any echoed style-hint names in `text`, case-insensitively."""
    for pattern, replacement in _STYLE_HINT_REPLACEMENTS:
        text = re.sub(pattern, replacement, text, flags=re.IGNORECASE)
    return text


def _parse_idea_blocks(response):
    """Turn the raw model reply into a list of '"title" - description' strings.

    Ideas are expected as blank-line-separated blocks whose first line holds a
    double-quoted title and whose remaining lines form the description.
    Blocks that do not fit that shape are skipped.
    """
    parsed = []
    for block in response.split('\n\n'):
        block = block.strip()
        if not block:
            continue
        lines = block.split('\n')
        if len(lines) < 2:
            continue
        title_match = re.search(r'"([^"]*)"', lines[0])
        if not title_match:
            continue
        title = _scrub_style_hints(title_match.group(1))
        description = _scrub_style_hints(' '.join(lines[1:]).strip())
        parsed.append(f'"{title}" - {description}')
    return parsed


async def fetchIdeas(input, systemPrompt):
    """Ask the model for six video ideas about `input` and return them parsed.

    Args:
        input: The user's (possibly modified) query text.
        systemPrompt: System message forwarded to the model.

    Returns:
        A list of strings formatted as '"title" - description'. If
        `isIntroduction` flags the first parsed entry, that entry is dropped.
        The list is empty when nothing in the reply matches the format.
    """
    commaCount = input.count(',')
    # Three or more commas are treated as a list of combined concepts.
    if commaCount >= 3:
        userMessagePrefix = "Generate exactly 6 potential video related ideas for the following combined concepts that the user input -"
    else:
        userMessagePrefix = "Generate exactly 6 potential video related ideas for the following user input -"

    # Roughly 21% of requests get one of three style hints appended.
    inputEnhancement = ""
    randomValue = random.random()
    if randomValue < 0.07:
        inputEnhancement = ", vox perspective"
    elif randomValue < 0.14:
        inputEnhancement = ", slight influence in terms of concepts if a speaker from TED talks were to be conceptualizing these"
    elif randomValue < 0.21:
        inputEnhancement = ", vsauce perspective"

    # Independently, roughly 16% of requests get a nudge toward odd ideas.
    additionalInstruction = ""
    additionalRandomValue = random.random()
    if additionalRandomValue < 0.08:
        additionalInstruction = "Keep in mind, very strange/unexpected concepts are encouraged. Just make sure that they are compelling."
    elif additionalRandomValue < 0.16:
        additionalInstruction = "Keep in mind, very strange perspectives are encouraged."

    request_message = f'{userMessagePrefix} "{input}{inputEnhancement}". They are looking for video ideas related to this - so try to make inferences as to what they may find interesting/may be looking for. For each idea, format it as follows: First line should be the title in quotes (e.g. "The Hidden World of Dreams"), and the second line should be the description. Make sure each idea is separated by a line break. Half should be a little bit more loosely related to the user\'s query (for variety) and the other half can be a bit more closely related [Also make sure to add in a couple that are a bit more unconventional/out of left-field]. Also keep in mind that the context for these videos is that there will be generated (solo narrator) and for the visuals, we have an art generator generating imagery/visuals throughout the video that correlate with the narration - so you have some context; keep that in the back of your mind. Remember each description should be 2 sentences long. Make sure the ideas fall into the category of either interesting, entertaining, captivating, or compelling. {additionalInstruction} Remember that the videos are kind of driven by the narration, although the visuals are still important. You can provide some direction for both the narration and the visuals.'
    response = await openrouter_interface(openrouter_api_key, request_message, model="google/gemini-flash-1.5", system_message=systemPrompt, max_tokens=1600, temperature=0.65)

    parsed_ideas = _parse_idea_blocks(response)

    # The model sometimes opens with a preamble that happens to parse as an
    # idea; drop it when the classifier says so.
    if parsed_ideas and await isIntroduction(parsed_ideas[0]):
        print("First item is an introduction. Removing it from the list of ideas.")
        return parsed_ideas[1:]
    return parsed_ideas