Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
120 changes: 120 additions & 0 deletions myeongsung/app/api/experience_extraction_v2.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
import json
from typing import List, Optional

from fastapi import APIRouter, File, Form, HTTPException, UploadFile
from pydantic import TypeAdapter

from app.schemas.resume_dto import (
ExperiencePresetSchema,
ExperienceSummary,
MergeExperiencePayload,
Step2V2ExtractionResponse,
)


router = APIRouter()


def extract_step2_v2_from_text(text, selected_experiences, preset_schemas):
    """Forward a text-based Step2 V2 extraction to the service layer.

    The service module is imported when this function is called rather than
    when this module is loaded (presumably to defer the service's
    dependencies until first use -- TODO confirm).

    Args:
        text: Source text to extract from.
        selected_experiences: Experiences selected for detailed extraction.
        preset_schemas: Preset schemas passed through to the service.

    Returns:
        Whatever the service-level ``extract_step2_v2_from_text`` returns.
    """
    from app.services import experience_extraction_service as service

    return service.extract_step2_v2_from_text(
        text,
        selected_experiences,
        preset_schemas,
    )


def extract_step2_v2_from_url(url, selected_experiences, preset_schemas):
    """Forward a URL-based Step2 V2 extraction to the service layer.

    The service module is imported when this function is called rather than
    when this module is loaded (presumably to defer the service's
    dependencies until first use -- TODO confirm).

    Args:
        url: Address of the document to extract from.
        selected_experiences: Experiences selected for detailed extraction.
        preset_schemas: Preset schemas passed through to the service.

    Returns:
        Whatever the service-level ``extract_step2_v2_from_url`` returns.
    """
    from app.services import experience_extraction_service as service

    return service.extract_step2_v2_from_url(
        url,
        selected_experiences,
        preset_schemas,
    )


def extract_step2_v2_from_pdf(file_content, selected_experiences, preset_schemas):
    """Forward a PDF-based Step2 V2 extraction to the service layer.

    The service module is imported when this function is called rather than
    when this module is loaded (presumably to defer the service's
    dependencies until first use -- TODO confirm).

    Args:
        file_content: Raw content of the uploaded PDF.
        selected_experiences: Experiences selected for detailed extraction.
        preset_schemas: Preset schemas passed through to the service.

    Returns:
        Whatever the service-level ``extract_step2_v2_from_pdf`` returns.
    """
    from app.services import experience_extraction_service as service

    return service.extract_step2_v2_from_pdf(
        file_content,
        selected_experiences,
        preset_schemas,
    )


def apply_sequential_merge_results_to_step2(experiences, existing_experiences):
    """Forward merge post-processing of Step2 results to the merge service.

    The merge service module is imported when this function is called rather
    than when this module is loaded (presumably to defer the service's
    dependencies until first use -- TODO confirm).

    Args:
        experiences: Extracted experiences to post-process.
        existing_experiences: Previously stored experiences supplied by the
            caller.

    Returns:
        Whatever the service-level
        ``apply_sequential_merge_results_to_step2`` returns.
    """
    from app.services import experience_merge_service as merge_service

    return merge_service.apply_sequential_merge_results_to_step2(
        experiences,
        existing_experiences,
    )


@router.post(
    "/extract-experiences/step2-v2",
    response_model=Step2V2ExtractionResponse,
)
async def extract_experiences_step2_v2(
    file: Optional[UploadFile] = File(None),
    url: Optional[str] = Form(None),
    text: Optional[str] = Form(None),
    selected_experiences: str = Form(...),
    existing_experiences: str = Form("[]"),
    preset_schemas: str = Form(...),
):
    """Run Step2 V2 experience extraction on one source and apply merge results.

    A single source is used, chosen in this order: uploaded file, then
    ``url``, then ``text``. Blank/whitespace-only ``url`` and ``text`` values
    and uploads without a filename count as "not provided".

    Args:
        file: Optional upload. A filename ending in ``.pdf`` (case-insensitive)
            is handled as a PDF; any other file is decoded as UTF-8 text.
        url: Optional address of the source document.
        text: Optional raw source text.
        selected_experiences: JSON array validated as
            ``List[ExperienceSummary]``.
        existing_experiences: JSON array validated as
            ``List[MergeExperiencePayload]``; defaults to ``"[]"``.
        preset_schemas: JSON array validated as
            ``List[ExperiencePresetSchema]``.

    Returns:
        The extraction result, with its ``experiences`` replaced by the output
        of ``apply_sequential_merge_results_to_step2``.

    Raises:
        HTTPException: 400 when no usable source is given, when any JSON field
            fails to parse/validate, or when extraction raises ``ValueError``;
            500 for any other failure during extraction.
    """
    # Normalise the three possible sources once so that the "is anything
    # provided?" check and the dispatch below agree. Previously an upload part
    # with an empty filename passed the check but then fell through to
    # ``text.strip()`` with ``text is None`` and surfaced as a 500.
    has_file = file is not None and bool(file.filename)
    source_url = url.strip() if url else ""
    source_text = text.strip() if text else ""

    if not (has_file or source_url or source_text):
        raise HTTPException(
            status_code=400,
            detail="file (업로드 파일), url, text 중 최소 하나는 제공되어야 합니다.",
        )

    try:
        selected_list = TypeAdapter(List[ExperienceSummary]).validate_python(
            json.loads(selected_experiences)
        )
        existing_list = TypeAdapter(List[MergeExperiencePayload]).validate_python(
            json.loads(existing_experiences)
        )
        preset_list = TypeAdapter(List[ExperiencePresetSchema]).validate_python(
            json.loads(preset_schemas)
        )
    except Exception as e:
        raise HTTPException(
            status_code=400,
            detail=f"Step2 V2 요청 JSON 파싱 오류: {str(e)}",
        ) from e

    # NOTE(review): the extraction/merge helpers below are plain synchronous
    # calls made from an ``async def`` handler; if they perform network or LLM
    # I/O they will hold the event loop for their duration -- confirm whether
    # they should be offloaded to a worker thread.
    try:
        if has_file:
            file_content = await file.read()
            if file.filename.lower().endswith(".pdf"):
                result = extract_step2_v2_from_pdf(
                    file_content,
                    selected_list,
                    preset_list,
                )
            else:
                # UnicodeDecodeError is a ValueError, so a non-UTF-8 upload is
                # reported as a 400 by the handler below.
                result = extract_step2_v2_from_text(
                    file_content.decode("utf-8"),
                    selected_list,
                    preset_list,
                )
        elif source_url:
            result = extract_step2_v2_from_url(
                source_url,
                selected_list,
                preset_list,
            )
        else:
            result = extract_step2_v2_from_text(
                source_text,
                selected_list,
                preset_list,
            )

        result.experiences = apply_sequential_merge_results_to_step2(
            result.experiences,
            existing_list,
        )
        return result
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e)) from e
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
7 changes: 6 additions & 1 deletion myeongsung/app/api/router.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
Step2ExtractionResponse,
)

from app.api.experience_extraction_v2 import router as experience_extraction_v2_router
from app.services.resume_service import create_workflow, parse_and_validate_experiences
from app.services.job_analysis_service import analyze_job_url
from app.services.pdf_analysis_service import analyze_job_pdf
Expand All @@ -38,11 +39,15 @@
extract_step2_from_url,
extract_step2_from_pdf,
)
from app.services.experience_merge_service import apply_merge_results_to_step2, check_merge_candidates
from app.services.experience_merge_service import (
apply_merge_results_to_step2,
check_merge_candidates,
)
from app.services.eval_service import log_evaluation


router = APIRouter()
router.include_router(experience_extraction_v2_router)
workflow = create_workflow()


Expand Down
134 changes: 124 additions & 10 deletions myeongsung/app/services/experience_extraction_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,17 @@
import fitz # PyMuPDF
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from typing import List
from app.schemas.resume_dto import ExperienceExtractionResponse, Step1ExtractionResponse, ExperienceSummary, Step2ExtractionResponse, Step2ExtractedExperience
from typing import Any, List, Optional
from app.schemas.resume_dto import (
ExperienceExtractionResponse,
ExperiencePresetSchema,
ExperienceSummary,
Step1ExtractionResponse,
Step2ExtractionResponse,
Step2ExtractedExperience,
Step2V2ExtractionResponse,
)
from app.services.experience_preset_service import build_dynamic_step2_model
def extract_step1_from_text(text: str) -> Step1ExtractionResponse:
"""
텍스트에서 1차 경험 추출 (상세 증빙형 / 스펙 증빙형 분류 및 경험명 추출)
Expand Down Expand Up @@ -173,14 +182,8 @@ def extract_experiences_from_pdf(file_content: bytes) -> ExperienceExtractionRes
except Exception as e:
raise ValueError(f"PDF 분석 중 오류가 발생했습니다: {str(e)}")

def extract_step2_from_text(text: str, selected_experiences: List[ExperienceSummary]) -> Step2ExtractionResponse:
"""
1차 추출에서 사용자가 선택한 경험 리스트를 바탕으로, 각 경험의 소분류에 맞는 맞춤형 필드를 원문에서 추출합니다.
(TPM 제한 방지를 위해 각 경험별로 개별 추출 후 병합합니다.)
"""
llm = ChatOpenAI(model="gpt-4o", temperature=0)

prompt = ChatPromptTemplate.from_messages([
def _step2_prompt() -> ChatPromptTemplate:
return ChatPromptTemplate.from_messages([
("system", (
"당신은 사용자의 원문 텍스트에서 특정 경험의 상세 항목을 추출하는 전문가입니다.\n"
"사용자가 제공하는 '선택된 경험'에 대하여 원문에서 해당하는 내용을 찾아 지정된 스키마에 맞게 상세 정보를 추출하세요.\n\n"
Expand All @@ -204,6 +207,15 @@ def extract_step2_from_text(text: str, selected_experiences: List[ExperienceSumm
)),
("user", "다음은 원문 텍스트입니다:\n\n<TEXT>\n{text}\n</TEXT>\n\n다음은 상세 내용을 추출해야 할 선택된 경험입니다:\n{selected_experience}")
])


def extract_step2_from_text(text: str, selected_experiences: List[ExperienceSummary]) -> Step2ExtractionResponse:
"""
1차 추출에서 사용자가 선택한 경험 리스트를 바탕으로, 각 경험의 소분류에 맞는 맞춤형 필드를 원문에서 추출합니다.
(TPM 제한 방지를 위해 각 경험별로 개별 추출 후 병합합니다.)
"""
llm = ChatOpenAI(model="gpt-4o", temperature=0)
prompt = _step2_prompt()

chain = prompt | llm.with_structured_output(Step2ExtractedExperience)

Expand All @@ -226,6 +238,53 @@ def extract_step2_from_text(text: str, selected_experiences: List[ExperienceSumm

return Step2ExtractionResponse(experiences=extracted_experiences)


def extract_step2_v2_from_text(
    text: str,
    selected_experiences: List[ExperienceSummary],
    preset_schemas: List[ExperiencePresetSchema],
    llm: Optional[Any] = None,
) -> Step2V2ExtractionResponse:
    """Extract detailed fields for each selected experience using preset schemas.

    For every selected experience the preset whose ``experience_type_name``
    equals the experience's ``experience_type`` is looked up, an output model
    is built with ``build_dynamic_step2_model``, and the LLM is invoked once
    with that model as its structured output.

    Args:
        text: Source text the details are extracted from.
        selected_experiences: Experiences to extract, processed in order.
        preset_schemas: Presets, matched to experiences by type name.
        llm: Optional chat model to use instead of the default
            ``ChatOpenAI(model="gpt-4o", temperature=0)``.

    Returns:
        A ``Step2V2ExtractionResponse`` whose ``experiences`` are the dumped
        structured results, in selection order.

    Raises:
        ValueError: If a selected experience has no matching preset, if the
            LLM invocation fails, or if the returned ``experience_type`` /
            ``experience_group`` differs from the selected experience.
    """
    preset_by_type = {
        preset.experience_type_name: preset
        for preset in preset_schemas
    }

    # Resolve every preset before the first LLM call so that a missing preset
    # fails immediately instead of after earlier experiences were already
    # sent to the model.
    resolved = []
    for experience in selected_experiences:
        preset = preset_by_type.get(experience.experience_type)
        if preset is None:
            raise ValueError(
                f"'{experience.experience_type}' 경험의 프리셋 스키마가 없습니다."
            )
        resolved.append((experience, preset))

    prompt = _step2_prompt()
    # Explicit None check: an injected model must be used even if it happens
    # to evaluate as falsy.
    if llm is not None:
        extraction_llm = llm
    else:
        extraction_llm = ChatOpenAI(model="gpt-4o", temperature=0)
    extracted_experiences = []

    for experience, preset in resolved:
        output_model = build_dynamic_step2_model(experience, preset)
        chain = prompt | extraction_llm.with_structured_output(output_model)
        try:
            result = chain.invoke(
                {
                    "text": text,
                    "selected_experience": experience.model_dump(),
                },
                config={
                    "run_name": f"experience-step2-v2-extraction-{experience.experience_name}",
                    "tags": ["experience-extraction", "step2-v2"],
                },
            )
        except Exception as e:
            # Chain the cause explicitly so the original failure stays visible
            # in tracebacks.
            raise ValueError(
                f"'{experience.experience_name}' 2차 V2 경험 추출 중 오류가 발생했습니다: {str(e)}"
            ) from e

        # The model must echo back the classification it was asked about.
        if result.experience_type != experience.experience_type:
            raise ValueError("AI 응답의 경험 유형이 선택 경험과 일치하지 않습니다.")
        if result.experience_group != experience.experience_group:
            raise ValueError("AI 응답의 경험 그룹이 선택 경험과 일치하지 않습니다.")
        extracted_experiences.append(result.model_dump())

    return Step2V2ExtractionResponse(experiences=extracted_experiences)

def extract_step2_from_url(url: str, selected_experiences: List[ExperienceSummary]) -> Step2ExtractionResponse:
try:
response = requests.get(url, headers={"User-Agent": "Mozilla/5.0"}, timeout=10)
Expand Down Expand Up @@ -266,3 +325,58 @@ def extract_step2_from_pdf(file_content: bytes, selected_experiences: List[Exper
except Exception as e:
raise ValueError(f"PDF 분석 중 오류가 발생했습니다: {str(e)}")


def extract_step2_v2_from_url(
    url: str,
    selected_experiences: List[ExperienceSummary],
    preset_schemas: List[ExperiencePresetSchema],
) -> Step2V2ExtractionResponse:
    """Download a document from ``url`` and run Step2 V2 extraction on its text.

    The response is treated as a PDF when its ``Content-Type`` contains
    ``application/pdf`` or the URL path (query string removed) ends in
    ``.pdf``; otherwise it is parsed as HTML with ``<script>`` and ``<style>``
    elements removed.

    Args:
        url: Address of the document to fetch (10 second timeout).
        selected_experiences: Experiences to extract details for.
        preset_schemas: Presets forwarded to ``extract_step2_v2_from_text``.

    Returns:
        The result of ``extract_step2_v2_from_text`` on the extracted text.

    Raises:
        ValueError: On any failure (HTTP error, parsing error, empty text, or
            an error raised by the text extraction); the original exception is
            attached as the cause.
    """
    # NOTE(review): ``url`` is fetched exactly as given. If it can come from
    # an end user, confirm that requests to internal/private addresses are
    # blocked somewhere upstream (SSRF).
    try:
        response = requests.get(
            url,
            headers={"User-Agent": "Mozilla/5.0"},
            timeout=10,
        )
        response.raise_for_status()

        content_type = response.headers.get("Content-Type", "").lower()
        if "application/pdf" in content_type or url.lower().split("?")[0].endswith(".pdf"):
            # Context manager closes the PyMuPDF document once the text has
            # been read.
            with fitz.open(stream=response.content, filetype="pdf") as doc:
                full_text = "\n".join(page.get_text() for page in doc)
        else:
            soup = BeautifulSoup(response.text, "html.parser")
            for script in soup(["script", "style"]):
                script.decompose()
            raw_text = soup.get_text(separator="\n")
            lines = (line.strip() for line in raw_text.splitlines())
            # Break on runs of two spaces (multi-column headlines), not on
            # every single space, so sentences stay on one line.
            chunks = (phrase.strip() for line in lines for phrase in line.split("  "))
            full_text = "\n".join(chunk for chunk in chunks if chunk)

        if not full_text.strip():
            raise ValueError("URL에서 유의미한 텍스트를 추출하지 못했습니다.")
        return extract_step2_v2_from_text(
            full_text,
            selected_experiences,
            preset_schemas,
        )
    except Exception as e:
        raise ValueError(f"URL 분석 중 오류가 발생했습니다: {str(e)}") from e


def extract_step2_v2_from_pdf(
    file_content: bytes,
    selected_experiences: List[ExperienceSummary],
    preset_schemas: List[ExperiencePresetSchema],
) -> Step2V2ExtractionResponse:
    """Extract the text of an in-memory PDF and run Step2 V2 extraction on it.

    Args:
        file_content: Raw bytes of the PDF file.
        selected_experiences: Experiences to extract details for.
        preset_schemas: Presets forwarded to ``extract_step2_v2_from_text``.

    Returns:
        The result of ``extract_step2_v2_from_text`` on the page texts joined
        with newlines.

    Raises:
        ValueError: On any failure (unreadable PDF, no extractable text, or an
            error raised by the text extraction); the original exception is
            attached as the cause.
    """
    try:
        # Context manager closes the PyMuPDF document once the text has been
        # read.
        with fitz.open(stream=file_content, filetype="pdf") as doc:
            full_text = "\n".join(page.get_text() for page in doc)
        if not full_text.strip():
            raise ValueError("PDF에서 유의미한 텍스트를 추출하지 못했습니다.")
        return extract_step2_v2_from_text(
            full_text,
            selected_experiences,
            preset_schemas,
        )
    except Exception as e:
        raise ValueError(f"PDF 분석 중 오류가 발생했습니다: {str(e)}") from e
16 changes: 11 additions & 5 deletions myeongsung/docs/ai-harness/user-flows.md
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,8 @@ Open decisions:
Confirm any new experience classification types before adding them.

## Flow: Experience Extraction Step2
Date: 2026-06-02
Status: draft
Date: 2026-06-20
Status: changed

User action:
User selects step1 experience candidates for detailed extraction.
Expand All @@ -66,22 +66,28 @@ Spring API:
`POST /api/experiences/extract/step2`.

FastAPI API:
`POST /api/v1/extract-experiences/step2`.
`POST /api/v1/extract-experiences/step2` or
`POST /api/v1/extract-experiences/step2-v2`.

Input source:
Original file, URL, or text plus `selected_experiences`.
Original file, URL, or text plus `selected_experiences`. V2 also requires
Spring `PresetRegistry` schemas and accepts existing experiences.

Service flow:
FastAPI extracts detailed fields for each selected experience and applies merge candidate detection when existing experiences are provided.
V2 builds each `basic_info` output model from the runtime preset, rejects undeclared fields,
and checks each result against existing experiences plus earlier accepted results in selection order.

External APIs:
May use LLM, embeddings, URL parsing, and document parsing services.

Response:
Detailed experiences including `basic_info`, keywords, content, and merge metadata.
Batch-local merge candidates use IDs in the form `batch:{selected_index}` for Spring to resolve.

Failure cases:
Missing source, invalid `selected_experiences` JSON, invalid existing experience payload, external API failure, timeout.
Missing source, invalid request JSON, missing or mismatched preset schema, undeclared `basic_info`
field, external API failure, or timeout.

Spring compatibility:
Response must remain compatible with Spring `AiStep2Response`.
Expand Down
Loading