get_company_profile
Retrieve LinkedIn company profiles including about information, posts, and job listings to analyze business details and activity.
Instructions
Get a specific company's LinkedIn profile.
Args:
- `company_name`: LinkedIn company name (e.g., 'google', 'stripe', 'openai').
- `sections`: Comma-separated list of extra sections to scrape. The about page is always included. Available sections: posts, jobs. Default (None) scrapes only the about page.
Input Schema
Table | JSON Schema
| Name | Required | Description | Default |
|---|---|---|---|
| company_name | Yes | LinkedIn company name (e.g., 'google', 'stripe', 'openai') | — |
| sections | No | Comma-separated list of extra sections (posts, jobs); the about page is always included | None |
Implementation Reference
- The main MCP tool handler for get_company_profile - registered with @mcp.tool decorator, accepts company_name and optional sections, executes the use case, serializes results, and handles errors.
@mcp.tool( name="get_company_profile", description=( "Get a specific company's LinkedIn profile.\n\n" "Args:\n" " company_name: LinkedIn company name (e.g., 'google', 'stripe', 'openai')\n" " sections: Comma-separated list of extra sections to scrape.\n" " The about page is always included.\n" " Available sections: posts, jobs\n" " Default (None) scrapes only the about page." ), ) async def get_company_profile( company_name: str, ctx: Context, sections: str | None = None, ) -> dict[str, Any]: try: result = await scrape_company_uc.execute(company_name, sections) response: dict[str, Any] = { "url": result.url, "sections": serialize_sections(result.sections), } if result.unknown_sections: response["unknown_sections"] = result.unknown_sections return response except Exception as e: map_domain_error(e, "get_company_profile") - Registration function that creates and registers the get_company_profile tool with the FastMCP server, injecting the ScrapeCompanyUseCase dependency.
def register_company_tools( mcp: FastMCP, scrape_company_uc: ScrapeCompanyUseCase, ) -> None: """Register company-related MCP tools.""" @mcp.tool( name="get_company_profile", description=( "Get a specific company's LinkedIn profile.\n\n" "Args:\n" " company_name: LinkedIn company name (e.g., 'google', 'stripe', 'openai')\n" " sections: Comma-separated list of extra sections to scrape.\n" " The about page is always included.\n" " Available sections: posts, jobs\n" " Default (None) scrapes only the about page." ), ) async def get_company_profile( company_name: str, ctx: Context, sections: str | None = None, ) -> dict[str, Any]: try: result = await scrape_company_uc.execute(company_name, sections) response: dict[str, Any] = { "url": result.url, "sections": serialize_sections(result.sections), } if result.unknown_sections: response["unknown_sections"] = result.unknown_sections return response except Exception as e: map_domain_error(e, "get_company_profile") - Business logic use case that handles the actual scraping - validates sections, ensures authentication, iterates through requested sections, extracts HTML from LinkedIn, parses content, and returns a ScrapeResponse.
async def execute( self, company_name: str, sections: str | None = None, ) -> ScrapeResponse: await self._auth.ensure_authenticated() requested, unknown = parse_company_sections(sections) requested = ( set(COMPANY_SECTIONS.keys()) if not requested else requested | {"about"} ) base_url = f"https://www.linkedin.com/company/{company_name}" parsed_sections: dict[str, Any] = {} first = True for section_name, section_config in COMPANY_SECTIONS.items(): if section_name not in requested: continue if not first: await asyncio.sleep(_NAV_DELAY) first = False url = base_url + section_config.url_suffix content = await self._browser.extract_page_html(url) if content.html: try: parsed_sections[section_name] = parse_section( section_name, content.html, entity_type="company", include_raw=self._debug, ) except NotImplementedError: logger.warning( "Parser not implemented for section '%s', using generic", section_name, ) parsed_sections[section_name] = parse_generic( content.html, include_raw=self._debug ) return ScrapeResponse( url=f"{base_url}/", sections=parsed_sections, unknown_sections=unknown, ) - Domain model defining the schema for company about section data - includes fields like name, overview, website, industry, size, headquarters, etc.
@dataclass
class CompanyAbout:
    """Company about/overview page — extracted from /company/{name}/about/."""

    name: str | None = None  # display name of the company
    overview: str | None = None  # free-text company description
    website: str | None = None
    phone: str | None = None
    industry: str | None = None
    company_size: str | None = None  # e.g. an employee-count range string
    headquarters: str | None = None
    type: str | None = None  # company type (shadows builtin `type` by design)
    founded: str | None = None
    specialties: str | None = None
    followers: str | None = None
    employees_on_linkedin: str | None = None
    logo_url: str | None = None
    raw: str | None = None  # raw scraped text, populated only in debug mode


@dataclass
class CompanyJobEntry:
    """A single job listing from a company's jobs page."""

    title: str | None = None
    job_id: str | None = None
    job_url: str | None = None
    company: str | None = None
    location: str | None = None
    posted_time: str | None = None  # relative timestamp as displayed, e.g. "2 days ago"
    metadata: str | None = None

@dataclass
class CompanyJobsSection:
    """Company jobs page — extracted from /company/{name}/jobs/."""

    total_openings: str | None = None
    jobs: list[CompanyJobEntry] = field(default_factory=list)
    raw: str | None = None  # raw scraped text, populated only in debug mode

@dataclass
class CompanyPostEntry:
    """A single post from a company's feed."""

    text: str | None = None
    time_posted: str | None = None
    reactions: str | None = None  # engagement counts kept as display strings
    comments: str | None = None
    reposts: str | None = None

@dataclass
class CompanyPostsSection:
    """Company posts feed — extracted from /company/{name}/posts/."""

    posts: list[CompanyPostEntry] = field(default_factory=list)
    raw: str | None = None  # raw scraped text, populated only in debug mode
- Helper function that serializes all parsed sections by converting typed dataclass models to JSON-serializable dictionaries, removing None values.
def serialize_sections(sections: dict[str, Any]) -> dict[str, Any]:
    """Serialize every section model in *sections*, dropping None values.

    Delegates per-section conversion to serialize_section and keeps the
    original section names as keys.
    """
    serialized: dict[str, Any] = {}
    for name, section in sections.items():
        serialized[name] = serialize_section(section)
    return serialized