-
Notifications
You must be signed in to change notification settings - Fork 728
feat: tnc connectors implementation [CM-1010] #3894
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
fd9b1c8
4e3cbe0
3a96f52
8595dfa
3a22b74
678edcc
34dcd63
79399da
913d335
fa8cdcf
e5e6c94
f9cbea7
b439205
64bd8de
2b10995
947aa7c
5a1d27a
3f412b4
bdf6faf
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,6 @@ | ||
| INSERT INTO "activityTypes" ("activityType", platform, "isCodeContribution", "isCollaboration", description, "label") VALUES | ||
| ('enrolled-certification', 'tnc', false, false, 'Successful payment purchase of certification enrollment', 'Enrolled in certification'), | ||
| ('enrolled-training', 'tnc', false, false, 'Successful payment purchase of training enrollment', 'Enrolled in training'), | ||
| ('issued-certification', 'tnc', false, false, 'User is granted a certification', 'Issued certification'), | ||
| ('attempted-course', 'tnc', false, false, 'Certification course is completed', 'Attempted course'), | ||
| ('attempted-exam', 'tnc', false, false, 'Certification exam is completed', 'Attempted exam'); | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,103 @@ | ||
| import { IS_PROD_ENV } from '@crowd/common' | ||
|
|
||
| // Main: analytics.silver_fact.certificates (certificate data) | ||
| // Joins: | ||
| // - analytics.silver_dim._crowd_dev_segments_union (segment resolution) | ||
| // - analytics.bronze_fivetran_salesforce.bronze_salesforce_merged_user (LFID) | ||
| // - analytics.silver_dim.users (LFID fallback) | ||
| // - analytics.bronze_fivetran_salesforce.accounts + analytics.bronze_fivetran_salesforce_b2b.accounts (org data) | ||
|
|
||
| const CDP_MATCHED_SEGMENTS = ` | ||
| cdp_matched_segments AS ( | ||
| SELECT DISTINCT | ||
| s.SOURCE_ID AS sourceId, | ||
| s.slug | ||
| FROM ANALYTICS.SILVER_DIM._CROWD_DEV_SEGMENTS_UNION s | ||
| WHERE s.PARENT_SLUG IS NOT NULL | ||
| AND s.GRANDPARENTS_SLUG IS NOT NULL | ||
| AND s.SOURCE_ID IS NOT NULL | ||
| )` | ||
|
|
||
| const ORG_ACCOUNTS = ` | ||
| org_accounts AS ( | ||
| SELECT account_id, account_name, website, domain_aliases, LOGO_URL, INDUSTRY, N_EMPLOYEES | ||
| FROM analytics.bronze_fivetran_salesforce.accounts | ||
| WHERE website IS NOT NULL | ||
| UNION ALL | ||
| SELECT account_id, account_name, website, domain_aliases, NULL AS LOGO_URL, NULL AS INDUSTRY, NULL AS N_EMPLOYEES | ||
| FROM analytics.bronze_fivetran_salesforce_b2b.accounts | ||
| WHERE website IS NOT NULL | ||
| )` | ||
mbani01 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| const LFID_COALESCE = `COALESCE(mu.user_name, u.lf_username)` | ||
|
|
||
| export const buildSourceQuery = (sinceTimestamp?: string): string => { | ||
| let select = ` | ||
| SELECT | ||
| c.*, | ||
| cms.slug AS PROJECT_SLUG, | ||
mbani01 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| org.account_name AS ORGANIZATION_NAME, | ||
| org.website AS ORG_WEBSITE, | ||
| org.domain_aliases AS ORG_DOMAIN_ALIASES, | ||
| org.logo_url AS LOGO_URL, | ||
| org.industry AS ORGANIZATION_INDUSTRY, | ||
| CAST(org.n_employees AS VARCHAR) AS ORGANIZATION_SIZE, | ||
| ${LFID_COALESCE} AS LFID | ||
| FROM analytics.silver_fact.certificates c | ||
| INNER JOIN cdp_matched_segments cms | ||
| ON cms.sourceId = c.project_id | ||
| LEFT JOIN analytics.bronze_fivetran_salesforce.bronze_salesforce_merged_user mu | ||
| ON c.user_id = mu.user_id | ||
| AND mu.user_name IS NOT NULL | ||
| LEFT JOIN analytics.silver_dim.users u | ||
| ON LOWER(c.user_email) = LOWER(u.email) | ||
| AND u.lf_username IS NOT NULL | ||
| LEFT JOIN org_accounts org | ||
| ON c.account_id = org.account_id | ||
| WHERE c.user_email IS NOT NULL` | ||
|
|
||
| // Limit to a single project in non-prod to avoid exporting all projects data | ||
| if (!IS_PROD_ENV) { | ||
| select += ` AND cms.slug = 'cncf'` | ||
| } | ||
|
Comment on lines
+60
to
+62
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nitpick: Can we add a comment here just to explain that this is being added for staging/testing purposes? |
||
|
|
||
| const dedup = ` | ||
| QUALIFY ROW_NUMBER() OVER (PARTITION BY c.certificate_id ORDER BY org.website DESC) = 1` | ||
|
|
||
| if (!sinceTimestamp) { | ||
| return ` | ||
| WITH ${ORG_ACCOUNTS}, | ||
| ${CDP_MATCHED_SEGMENTS} | ||
| ${select} | ||
| ${dedup}`.trim() | ||
| } | ||
|
|
||
| return ` | ||
| WITH ${ORG_ACCOUNTS}, | ||
| ${CDP_MATCHED_SEGMENTS}, | ||
| new_cdp_segments AS ( | ||
| SELECT DISTINCT | ||
| s.SOURCE_ID AS sourceId, | ||
| s.slug | ||
| FROM ANALYTICS.SILVER_DIM._CROWD_DEV_SEGMENTS_UNION s | ||
| WHERE s.CREATED_TS >= '${sinceTimestamp}' | ||
| AND s.PARENT_SLUG IS NOT NULL | ||
| AND s.GRANDPARENTS_SLUG IS NOT NULL | ||
| AND s.SOURCE_ID IS NOT NULL | ||
| ) | ||
|
|
||
| -- New certificates since last export | ||
| ${select} | ||
| AND c.issued_ts >= '${sinceTimestamp}' | ||
| ${dedup} | ||
|
|
||
| UNION | ||
|
|
||
| -- All certificates in newly created segments | ||
| ${select} | ||
| AND EXISTS ( | ||
| SELECT 1 FROM new_cdp_segments ncs | ||
| WHERE ncs.slug = cms.slug AND ncs.sourceId = cms.sourceId | ||
| ) | ||
| ${dedup}`.trim() | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,101 @@ | ||
| import { TNC_GRID, TncActivityType } from '@crowd/integrations' | ||
| import { getServiceChildLogger } from '@crowd/logging' | ||
| import { | ||
| IActivityData, | ||
| IMemberData, | ||
| MemberAttributeName, | ||
| MemberIdentityType, | ||
| PlatformType, | ||
| } from '@crowd/types' | ||
|
|
||
| import { TransformedActivity } from '../../../core/transformerBase' | ||
| import { TncTransformerBase } from '../tncTransformerBase' | ||
|
|
||
| const log = getServiceChildLogger('tncCertificatesTransformer') | ||
|
|
||
| export class TncCertificatesTransformer extends TncTransformerBase { | ||
| transformRow(row: Record<string, unknown>): TransformedActivity | null { | ||
| const email = (row.USER_EMAIL as string | null)?.trim() || null | ||
| if (!email) { | ||
| log.debug({ certificateId: row.CERTIFICATE_ID }, 'Skipping row: missing email') | ||
| return null | ||
| } | ||
|
|
||
| const certificateId = (row.CERTIFICATE_ID as string)?.trim() | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Missing sourceId validation in certificates and enrollments transformersMedium Severity The certificates transformer extracts Additional Locations (1) |
||
| const learnerName = (row.LEARNER_NAME as string | null)?.trim() || null | ||
| const lfUsername = (row.LFID as string | null)?.trim() || null | ||
|
|
||
| const identities: IMemberData['identities'] = [] | ||
| const sourceId = (row.USER_ID as string | null) || undefined | ||
|
|
||
| if (lfUsername) { | ||
| identities.push( | ||
| { | ||
| platform: PlatformType.TNC, | ||
| value: email, | ||
| type: MemberIdentityType.EMAIL, | ||
| verified: true, | ||
| sourceId, | ||
| }, | ||
| { | ||
| platform: PlatformType.TNC, | ||
| value: lfUsername, | ||
| type: MemberIdentityType.USERNAME, | ||
| verified: true, | ||
| sourceId, | ||
| }, | ||
| { | ||
| platform: PlatformType.LFID, | ||
| value: lfUsername, | ||
| type: MemberIdentityType.USERNAME, | ||
| verified: true, | ||
| sourceId, | ||
| }, | ||
| ) | ||
| } else { | ||
| identities.push({ | ||
| platform: PlatformType.TNC, | ||
| value: email, | ||
| type: MemberIdentityType.USERNAME, | ||
| verified: true, | ||
| sourceId, | ||
| }) | ||
| } | ||
|
|
||
| const activity: IActivityData = { | ||
| type: TncActivityType.ISSUED_CERTIFICATION, | ||
| platform: PlatformType.TNC, | ||
| timestamp: (row.ISSUED_TS as string | null) || null, | ||
| score: TNC_GRID[TncActivityType.ISSUED_CERTIFICATION].score, | ||
| sourceId: certificateId, | ||
| sourceParentId: (row.COURSE_ID as string | null) || undefined, | ||
| member: { | ||
| displayName: learnerName || email.split('@')[0], | ||
| identities, | ||
| organizations: this.buildOrganizations(row), | ||
| attributes: { | ||
| ...((row.JOB_TITLE as string | null) && { | ||
| [MemberAttributeName.JOB_TITLE]: { [PlatformType.TNC]: row.JOB_TITLE as string }, | ||
| }), | ||
| ...((row.USER_COUNTRY as string | null) && { | ||
| [MemberAttributeName.COUNTRY]: { [PlatformType.TNC]: row.USER_COUNTRY as string }, | ||
| }), | ||
| }, | ||
| }, | ||
| attributes: { | ||
| productName: (row.COURSE_NAME as string | null) || null, | ||
| technology: (row.TECHNOLOGIES_LIST as string | null) || null, | ||
| didExpire: row.DID_EXPIRE as boolean | null, | ||
| expirationDate: (row.EXPIRATION_DATE as string | null) || null, | ||
| }, | ||
| } | ||
|
|
||
| const segmentSlug = (row.PROJECT_SLUG as string | null)?.trim() || null | ||
| const segmentSourceId = (row.PROJECT_ID as string | null)?.trim() || null | ||
| if (!segmentSlug || !segmentSourceId) { | ||
| return null | ||
| } | ||
|
|
||
| return { activity, segment: { slug: segmentSlug, sourceId: segmentSourceId } } | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,114 @@ | ||
| import { IS_PROD_ENV } from '@crowd/common' | ||
|
|
||
| // Main: analytics.bronze_census_ti.course_actions (course action data) | ||
| // Joins: | ||
| // - analytics.bronze_census_ti.users (user resolution via internal_ti_user_id) | ||
| // - analytics.bronze_census_ti.courses (course metadata) | ||
| // - analytics.silver_fact.enrollments (segment + org resolution via email + course_id) | ||
| // - analytics.silver_dim._crowd_dev_segments_union (segment resolution) | ||
| // - analytics.bronze_fivetran_salesforce.accounts + analytics.bronze_fivetran_salesforce_b2b.accounts (org data) | ||
|
|
||
| const CDP_MATCHED_SEGMENTS = ` | ||
| cdp_matched_segments AS ( | ||
| SELECT DISTINCT | ||
| s.SOURCE_ID AS sourceId, | ||
| s.slug | ||
| FROM ANALYTICS.SILVER_DIM._CROWD_DEV_SEGMENTS_UNION s | ||
| WHERE s.PARENT_SLUG IS NOT NULL | ||
| AND s.GRANDPARENTS_SLUG IS NOT NULL | ||
| AND s.SOURCE_ID IS NOT NULL | ||
| )` | ||
|
|
||
| const ORG_ACCOUNTS = ` | ||
| org_accounts AS ( | ||
| SELECT account_id, account_name, website, domain_aliases, LOGO_URL, INDUSTRY, N_EMPLOYEES | ||
| FROM analytics.bronze_fivetran_salesforce.accounts | ||
| WHERE website IS NOT NULL | ||
| UNION ALL | ||
| SELECT account_id, account_name, website, domain_aliases, NULL AS LOGO_URL, NULL AS INDUSTRY, NULL AS N_EMPLOYEES | ||
| FROM analytics.bronze_fivetran_salesforce_b2b.accounts | ||
| WHERE website IS NOT NULL | ||
| )` | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. SQL CTE constants duplicated across three filesLow Severity The Additional Locations (2) |
||
|
|
||
| export const buildSourceQuery = (sinceTimestamp?: string): string => { | ||
| let select = ` | ||
| SELECT | ||
| ca.*, | ||
| co.*, | ||
mbani01 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| tu.user_email, | ||
| tu.lfid, | ||
| tu.learner_name, | ||
| tu.user_country, | ||
| tu.job_title, | ||
| e.project_slug AS PROJECT_SLUG, | ||
| e.project_id AS PROJECT_ID, | ||
| e.account_id, | ||
| org.account_name AS ORGANIZATION_NAME, | ||
| org.website AS ORG_WEBSITE, | ||
| org.domain_aliases AS ORG_DOMAIN_ALIASES, | ||
| org.logo_url AS LOGO_URL, | ||
| org.industry AS ORGANIZATION_INDUSTRY, | ||
| CAST(org.n_employees AS VARCHAR) AS ORGANIZATION_SIZE | ||
| FROM analytics.bronze_census_ti.course_actions ca | ||
| INNER JOIN analytics.bronze_census_ti.users tu | ||
| ON ca.internal_ti_user_id = tu.internal_ti_user_id | ||
| INNER JOIN analytics.bronze_census_ti.courses co | ||
| ON ca.course_id = co.course_id | ||
| INNER JOIN analytics.silver_fact.enrollments e | ||
| ON e.course_id = ca.course_id | ||
| AND LOWER(e.user_email) = LOWER(tu.user_email) | ||
| INNER JOIN cdp_matched_segments cms | ||
| ON cms.slug = e.project_slug | ||
| AND cms.sourceId = e.project_id | ||
| LEFT JOIN org_accounts org | ||
| ON e.account_id = org.account_id | ||
| WHERE ca.type = 'status_change' | ||
| AND ca.source = 'course_started' | ||
| AND co.is_test_or_archived = false | ||
| AND tu.user_email IS NOT NULL` | ||
|
|
||
| // Limit to a single project in non-prod to avoid exporting all projects data | ||
| if (!IS_PROD_ENV) { | ||
| select += ` AND e.project_slug = 'cncf'` | ||
| } | ||
|
|
||
| const dedup = ` | ||
| QUALIFY ROW_NUMBER() OVER (PARTITION BY ca.course_action_id ORDER BY org.website DESC) = 1` | ||
|
|
||
| if (!sinceTimestamp) { | ||
| return ` | ||
| WITH ${ORG_ACCOUNTS}, | ||
| ${CDP_MATCHED_SEGMENTS} | ||
| ${select} | ||
| ${dedup}`.trim() | ||
| } | ||
|
|
||
| return ` | ||
| WITH ${ORG_ACCOUNTS}, | ||
| ${CDP_MATCHED_SEGMENTS}, | ||
| new_cdp_segments AS ( | ||
| SELECT DISTINCT | ||
| s.SOURCE_ID AS sourceId, | ||
| s.slug | ||
| FROM ANALYTICS.SILVER_DIM._CROWD_DEV_SEGMENTS_UNION s | ||
| WHERE s.CREATED_TS >= '${sinceTimestamp}' | ||
| AND s.PARENT_SLUG IS NOT NULL | ||
| AND s.GRANDPARENTS_SLUG IS NOT NULL | ||
| AND s.SOURCE_ID IS NOT NULL | ||
| ) | ||
|
|
||
| -- New course actions since last export | ||
| ${select} | ||
| AND ca.timestamp >= '${sinceTimestamp}' | ||
| ${dedup} | ||
|
|
||
| UNION | ||
|
|
||
| -- All course actions in newly created segments | ||
| ${select} | ||
| AND EXISTS ( | ||
| SELECT 1 FROM new_cdp_segments ncs | ||
| WHERE ncs.slug = cms.slug AND ncs.sourceId = cms.sourceId | ||
| ) | ||
| ${dedup}`.trim() | ||
| } | ||


Uh oh!
There was an error while loading. Please reload this page.