1717 */
1818package org .apache .beam .sdk .metrics ;
1919
20+ import static org .apache .beam .vendor .guava .v32_1_2_jre .com .google .common .base .Preconditions .checkNotNull ;
21+
2022import java .util .ArrayList ;
2123import java .util .HashSet ;
2224import java .util .Iterator ;
2325import java .util .List ;
2426import java .util .Set ;
2527import java .util .regex .Pattern ;
2628import org .apache .beam .sdk .annotations .Internal ;
29+ import org .apache .beam .sdk .lineage .LineageBase ;
30+ import org .apache .beam .sdk .lineage .LineageOptions ;
2731import org .apache .beam .sdk .metrics .Metrics .MetricsFlag ;
32+ import org .apache .beam .sdk .options .PipelineOptions ;
2833import org .apache .beam .vendor .guava .v32_1_2_jre .com .google .common .annotations .VisibleForTesting ;
2934import org .apache .beam .vendor .guava .v32_1_2_jre .com .google .common .base .Splitter ;
30- import org .apache .beam .vendor .guava .v32_1_2_jre .com .google .common .collect .ImmutableList ;
3135import org .checkerframework .checker .nullness .qual .Nullable ;
36+ import org .slf4j .Logger ;
37+ import org .slf4j .LoggerFactory ;
3238
3339/**
3440 * Standard collection of metrics used to record source and sinks information for lineage tracking.
3541 */
3642public class Lineage {
37-
3843 public static final String LINEAGE_NAMESPACE = "lineage" ;
39- private static final Lineage SOURCES = new Lineage (Type .SOURCE );
40- private static final Lineage SINKS = new Lineage (Type .SINK );
44+ private static final Logger LOG = LoggerFactory .getLogger (Lineage .class );
45+
46+ private static volatile @ Nullable Lineage sources ;
47+ private static volatile @ Nullable Lineage sinks ;
48+ private static volatile @ Nullable Class <? extends LineageBase > currentLineageType ;
49+
50+ private static final Object INIT_LOCK = new Object ();
51+
4152 // Reserved characters are backtick, colon, whitespace (space, \t, \n) and dot.
4253 private static final Pattern RESERVED_CHARS = Pattern .compile ("[:\\ s.`]" );
4354
44- private final Metric metric ;
55+ private final LineageBase delegate ;
4556
46- private Lineage (Type type ) {
47- if (MetricsFlag .lineageRollupEnabled ()) {
48- this .metric =
49- Metrics .boundedTrie (
50- LINEAGE_NAMESPACE ,
51- type == Type .SOURCE ? Type .SOURCEV2 .toString () : Type .SINKV2 .toString ());
52- } else {
53- this .metric = Metrics .stringSet (LINEAGE_NAMESPACE , type .toString ());
/** Direction a lineage instance is created for. */
public enum LineageDirection {
  /** Direction used for the instance returned by {@link Lineage#getSources()}. */
  SOURCE,

  /** Direction used for the instance returned by {@link Lineage#getSinks()}. */
  SINK
}
61+
/**
 * Wraps the given lineage implementation.
 *
 * @param delegate implementation that lineage calls are forwarded to; must not be null
 */
private Lineage(LineageBase delegate) {
  checkNotNull(delegate, "delegate cannot be null");
  this.delegate = delegate;
}
65+
66+ @ Internal
67+ public static void setDefaultPipelineOptions (PipelineOptions options ) {
68+ checkNotNull (options , "options cannot be null" );
69+ Class <? extends LineageBase > requestedType = options .as (LineageOptions .class ).getLineageType ();
70+
71+ if (canSkipInit (requestedType )) {
72+ return ;
73+ }
74+ synchronized (INIT_LOCK ) {
75+ if (canSkipInit (requestedType )) {
76+ return ;
77+ }
78+ sources = createLineage (options , LineageDirection .SOURCE );
79+ sinks = createLineage (options , LineageDirection .SINK );
80+ currentLineageType = requestedType ;
81+ LOG .debug ("Lineage initialized with type {}" , requestedType );
82+ }
83+ }
84+
85+ private static boolean canSkipInit (@ Nullable Class <? extends LineageBase > requestedType ) {
86+ if (sources == null ) {
87+ return false ;
88+ }
89+ // When no type is requested, preserve whatever is already initialized.
90+ // When a type is requested, only re-init if it differs from the active type.
91+ return requestedType == null || requestedType .equals (currentLineageType );
92+ }
93+
94+ private static Lineage createLineage (PipelineOptions options , LineageDirection direction ) {
95+ Class <? extends LineageBase > lineageClass = options .as (LineageOptions .class ).getLineageType ();
96+
97+ if (lineageClass != null ) {
98+ try {
99+ LineageBase lineage =
100+ lineageClass
101+ .getDeclaredConstructor (PipelineOptions .class , LineageDirection .class )
102+ .newInstance (options , direction );
103+ LOG .info ("Using {} for lineage direction {}" , lineageClass .getName (), direction );
104+ return new Lineage (lineage );
105+ } catch (ReflectiveOperationException e ) {
106+ throw new IllegalArgumentException (
107+ "Failed to instantiate lineage implementation: "
108+ + lineageClass .getName ()
109+ + ". The class must have a public constructor accepting "
110+ + "(PipelineOptions, Lineage.LineageDirection)." ,
111+ e );
112+ }
54113 }
114+
115+ LOG .debug ("Using default Metrics-based lineage for direction {}" , direction );
116+ LineageBase defaultLineage =
117+ MetricsFlag .lineageRollupEnabled ()
118+ ? new BoundedTrieMetricsLineage (options , direction )
119+ : new StringSetMetricsLineage (options , direction );
120+ return new Lineage (defaultLineage );
55121 }
56122
57123 /** {@link Lineage} representing sources and optionally side inputs. */
58124 public static Lineage getSources () {
59- return SOURCES ;
125+ return checkNotNull (
126+ sources ,
127+ "Lineage not initialized. FileSystems.setDefaultPipelineOptions must be called first." );
60128 }
61129
62130 /** {@link Lineage} representing sinks. */
63131 public static Lineage getSinks () {
64- return SINKS ;
132+ return checkNotNull (
133+ sinks ,
134+ "Lineage not initialized. FileSystems.setDefaultPipelineOptions must be called first." );
65135 }
66136
67137 @ VisibleForTesting
@@ -140,12 +210,7 @@ public void add(String system, Iterable<String> segments) {
140210 * <p>In particular, this means they will often have trailing delimiters.
141211 */
142212 public void add (Iterable <String > rollupSegments ) {
143- ImmutableList <String > segments = ImmutableList .copyOf (rollupSegments );
144- if (MetricsFlag .lineageRollupEnabled ()) {
145- ((BoundedTrie ) this .metric ).add (segments );
146- } else {
147- ((StringSet ) this .metric ).add (String .join ("" , segments ));
148- }
213+ delegate .add (rollupSegments );
149214 }
150215
151216 /**
@@ -156,6 +221,8 @@ public void add(Iterable<String> rollupSegments) {
156221 * @param truncatedMarker the marker to use to represent truncated FQNs.
157222 * @return A flat representation of all FQNs. If the FQN was truncated then it has a trailing
158223 * truncatedMarker.
224+ * <p>NOTE: When using a custom Lineage plugin, this method will return empty results since
225+ * lineage is not stored in Metrics.
159226 */
160227 public static Set <String > query (MetricResults results , Type type , String truncatedMarker ) {
161228 MetricQueryResults lineageQueryResults = getLineageQueryResults (results , type );
@@ -184,6 +251,8 @@ public static Set<String> query(MetricResults results, Type type, String truncat
184251 * @param results FQNs from the result
185252 * @param type sources or sinks
186253 * @return A flat representation of all FQNs. If the FQN was truncated then it has a trailing '*'.
254+ * <p>NOTE: When using a custom Lineage plugin, this method will return empty results since
255+ * lineage is not stored in Metrics.
187256 */
188257 public static Set <String > query (MetricResults results , Type type ) {
189258 if (MetricsFlag .lineageRollupEnabled ()) {
0 commit comments