
Commit d7eaaf0

Oleh Borysevych and dariabezkorovaina authored

Duet AI code-explanation and code-generation prompts basic (#30686)

* parquet added
* csv and avro added
* Apply suggestions from code review
* whitespace fixed

Co-authored-by: Daria Bezkorovaina <[email protected]>
1 parent 33a0bfb commit d7eaaf0

8 files changed

Lines changed: 884 additions & 0 deletions


Lines changed: 138 additions & 0 deletions
@@ -0,0 +1,138 @@
Prompt:
What does this code do?

```java
package gcs;

import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.io.json.JsonIO;
import org.apache.beam.sdk.options.Description;
import org.apache.beam.sdk.options.PipelineOptions;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.options.Validation;
import org.apache.beam.sdk.schemas.JavaFieldSchema;
import org.apache.beam.sdk.schemas.annotations.DefaultSchema;
import org.apache.beam.sdk.transforms.Create;

import java.io.Serializable;
import java.util.Arrays;
import java.util.List;

public class WriteJsonIO {

    @DefaultSchema(JavaFieldSchema.class)
    public static class SampleRecord implements Serializable {
        public int id;
        public String month;
        public String amount;

        public SampleRecord() {
        }

        public SampleRecord(int id, String month, String amount) {
            this.id = id;
            this.month = month;
            this.amount = amount;
        }
    }

    public interface WriteJsonOptions extends PipelineOptions {

        @Description("A file path to write JSON files to")
        @Validation.Required
        String getFilePath();

        void setFilePath(String filePath);
    }

    public static void main(String[] args) {

        WriteJsonOptions options =
            PipelineOptionsFactory.fromArgs(args)
                .withValidation().as(WriteJsonOptions.class);

        Pipeline p = Pipeline.create(options);

        List<SampleRecord> rows =
            Arrays.asList(
                new SampleRecord(1, "January", "$1000"),
                new SampleRecord(2, "February", "$2000"),
                new SampleRecord(3, "March", "$3000"));

        p.apply("Create Records", Create.of(rows))
            .apply(
                "Write Records to JSON File",
                JsonIO.<SampleRecord>write(options.getFilePath())
                    .withNumShards(1));
        p.run();
    }
}
```

Response:
This code snippet uses the Apache Beam JsonIO connector to write a list of `SampleRecord` objects to a JSON file.

The `SampleRecord` class defines the schema for the JSON records and is annotated with `@DefaultSchema(JavaFieldSchema.class)` to indicate that its fields should be used as the schema for the JSON records.

```java
@DefaultSchema(JavaFieldSchema.class)
public static class SampleRecord implements Serializable {
    public int id;
    public String month;
    public String amount;

    public SampleRecord() {
    }

    public SampleRecord(int id, String month, String amount) {
        this.id = id;
        this.month = month;
        this.amount = amount;
    }
}
```

The `WriteJsonOptions` interface specifies command-line options for configuring the output file path. The `@Description` annotation provides a description of the option, and `@Validation.Required` indicates that the option is required.

```java
public interface WriteJsonOptions extends PipelineOptions {

    @Description("A file path to write JSON files to")
    @Validation.Required
    String getFilePath();

    void setFilePath(String filePath);
}
```

The `PipelineOptionsFactory` class generates `PipelineOptions` from command-line arguments. The `Pipeline.create` method creates a data pipeline, defined as a sequence of transformations to be applied to the data.

```java
WriteJsonOptions options =
    PipelineOptionsFactory.fromArgs(args)
        .withValidation().as(WriteJsonOptions.class);

Pipeline p = Pipeline.create(options);
```

The pipeline generates a list of `SampleRecord` objects and applies the `Create` transform to create a `PCollection` from this list. Subsequently, the `JsonIO.write` transform is used to write the `PCollection` to a JSON file, with the `withNumShards` method specifying the number of output shards.

```java
List<SampleRecord> rows =
    Arrays.asList(
        new SampleRecord(1, "January", "$1000"),
        new SampleRecord(2, "February", "$2000"),
        new SampleRecord(3, "March", "$3000"));

p.apply("Create Records", Create.of(rows))
    .apply(
        "Write Records to JSON File",
        JsonIO.<SampleRecord>write(options.getFilePath())
            .withNumShards(1));
```

Finally, the code snippet invokes the `Pipeline.run` method to execute the pipeline.

```java
p.run();
```
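
To make the result concrete, here is a plain-Java sketch (not part of the Beam pipeline, and not using JsonIO itself) of the newline-delimited JSON that writing the three sample records would produce. The field order shown is an assumption; the real output order depends on the schema Beam derives from `SampleRecord`.

```java
// Illustration only: approximate JSON lines for the three sample records,
// built with String.format rather than Beam's JsonIO.
public class JsonOutputSketch {
    static String toJsonLine(int id, String month, String amount) {
        return String.format("{\"id\":%d,\"month\":\"%s\",\"amount\":\"%s\"}", id, month, amount);
    }

    public static void main(String[] args) {
        System.out.println(toJsonLine(1, "January", "$1000"));
        System.out.println(toJsonLine(2, "February", "$2000"));
        System.out.println(toJsonLine(3, "March", "$3000"));
    }
}
```

Because `withNumShards(1)` is set, all three lines would land in a single output file.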
Lines changed: 138 additions & 0 deletions
@@ -0,0 +1,138 @@
Prompt:
What does this code do?

```java
public class WriteCsvFile {

    @DefaultSchema(JavaFieldSchema.class)
    public static class ExampleRecord implements Serializable {
        public int id;
        public String month;
        public String amount;

        public ExampleRecord() {
        }

        public ExampleRecord(int id, String month, String amount) {
            this.id = id;
            this.month = month;
            this.amount = amount;
        }
    }

    public interface WriteCsvFileOptions extends PipelineOptions {

        @Description("A file path to write CSV files to")
        @Validation.Required
        String getFilePath();

        void setFilePath(String filePath);
    }

    public static void main(String[] args) {
        WriteCsvFileOptions options =
            PipelineOptionsFactory.fromArgs(args)
                .withValidation().as(WriteCsvFileOptions.class);

        Pipeline p = Pipeline.create(options);

        List<ExampleRecord> rows =
            Arrays.asList(
                new ExampleRecord(1, "January", "$1000"),
                new ExampleRecord(2, "February", "$2000"),
                new ExampleRecord(3, "March", "$3000"));

        CSVFormat csvFormat =
            CSVFormat.DEFAULT.withHeaderComments("CSV file created by Apache Beam")
                .withCommentMarker('#');

        p.apply("Create collection", Create.of(rows))
            .apply(
                "Write to CSV file",
                CsvIO.<ExampleRecord>write(options.getFilePath(), csvFormat)
                    .withNumShards(1));
        p.run();
    }
}
```

Response:
This code snippet writes data to a CSV file using the `CsvIO.write` transform from the CsvIO connector provided by Apache Beam.

```java
@DefaultSchema(JavaFieldSchema.class)
public static class ExampleRecord implements Serializable {
    public int id;
    public String month;
    public String amount;

    public ExampleRecord() {
    }

    public ExampleRecord(int id, String month, String amount) {
        this.id = id;
        this.month = month;
        this.amount = amount;
    }
}
```

The `ExampleRecord` class represents a row in the CSV file, defining three fields: `id`, `month`, and `amount`. The `@DefaultSchema(JavaFieldSchema.class)` annotation instructs the Apache Beam SDK to use the `JavaFieldSchema` class to generate the schema for the `ExampleRecord` class.

```java
public interface WriteCsvFileOptions extends PipelineOptions {

    @Description("A file path to write CSV files to")
    @Validation.Required
    String getFilePath();

    void setFilePath(String filePath);
}
```

The `WriteCsvFileOptions` interface defines a custom option for specifying the file path to write the CSV files to.

```java
WriteCsvFileOptions options =
    PipelineOptionsFactory.fromArgs(args)
        .withValidation().as(WriteCsvFileOptions.class);

Pipeline p = Pipeline.create(options);
```

The `Pipeline.create` method creates a data pipeline using the options defined in the `WriteCsvFileOptions` interface.

```java
List<ExampleRecord> rows =
    Arrays.asList(
        new ExampleRecord(1, "January", "$1000"),
        new ExampleRecord(2, "February", "$2000"),
        new ExampleRecord(3, "March", "$3000"));
```

Subsequently, the code snippet creates a list of `ExampleRecord` objects to be written to the CSV file.

```java
CSVFormat csvFormat =
    CSVFormat.DEFAULT.withHeaderComments("CSV file created by Apache Beam")
        .withCommentMarker('#');
```

To write the data to a CSV file, the pipeline creates a `CSVFormat` object with a header comment and a comment marker.

```java
p.apply("Create collection", Create.of(rows))
    .apply(
        "Write to CSV file",
        CsvIO.<ExampleRecord>write(options.getFilePath(), csvFormat)
            .withNumShards(1));
```

The code applies the `Create` transform to generate a collection of `ExampleRecord` objects. Then, the `CsvIO.write` transform is applied to write the collection to a CSV file, with the `withNumShards` method specifying the number of shards to use when writing the file.

```java
p.run();
```

Finally, the code snippet invokes the `Pipeline.run` method to execute the pipeline.
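
As a rough illustration, the following plain-Java sketch (no Beam or Commons CSV on the classpath) renders approximately what the single output shard could contain, given the configured header comment and `'#'` comment marker. The exact quoting and header handling in real CsvIO output is produced by Commons CSV and may differ.

```java
// Illustration only: an approximation of one CsvIO output shard.
public class CsvOutputSketch {
    static String renderShard() {
        StringBuilder sb = new StringBuilder();
        // Header comment emitted via withHeaderComments(...) and marker '#'.
        sb.append("# CSV file created by Apache Beam\n");
        sb.append("1,January,$1000\n");
        sb.append("2,February,$2000\n");
        sb.append("3,March,$3000\n");
        return sb.toString();
    }

    public static void main(String[] args) {
        System.out.print(renderShard());
    }
}
```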
Lines changed: 92 additions & 0 deletions
@@ -0,0 +1,92 @@
Prompt:
What does this code do?

```java
public class ReadGenericAvroIO {

    private static final Logger LOG = LoggerFactory.getLogger(ReadGenericAvroIO.class);

    public interface ReadAvroOptions extends PipelineOptions {
        @Description("Path to the Avro file to read from")
        @Default.String("gs://[your-bucket-name]/[your-object-name]/filename.avro")
        String getInput();

        void setInput(String value);
    }

    public static void main(String[] args) {
        ReadAvroOptions options = PipelineOptionsFactory.fromArgs(args).withValidation().as(ReadAvroOptions.class);

        Pipeline p = Pipeline.create(options);

        String fields = "[{ \"name\": \"month\", \"type\": \"string\" }, { \"name\": \"amount\", \"type\": \"string\" }]";
        String schema = "{\"type\": \"record\", \"name\": \"AvroRecord\",\"fields\":" + fields + "}";
        Schema avroSchema = Schema.parse(schema);

        p.apply(AvroIO.readGenericRecords(avroSchema).from(options.getInput()))
            .apply("Parse and log", ParDo.of(new DoFn<GenericRecord, String>() {
                @ProcessElement
                public void processElement(ProcessContext c) {
                    GenericRecord record = c.element();
                    String message = "Month: " + record.get("month") + ". Amount: " + record.get("amount");
                    LOG.info(message);
                    c.output(message);
                }
            }));

        p.run();
    }
}
```

Response:
This code reads data from an Apache Avro file using the `AvroIO.readGenericRecords` transform from the AvroIO connector provided by Apache Beam.

```java
public interface ReadAvroOptions extends PipelineOptions {
    @Description("Path to the Avro file to read from")
    @Default.String("gs://[your-bucket-name]/[your-object-name]/filename.avro")
    String getInput();

    void setInput(String value);
}
```

The `ReadAvroOptions` interface defines the command-line argument `--input`, which specifies the path to the Avro file and sets the default value using the `@Default` annotation.

```java
ReadAvroOptions options = PipelineOptionsFactory.fromArgs(args).withValidation().as(ReadAvroOptions.class);

Pipeline p = Pipeline.create(options);
```

The `PipelineOptionsFactory` class parses the command-line arguments and creates a `ReadAvroOptions` instance. This instance is then used to create a pipeline with the provided pipeline options.

```java
String fields = "[{ \"name\": \"month\", \"type\": \"string\" }, { \"name\": \"amount\", \"type\": \"string\" }]";
String schema = "{\"type\": \"record\", \"name\": \"AvroRecord\",\"fields\":" + fields + "}";
Schema avroSchema = Schema.parse(schema);
```

The AvroIO connector requires a schema to read Avro files. Hence, the schema is defined as a string and parsed into a `Schema` object.

```java
p.apply(AvroIO.readGenericRecords(avroSchema).from(options.getInput()))
    .apply("Parse and log", ParDo.of(new DoFn<GenericRecord, String>() {
        @ProcessElement
        public void processElement(ProcessContext c) {
            GenericRecord record = c.element();
            String message = "Month: " + record.get("month") + ". Amount: " + record.get("amount");
            LOG.info(message);
            c.output(message);
        }
    }));
```

In this segment, the `ParDo` transform processes each `GenericRecord` object from the Avro file. Each `GenericRecord` object is then formatted into a string and logged accordingly.

```java
p.run();
```

Finally, the pipeline is executed to read the Avro file using the AvroIO connector, parse the `GenericRecord` objects, format them, and output the results.
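
The string the `DoFn` builds for each record can be verified in isolation. The sketch below reproduces that exact formatting in plain Java (no Beam or Avro dependency); the sample values are hypothetical inputs, not data from the source.

```java
// Illustration only: the message format produced by the DoFn above,
// extracted into a standalone helper.
public class AvroMessageSketch {
    static String format(Object month, Object amount) {
        // Same concatenation as in processElement.
        return "Month: " + month + ". Amount: " + amount;
    }

    public static void main(String[] args) {
        System.out.println(format("January", "$1000"));
    }
}
```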

0 commit comments